Skip to content
Snippets Groups Projects
Select Git revision
  • 8837ec65bf4eac12464202775935bb9c0cd5f3a9
  • main default protected
2 results

example.md

Blame
  • common.py NaN GiB
    from os import environ, path
    from sys import stderr
    from typing import Any
    
    from lxml import html
    from rdflib import RDFS, Graph, Literal, Namespace, URIRef
    
    
    def env_path_or_rel_default(env_var: str, default: str) -> str:
        """Resolve a path from the environment, with a module-relative fallback

        When the environment variable is unset or empty, `default` is joined
        onto this module's directory (an absolute `default` is therefore used
        as-is). The chosen path is always passed through `path.realpath`.

        :param env_var: The name of the environment variable to look up
        :param default: Fallback path, interpreted relative to this file's directory
        :return: The resolved path
        """
        configured = environ.get(env_var)
        if configured:
            chosen = configured
        else:
            # Only evaluated when the variable is unset or empty
            chosen = path.join(MODULE_DIR, default)
        return path.realpath(chosen)
    
    
    # Path constants ###############################################################
    
    MACAO_VERSION = environ.get("VERSION") or "full"
    """Version name read from `VERSION` (default "full"), selects the result sub-directory"""
    MODULE_DIR = path.dirname(path.realpath(__file__))
    """Absolute path of this module's directory"""
    MACAO_ROOT = env_path_or_rel_default("MACAO_ROOT", "../../..")
    """Path to the Macao root directory"""
    SOURCE_DIR = env_path_or_rel_default("SOURCES_DIR", "../../../Basilisk/MACAO")
    """Path to the Macao source directory (i.e. the one with the manifest)"""
    RESULT_DIR = env_path_or_rel_default("RESULTS_DIR", f"../../result/{MACAO_VERSION}")
    """Path to the directory containing various results (RDF, content, media...)"""
    # RESULT_DIR is already absolute, so the defaults built from it below are
    # absolute too and are used as-is by env_path_or_rel_default().
    NEW_CONTENT_ROOT = env_path_or_rel_default(
        "NEW_CONTENT_ROOT", path.join(RESULT_DIR, "activities")
    )
    """Path to the `activities` directory (by default inside the result directory)"""
    RDF_CONTENT_FILE = env_path_or_rel_default(
        "RDF_CONTENT_FILE", path.join(RESULT_DIR, "macao_content.ttl")
    )
    """Path to the file containing the extracted content"""
    RDF_SCHEMA_FILE = env_path_or_rel_default(
        "RDF_SCHEMA_FILE", "../../schemes/macao_schema.ttl"
    )
    """Path to the schema file"""
    # Reads its own environment variable: it previously reused "RDF_SCHEMA_FILE",
    # so overriding the schema path silently redirected this path as well.
    RDF_MANUAL_EDITION_FILE = env_path_or_rel_default(
        "RDF_MANUAL_EDITION_FILE", "../../schemes/macao-manual-edition.ttl"
    )
    """Path to the file containing manual editions"""
    RDF_FULL_FILE = env_path_or_rel_default(
        "RDF_FULL_FILE", path.join(RESULT_DIR, "macao_full.ttl")
    )
    """Path to the full RDF file, including schema, extracted content and inferences"""
    
    
    class Context:
        """Holder for settings that may be changed while the program runs.

        The module-level constants are computed once at import time. Code that
        needs to modify the version or the source directory at runtime should
        read and write the class attributes below instead of the constants.
        """

        # Starts as a copy of the SOURCE_DIR constant
        source_dir = SOURCE_DIR
        # Starts as a copy of the MACAO_VERSION constant
        version = MACAO_VERSION
    
    
    # Ontology terms used in this module are obtained by indexing this namespace,
    # e.g. NS["titre"] or NS["index"].
    NS = Namespace("http://www.semanticweb.org/eliott/ontologies/2024/4/macao/")
    """The rdflib base Namespace for our ontology"""
    
    # Utility functions ############################################################
    
    
    def eprint(*args, **kwargs):  # pyright: ignore[reportMissingParameterType]
        """Forward all arguments to `print()`, with standard error as the output stream

        Passing `file=` is not supported, since the destination is already fixed.
        """
        print(*args, **kwargs, file=stderr)
    
    
    def to_html(elem: html.HtmlElement) -> str:
        """Serialise a `HtmlElement` into its HTML markup, as a `str`"""
        # encoding="unicode" requests a text string rather than encoded bytes
        markup = html.tostring(elem, encoding="unicode")
        return markup
    
    
    def insert_grow(l: list[Any], index: int, value: Any, fill_value: Any | None = None):
        """Insert at a given position in a list, growing it if necessary
    
        :param l: list
        :param index: The position where the value is inserted
        :param value: The value to insert
        :param fill_value: The value used for elements created automatically when growing, defaults to None
        """
        for _ in range(len(l), index + 1):
            l.append(fill_value)
        l[index] = value
    
    
    def set_title(g: Graph, subject: URIRef, title: str):
        """Record `title` as both the `rdfs:label` and the `titre` of `subject`

        :param g: The graph to update
        :param subject: The resource being titled
        :param title: The title text, stored as a literal
        """
        for predicate in (RDFS.label, NS["titre"]):
            g.set((subject, predicate, Literal(title)))
    
    
    def add_index(g: Graph, subject: URIRef, index: int):
        """Attach an `index` triple to `subject`, plus a Protégé display name if titled

        The display name is only generated when the graph already holds a literal
        `titre` value for `subject`.

        :param g: The graph to update
        :param subject: The resource being indexed
        :param index: The index value
        """
        g.add((subject, NS["index"], Literal(index)))

        # No literal title: there is nothing to build a display name from
        current_title = g.value(subject, NS["titre"])
        if not isinstance(current_title, Literal):
            return

        # Last path component of the subject URI
        local_name = str(subject).rsplit("/", 1)[-1]
        prefix = Literal(f"{index:02} | {local_name} | ")
        g.add((subject, NS["__protege_display_name"], prefix + current_title))
    
    
    # Exceptions ###################################################################
    
    
    class ParseError(Exception):
        """Exception type for parse failures; adds nothing to `Exception` itself"""
    
    
    # Logging ######################################################################
    
    import logging
    
    # Handler shared by the loggers configured in this module. A StreamHandler
    # created without arguments writes to standard error.
    log_handler = logging.StreamHandler()
    log_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )

    # Level names accepted from the LOGLEVEL environment variable
    _LOG_LEVEL_NAMES = frozenset({"CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"})

    # The name is compared case-insensitively, so that e.g. "Debug" is honoured
    # instead of silently falling back to INFO. An unset, empty or unrecognised
    # value selects INFO.
    _requested_level = (environ.get("LOGLEVEL") or "").upper()
    log_level = (
        _requested_level if _requested_level in _LOG_LEVEL_NAMES else logging.INFO
    )
    
    
    def get_logger(name: str) -> logging.Logger:
        """Fetch the logger called `name` and apply this module's logging setup to it

        The logger gets the module-wide handler and the level selected through
        the `LOGLEVEL` environment variable.

        :param name: The name of the logger (it is recommended to use the file name)
        :return: The configured `Logger`
        """
        configured = logging.getLogger(name)
        configured.addHandler(log_handler)
        configured.setLevel(log_level)
        return configured