Replace ad-hoc print()/eprint() diagnostics with the standard `logging` module.

* common.py: add a shared `log_handler` (a StreamHandler with a timestamped
  formatter) and `get_logger(name)`, which returns the named logger set to
  level INFO with that handler attached.
* extract.py, extract_mosetp.py, extract_page.py: create a module-level
  `log = get_logger(__name__)` and route messages through log.info /
  log.warning / log.error.
* extract_mosetp.py: a FileNotFoundError is no longer printed and swallowed;
  a note is added to the exception and it is re-raised.
* extract_page.py: the two-line ParseError report becomes one log.error call.

diff --git a/tetras_extraction/macao_12/script/src/common.py b/tetras_extraction/macao_12/script/src/common.py
index 31fcc7d405bc17903b29ad15190d7b048ed0f2b5..73cb9c7469e7d5a6226e0c347ed0d3557757740b 100644
--- a/tetras_extraction/macao_12/script/src/common.py
+++ b/tetras_extraction/macao_12/script/src/common.py
@@ -88,3 +88,24 @@ def add_index(g: Graph, subject: URIRef, index: int):
 
 class ParseError(Exception):
     pass
+
+
+# Logging ######################################################################
+
+import logging
+
+log_handler = logging.StreamHandler()
+log_handler.setFormatter(
+    logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+)
+
+
+def get_logger(name: str) -> logging.Logger:
+    """Returns a configured `Logger`.
+
+    :param name: The name of the logger (it is recommended to use `__name__`)
+    """
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.INFO)
+    logger.addHandler(log_handler)
+    return logger
diff --git a/tetras_extraction/macao_12/script/src/extract.py b/tetras_extraction/macao_12/script/src/extract.py
index 196848dd2e287857096adc9a7ae08c025f9b22ab..70439bd0bceb1a19caecaa93832e5bd028e3d40b 100644
--- a/tetras_extraction/macao_12/script/src/extract.py
+++ b/tetras_extraction/macao_12/script/src/extract.py
@@ -7,6 +7,9 @@ from rdflib.namespace import OWL, RDF
 import extract_mosetp
 from common import *
 
+# Initialise logger
+log = get_logger(__name__)
+
 schema_ontology_uri = URIRef(
     "http://www.semanticweb.org/eliott/ontologies/2024/4/macao"
 )
@@ -36,7 +39,7 @@ def export_graph(g: Graph):
     g.add((this_onto, RDFS.label, Literal("macao-content")))
     g.add((this_onto, OWL.imports, schema_ontology_uri))
     g.serialize(RESULT_FILE, "turtle", base=NS)
-    print(f"Exported {len(g)} triples to {RESULT_FILE}.")
+    log.info(f"Exported {len(g)} triples to {RESULT_FILE}.")
 
 
 def ns_find(elem: etree._Element, query: str):
@@ -122,7 +125,7 @@ def parse_manifest_rec(
 
 
 def compare_files(f1: str, f2: str):
-    print(
+    log.info(
         "Files {} and {} {}.".format(
             f1, f2, "are identical" if filecmp.cmp(f1, f2) else "differ"
         )
diff --git a/tetras_extraction/macao_12/script/src/extract_mosetp.py b/tetras_extraction/macao_12/script/src/extract_mosetp.py
index 3bb26f155952eec5c6f5b54b13266eb5926ce51c..bda3e5d624dfce392faae4be31d58465a86bedbd 100644
--- a/tetras_extraction/macao_12/script/src/extract_mosetp.py
+++ b/tetras_extraction/macao_12/script/src/extract_mosetp.py
@@ -7,6 +7,9 @@ from rdflib import OWL, RDF, RDFS, Graph, Literal
 from common import *
 from extract_page import parse_page
 
+# Initialise logger
+log = get_logger(__name__)
+
 
 def generate_triples(
     graph: Graph, mosetp_id: str, page_id: str, page_title: str, page_index: int
@@ -65,12 +68,13 @@ def parse_mosetp(graph: Graph, filepath: str, id: str):
                 # Call the page parser
                 parse_page(graph, f"{SOURCE_DIR}/contenu/pages/{page_id}.html", page_id)
             else:
-                eprint(f"page regex found no match on line '{line}'")
+                log.warning(f"skipping page: regex found no match on line '{line}'")
     except FileNotFoundError as e:
-        eprint(f"Failed to run 'grep': {e.strerror}: {e.filename}")
+        e.add_note(f"Failed to parse '{filepath}' with subprocess")
+        raise e
     except subprocess.CalledProcessError as e:
         if e.returncode == 1:
-            eprint(f"{filepath}: grep found no match, skipping.'")
+            log.error(f"{filepath}: grep found no match, skipping.")
         else:
-            eprint(e.stderr, end="")
+            log.error(e.stderr)
 
diff --git a/tetras_extraction/macao_12/script/src/extract_page.py b/tetras_extraction/macao_12/script/src/extract_page.py
index 038f698f40c3296e5a1295410d1b0f878987cc1a..4199ae580e6ad7ecb9733e14f88625e8ba02e84a 100644
--- a/tetras_extraction/macao_12/script/src/extract_page.py
+++ b/tetras_extraction/macao_12/script/src/extract_page.py
@@ -12,6 +12,9 @@ from typing_extensions import override
 
 from common import *
 
+# Initialise logger
+log = get_logger(__name__)
+
 
 class Comment:
     def __init__(self):
@@ -99,7 +102,9 @@ class Activity:
             case ["divConsigne", _]:
                 self.comment_consigne = comment
             case [alpha, num]:
-                eprint(f"No match for comment {alpha}[{num}] ('{comment.id}')")
+                log.warning(
+                    f"No match for comment {alpha}[{num}] ('{comment.id}')"
+                )
 
     def get_name(self) -> str:
         return type(self).__name__
@@ -546,8 +551,9 @@ def parse_page(graph: Graph, filepath: str, id: str):
                 activity: Activity = parser.parse(js)
                 print(activity, end="", file=f)
             except ParseError as e:
-                eprint(f"{parser} -> {id}: Parsing error: {e}")
-                eprint("Treating this as a generic Activity.")
+                log.error(
+                    f"{parser} -> {id}: Parsing error: {e}. Treating this as a generic Activity."
+                )
             activity.id = id
 
             # Parse the HTML portion