Skip to content
Snippets Groups Projects
Commit 3782ed68 authored by Eliott Sammier's avatar Eliott Sammier
Browse files

Add proper logging

parent 997e1cbc
No related branches found
No related tags found
No related merge requests found
......@@ -88,3 +88,24 @@ def add_index(g: Graph, subject: URIRef, index: int):
class ParseError(Exception):
    """Raised by the activity parsers when input content cannot be parsed
    (callers catch it and fall back to a generic Activity)."""

    pass
# Logging ######################################################################
import logging

# Single shared handler so every module logs to the same stream with a
# uniform format.
log_handler = logging.StreamHandler()
log_handler.setFormatter(
    logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
)


def get_logger(name: str) -> logging.Logger:
    """Returns a configured `Logger`.

    Safe to call repeatedly with the same name: `logging.getLogger` caches
    loggers per name, so the shared handler is only attached once,
    preventing every record from being emitted in duplicate.

    :param name: The name of the logger (it is recommended to use `__name__`)
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    # Guard against attaching the shared handler more than once to the
    # same (cached) logger object.
    if log_handler not in logger.handlers:
        logger.addHandler(log_handler)
    return logger
......@@ -7,6 +7,9 @@ from rdflib.namespace import OWL, RDF
import extract_mosetp
from common import *
# Initialise logger
log = get_logger(__name__)
schema_ontology_uri = URIRef(
"http://www.semanticweb.org/eliott/ontologies/2024/4/macao"
)
......@@ -36,7 +39,7 @@ def export_graph(g: Graph):
g.add((this_onto, RDFS.label, Literal("macao-content")))
g.add((this_onto, OWL.imports, schema_ontology_uri))
g.serialize(RESULT_FILE, "turtle", base=NS)
print(f"Exported {len(g)} triples to {RESULT_FILE}.")
log.info(f"Exported {len(g)} triples to {RESULT_FILE}.")
def ns_find(elem: etree._Element, query: str):
......@@ -122,7 +125,7 @@ def parse_manifest_rec(
def compare_files(f1: str, f2: str):
print(
log.info(
"Files {} and {} {}.".format(
f1, f2, "are identical" if filecmp.cmp(f1, f2) else "differ"
)
......
......@@ -7,6 +7,9 @@ from rdflib import OWL, RDF, RDFS, Graph, Literal
from common import *
from extract_page import parse_page
# Initialise logger
log = get_logger(__name__)
def generate_triples(
graph: Graph, mosetp_id: str, page_id: str, page_title: str, page_index: int
......@@ -65,12 +68,13 @@ def parse_mosetp(graph: Graph, filepath: str, id: str):
# Call the page parser
parse_page(graph, f"{SOURCE_DIR}/contenu/pages/{page_id}.html", page_id)
else:
eprint(f"page regex found no match on line '{line}'")
log.warning(f"skipping page: regex found no match on line '{line}'")
except FileNotFoundError as e:
eprint(f"Failed to run 'grep': {e.strerror}: {e.filename}")
e.add_note(f"Failed to parse '{filepath}' with subprocess")
raise e
except subprocess.CalledProcessError as e:
if e.returncode == 1:
eprint(f"{filepath}: grep found no match, skipping.'")
log.error(f"{filepath}: grep found no match, skipping.'")
else:
eprint(e.stderr, end="")
log.error(e.stderr)
......@@ -12,6 +12,9 @@ from typing_extensions import override
from common import *
# Initialise logger
log = get_logger(__name__)
class Comment:
def __init__(self):
......@@ -99,7 +102,9 @@ class Activity:
case ["divConsigne", _]:
self.comment_consigne = comment
case [alpha, num]:
eprint(f"No match for comment {alpha}[{num}] ('{comment.id}')")
log.warning(
f"No match for comment {alpha}[{num}] ('{comment.id}')"
)
def get_name(self) -> str:
    """Return the name of this object's concrete class."""
    return self.__class__.__name__
......@@ -546,8 +551,9 @@ def parse_page(graph: Graph, filepath: str, id: str):
activity: Activity = parser.parse(js)
print(activity, end="", file=f)
except ParseError as e:
eprint(f"{parser} -> {id}: Parsing error: {e}")
eprint("Treating this as a generic Activity.")
log.error(
f"{parser} -> {id}: Parsing error: {e}. Treating this as a generic Activity."
)
activity.id = id
# Parse the HTML portion
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please sign in to comment.