From 5b66f3838eda6e3cd864a80e66d9971f9138011c Mon Sep 17 00:00:00 2001
From: eliott <eliott.sammier@tetras-libre.fr>
Date: Mon, 5 Aug 2024 14:16:41 +0200
Subject: [PATCH] Add basic tests counting Modules, SousParties and Activites

---
 mcli                                          | 18 ++++-
 tetras_extraction/script/.vscode/launch.json  |  8 ++
 .../script/.vscode/settings.json              | 12 ++-
 tetras_extraction/script/src/test.py          | 79 +++++++++++++++++++
 4 files changed, 112 insertions(+), 5 deletions(-)
 create mode 100644 tetras_extraction/script/src/test.py

diff --git a/mcli b/mcli
index ea8a5c24..c9d2fde2 100755
--- a/mcli
+++ b/mcli
@@ -55,12 +55,18 @@ setup
     Initialize Python environment required by extractors
 setup-debug
     (Re)create .env file used by the Python debugger launch config
-extract-rdf
-    Run the extractor to generate RDF from text sources
-extract-mp3 [-y|--yes-overwrite]
-    Extract audio streams from all Flash SWF files
+extract
+    Run the extract stage, to generate RDF from text sources
+transform
+    Run the transform stage, to complete and clean-up the RDF data
+export
+    Run the export stage, to generate Macao-Hugo content pages
 convert
     Run the full conversion process (extract -> transform -> export)
+test
+    Run simple tests on the extracted RDF data
+extract-mp3 [-y|--yes-overwrite]
+    Extract audio streams from all Flash SWF files
 help
     Print this help and exit
 EOF
@@ -264,6 +270,10 @@ export)
     activate_venv
     python "$SCRIPTS_DIR/src/export.py"
     ;;
+test)
+    activate_venv
+    python "$SCRIPTS_DIR/src/test.py"
+    ;;
 extract-mp3)
     for audio_file in "$SOURCES_DIR/contenu/media/"*.swf; do
         "$SCRIPTS_DIR/extract_mp3.sh" "$@" --output-dir "$RESULTS_DIR/audio" "$audio_file"
diff --git a/tetras_extraction/script/.vscode/launch.json b/tetras_extraction/script/.vscode/launch.json
index a98355be..a42deee6 100644
--- a/tetras_extraction/script/.vscode/launch.json
+++ b/tetras_extraction/script/.vscode/launch.json
@@ -35,6 +35,14 @@
             "program": "src/main.py",
             "console": "integratedTerminal",
             "envFile": "${workspaceFolder}/.env"
+        },
+        {
+            "name": "Python: test",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "src/test.py",
+            "console": "integratedTerminal",
+            "envFile": "${workspaceFolder}/.env"
         }
     ]
 }
\ No newline at end of file
diff --git a/tetras_extraction/script/.vscode/settings.json b/tetras_extraction/script/.vscode/settings.json
index c3434325..60740715 100644
--- a/tetras_extraction/script/.vscode/settings.json
+++ b/tetras_extraction/script/.vscode/settings.json
@@ -9,5 +9,15 @@
     },
     "editor.formatOnSave": true,
     "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "basic"
+    "python.analysis.typeCheckingMode": "basic",
+    "python.envFile": "${workspaceFolder}/.env",
+    "python.testing.unittestArgs": [
+        "-v",
+        "-s",
+        "./src",
+        "-p",
+        "test*.py"
+    ],
+    "python.testing.pytestEnabled": false,
+    "python.testing.unittestEnabled": true
 }
\ No newline at end of file
diff --git a/tetras_extraction/script/src/test.py b/tetras_extraction/script/src/test.py
new file mode 100644
index 00000000..04982a5e
--- /dev/null
+++ b/tetras_extraction/script/src/test.py
@@ -0,0 +1,102 @@
+import unittest
+
+from rdflib import Graph
+
+import extract
+import transform
+from common import *
+
+
+class TestObjectCount(unittest.TestCase):
+    """Count the Modules, SousParties and Activites found in the RDF output.
+
+    The expected numbers are hard-coded per Macao version, see `assertCount`.
+    """
+
+    graph: Graph
+
+    @classmethod
+    def setUpClass(cls):
+        """Run the extract and transform stages once, then load their output.
+
+        This lives in `setUpClass` rather than `__init__` so that merely
+        instantiating the test case (as test discovery does) no longer runs
+        the whole pipeline.
+        """
+        extract.main()
+        transform.main()
+        cls.graph = Graph()
+        cls.graph.bind("", NS)
+        cls.graph.parse(RDF_FULL_FILE)
+
+    def runTest(self):
+        """Check the number of objects of each kind.
+
+        Only objects having an `:id`, an `:index` and a `:titre` are counted.
+        Each kind is its own sub-test, so one wrong count does not hide the others.
+        """
+        # Modules (subjects that are also typed as SousPartie are excluded)
+        with self.subTest("Modules"):
+            self.assertCount(
+                """SELECT DISTINCT ?mod WHERE {
+                ?mod a :Module .
+
+                ?mod :id ?id .
+                ?mod :index ?index .
+                ?mod :titre ?titre .
+                
+                MINUS { ?mod a :SousPartie }
+                }""",
+                (9, 6, 9 + 6),
+            )
+        # SousParties
+        with self.subTest("SousParties"):
+            self.assertCount(
+                """SELECT DISTINCT ?subj WHERE {
+                ?subj a :SousPartie .
+
+                ?subj :id ?id .
+                ?subj :index ?index .
+                ?subj :titre ?titre .
+                }""",
+                (18, 14, 18 + 14),
+            )
+        # Activités
+        with self.subTest("Activites"):
+            self.assertCount(
+                """SELECT DISTINCT ?subj WHERE {
+                ?subj a :Activite .
+
+                ?subj :id ?id .
+                ?subj :index ?index .
+                ?subj :titre ?titre .
+                }""",
+                (132, 86, 132 + 86),
+            )
+
+    def assertCount(
+        self, query: str, expected_tuple: tuple[int | None, int | None, int | None]
+    ):
+        """Checks that the `query` produces the expected number of results.
+
+        The `expected_tuple` contains 3 values, for the macao_12 graph,
+        macao_3 graph, and both combined. The one matching `MACAO_VERSION` is
+        used; a `None` value in the tuple ignores this check. The test fails
+        if `MACAO_VERSION` is none of the three known versions.
+        """
+        res = self.graph.query(query)
+        count = len(res)
+
+        versions = ("macao_12", "macao_3", "full")
+        try:
+            expected = expected_tuple[versions.index(MACAO_VERSION)]
+        except ValueError:
+            # Report the value that was actually looked up
+            self.fail(f"Unknown version '{MACAO_VERSION}'")
+        if expected is not None:
+            self.assertEqual(count, expected, f"for version '{MACAO_VERSION}'")
+        # else skip test
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab