diff --git a/mcli b/mcli index ea8a5c24d0ed81d1cc07e83901b1d302363061cf..c9d2fde2822729492a86639f225a96fdf2690fd3 100755 --- a/mcli +++ b/mcli @@ -55,12 +55,18 @@ setup Initialize Python environment required by extractors setup-debug (Re)create .env file used by the Python debugger launch config -extract-rdf - Run the extractor to generate RDF from text sources -extract-mp3 [-y|--yes-overwrite] - Extract audio streams from all Flash SWF files +extract + Run the extract stage, to generate RDF from text sources +transform + Run the transform stage, to complete and clean-up the RDF data +export + Run the export stage, to generate Macao-Hugo content pages convert Run the full conversion process (extract -> transform -> export) +test + Run simple tests on the extracted RDF data +extract-mp3 [-y|--yes-overwrite] + Extract audio streams from all Flash SWF files help Print this help and exit EOF @@ -264,6 +270,10 @@ export) activate_venv python "$SCRIPTS_DIR/src/export.py" ;; +test) + activate_venv + python "$SCRIPTS_DIR/src/test.py" + ;; extract-mp3) for audio_file in "$SOURCES_DIR/contenu/media/"*.swf; do "$SCRIPTS_DIR/extract_mp3.sh" "$@" --output-dir "$RESULTS_DIR/audio" "$audio_file" diff --git a/tetras_extraction/script/.vscode/launch.json b/tetras_extraction/script/.vscode/launch.json index a98355be67884e272ff9b05314624bc82389c740..a42deee659da338dcde90666b2fbe03b9624030e 100644 --- a/tetras_extraction/script/.vscode/launch.json +++ b/tetras_extraction/script/.vscode/launch.json @@ -35,6 +35,14 @@ "program": "src/main.py", "console": "integratedTerminal", "envFile": "${workspaceFolder}/.env" + }, + { + "name": "Python: test", + "type": "debugpy", + "request": "launch", + "program": "src/test.py", + "console": "integratedTerminal", + "envFile": "${workspaceFolder}/.env" } ] } \ No newline at end of file diff --git a/tetras_extraction/script/.vscode/settings.json b/tetras_extraction/script/.vscode/settings.json index 
class TestObjectCount(unittest.TestCase):
    """Check how many objects of each kind the extracted RDF graph contains.

    The extract and transform stages are run once for the whole class, then
    the resulting RDF file is loaded into a graph that `runTest` queries.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Run the pipeline and load the resulting graph once for the class.

        This lives in `setUpClass` rather than `__init__` because unittest
        instantiates test cases during discovery; doing the work in the
        constructor would run the whole pipeline just to list the tests.
        """
        # Run extraction (presumably this is what produces RDF_FULL_FILE)
        extract.main()
        transform.main()
        # Load graph
        cls.graph = Graph()
        # Bind the empty prefix so the queries below can write `:Module` etc.
        cls.graph.bind("", NS)
        cls.graph.parse(RDF_FULL_FILE)

    def runTest(self):
        """Count modules, sub-parts and activities against known totals.

        Each check runs in its own `subTest` so that a wrong count for one
        kind of object does not hide the results for the others.
        """
        # Modules (excluding resources that are also typed as SousPartie)
        with self.subTest("Module"):
            self.assertCount(
                """SELECT DISTINCT ?mod WHERE {
                    ?mod a :Module .

                    ?mod :id ?id .
                    ?mod :index ?index .
                    ?mod :titre ?titre .

                    MINUS { ?mod a :SousPartie }
                }""",
                (9, 6, 9 + 6),
            )
        # SousParties
        with self.subTest("SousPartie"):
            self.assertCount(
                """SELECT DISTINCT ?subj WHERE {
                    ?subj a :SousPartie .

                    ?subj :id ?id .
                    ?subj :index ?index .
                    ?subj :titre ?titre .
                }""",
                (18, 14, 18 + 14),
            )
        # Activités
        with self.subTest("Activite"):
            self.assertCount(
                """SELECT DISTINCT ?subj WHERE {
                    ?subj a :Activite .

                    ?subj :id ?id .
                    ?subj :index ?index .
                    ?subj :titre ?titre .
                }""",
                (132, 86, 132 + 86),
            )

    def assertCount(
        self, query: str, expected_tuple: tuple[int | None, int | None, int | None]
    ):
        """Checks that the `query` produces the expected number of results.

        The `expected_tuple` contains 3 values, for the macao_12 graph,
        macao_3 graph, and both combined ("full"). The value used is the one
        matching `MACAO_VERSION`. A `None` value in the tuple ignores this
        check.

        Fails the test if `MACAO_VERSION` is not one of the three known
        versions, or if the number of results differs from the expected one.
        """
        versions = ("macao_12", "macao_3", "full")
        try:
            expected = expected_tuple[versions.index(MACAO_VERSION)]
        except ValueError:
            # Report the value that actually failed the lookup
            self.fail(f"Unknown version '{MACAO_VERSION}'")
        if expected is None:
            # Nothing to check for this version
            return

        count = len(self.graph.query(query))
        self.assertEqual(
            count,
            expected,
            f"unexpected result count for version '{MACAO_VERSION}'",
        )