diff --git a/mcli b/mcli index 83b7ac097c34802cc04cff40da87d53642cfc966..e7bd32cffa022f99e1964f04352efc650aec2c6d 100755 --- a/mcli +++ b/mcli @@ -51,6 +51,8 @@ extract-rdf Run the extractor to generate RDF from text sources extract-mp3 Extract audio streams from all Flash SWF files +convert + Run the full conversion process (extract -> transform -> export) help Print this help and exit EOF @@ -70,6 +72,13 @@ check_file() { return 1 } +activate_venv() { + if ! source "$SCRIPTS_DIR/venv/bin/activate"; then + echo "Python venv not found, did you run setup first?" >&2 + exit 1 + fi +} + # List MP3 streams in a file list_streams() { ffprobe -i "$1" 2>&1 | grep -E 'Stream.*Audio: mp3' @@ -206,13 +215,13 @@ count-all) setup) "$SCRIPTS_DIR/setup.sh" ;; +convert) + activate_venv + python "$SCRIPTS_DIR/src/main.py" + ;; extract-rdf) - if source "$SCRIPTS_DIR/venv/bin/activate"; then - python "$SCRIPTS_DIR/extract.py" - else - echo "Python venv not found, did you run setup first?" >&2 - exit 1 - fi + activate_venv + python "$SCRIPTS_DIR/src/extract.py" ;; extract-mp3) for audio_file in "$SOURCES_DIR/contenu/media/"*.swf; do @@ -221,10 +230,7 @@ extract-mp3) ;; shell) if [[ "$1" = "-p" || "$1" = "--pyenv" ]]; then - if ! source "$SCRIPTS_DIR/venv/bin/activate"; then - echo "Python venv not found, did you run setup first?" >&2 - exit 1 - fi + activate_venv fi export PATH="$PATH:$MACAO_ROOT" cd "$MACAO_ROOT" diff --git a/tetras_extraction/macao_12/script/.vscode/extensions.json b/tetras_extraction/macao_12/script/.vscode/extensions.json index 9e75ef5179cf4e5c6a85ca0508198fe49c76f4ed..85395aa10fee6f7c7313f1c191c5946cad24782b 100644 --- a/tetras_extraction/macao_12/script/.vscode/extensions.json +++ b/tetras_extraction/macao_12/script/.vscode/extensions.json @@ -1,6 +1,7 @@ { "recommendations": [ "detachhead.basedpyright", - "ms-python.black-formatter" + "ms-python.black-formatter", + "ms-python.isort" ] } \ No newline at end of file diff --git a/tetras_extraction/macao_12/script/.vscode/launch.json b/tetras_extraction/macao_12/script/.vscode/launch.json index b7b64a148c9cf104df2e87d4554d611ee2cb38b6..f228b954905c3ca04bfb6741245c88a6ecb9d868 100644 --- a/tetras_extraction/macao_12/script/.vscode/launch.json +++ b/tetras_extraction/macao_12/script/.vscode/launch.json @@ -8,7 +8,7 @@ "name": "Python Debugger: Current File", "type": "debugpy", "request": "launch", - "program": "extract.py", + "program": "src/extract.py", "console": "integratedTerminal" } ] diff --git a/tetras_extraction/macao_12/script/.vscode/settings.json b/tetras_extraction/macao_12/script/.vscode/settings.json index dcb1530c94cb29ed7c94b12c369885e87a04e689..c3434325b7625865a21b082eb6dac06611985a19 100644 --- a/tetras_extraction/macao_12/script/.vscode/settings.json +++ b/tetras_extraction/macao_12/script/.vscode/settings.json @@ -1,4 +1,13 @@ { - "python.analysis.typeCheckingMode": "basic", - "python.analysis.autoImportCompletions": true + "basedpyright.analysis.diagnosticMode": "workspace", + "basedpyright.analysis.exclude": [ + "venv/" + ], + "basedpyright.disableOrganizeImports": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "always" + }, + "editor.formatOnSave": true, + "python.analysis.autoImportCompletions": true, + "python.analysis.typeCheckingMode": "basic" } \ No newline at end of file diff --git a/tetras_extraction/macao_12/script/README.md b/tetras_extraction/macao_12/script/README.md index cd15a089c46d84c2f078e0982f7076bf64e7b4dd..9cfa2903c9824b540d70a2b85b7510df06b35454 100644 --- a/tetras_extraction/macao_12/script/README.md +++ b/tetras_extraction/macao_12/script/README.md @@ -9,7 +9,7 @@ Ensuite pour chaque shell, il est nécessaire de `source venv/bin/activate` avan de pouvoir lancer Python. ```sh -python extract.py +python src/extract.py ``` `extract.py` est le point d'entrée de l'extracteur, qui produit une représentation RDF des contenus textuels extraits de Macao12. diff --git a/tetras_extraction/macao_12/script/common.py b/tetras_extraction/macao_12/script/src/common.py similarity index 86% rename from tetras_extraction/macao_12/script/common.py rename to tetras_extraction/macao_12/script/src/common.py index e9aa55a848fde8c39ba901a525540c5455a4d496..31fcc7d405bc17903b29ad15190d7b048ed0f2b5 100644 --- a/tetras_extraction/macao_12/script/common.py +++ b/tetras_extraction/macao_12/script/src/common.py @@ -3,8 +3,7 @@ from sys import stderr from typing import Any from lxml import html -from rdflib import Graph, Literal, RDFS, URIRef -from rdflib import Namespace +from rdflib import RDFS, Graph, Literal, Namespace, URIRef def env_path_or_rel_default(env_var: str, default: str) -> str: @@ -20,15 +19,17 @@ def env_path_or_rel_default(env_var: str, default: str) -> str: MODULE_DIR = path.dirname(path.realpath(__file__)) """Absolute path of this module's directory""" -MACAO_ROOT = env_path_or_rel_default("MACAO_ROOT", "../../..") +MACAO_ROOT = env_path_or_rel_default("MACAO_ROOT", "../../../..") """Path to the Macao root directory""" -SOURCE_DIR = env_path_or_rel_default("SOURCES_DIR", "../../../Basilisk/MACAO/macao_12") +SOURCE_DIR = env_path_or_rel_default( + "SOURCES_DIR", "../../../../Basilisk/MACAO/macao_12" +) """Path to the Macao source directory (i.e. the one with the manifest)""" -RESULT_DIR = env_path_or_rel_default("RESULTS_DIR", "../result") +RESULT_DIR = env_path_or_rel_default("RESULTS_DIR", "../../result") """Path to the output directory""" -RESULT_FILE = env_path_or_rel_default("RESULT_FILE", "../result/macao_content.ttl") +RESULT_FILE = env_path_or_rel_default("RESULT_FILE", RESULT_DIR + "/macao_content.ttl") """Path to the Turtle output file""" -SCHEMA_FILE = env_path_or_rel_default("SCHEMA_FILE", "../macao_schema.ttl") +SCHEMA_FILE = env_path_or_rel_default("SCHEMA_FILE", "../../macao_schema.ttl") """Path to the schema file""" NS = Namespace("http://www.semanticweb.org/eliott/ontologies/2024/4/macao/") diff --git a/tetras_extraction/macao_12/script/compare_rdf.py b/tetras_extraction/macao_12/script/src/compare_rdf.py similarity index 84% rename from tetras_extraction/macao_12/script/compare_rdf.py rename to tetras_extraction/macao_12/script/src/compare_rdf.py index 3b7775e84ead495178d3712cf76a966f8a8a725f..1676c466f433ab11e1f4f8f414f88d3e0a6736b8 100755 --- a/tetras_extraction/macao_12/script/compare_rdf.py +++ b/tetras_extraction/macao_12/script/src/compare_rdf.py @@ -5,18 +5,22 @@ isomorphic, and their differences otherwise. Implemented from https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#module-rdflib.compare """ from sys import argv, stderr + from rdflib import Graph -from rdflib.compare import to_isomorphic, graph_diff +from rdflib.compare import graph_diff, to_isomorphic + def dump_nt_sorted(g: Graph): - for l in sorted(g.serialize(format='nt').splitlines()): - if l: print("\t"+l) + for l in sorted(g.serialize(format="nt").splitlines()): + if l: + print("\t" + l) + def main(): if len(argv) < 3: print(f"Usage: {argv[0]} <first_graph> <second_graph>", file=stderr) exit() - + g1 = Graph().parse(argv[1]) g2 = Graph().parse(argv[2]) iso1 = to_isomorphic(g1) @@ -33,5 +37,6 @@ def main(): print("In second:") dump_nt_sorted(in_second) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/tetras_extraction/macao_12/script/extract.py b/tetras_extraction/macao_12/script/src/extract.py similarity index 100% rename from tetras_extraction/macao_12/script/extract.py rename to tetras_extraction/macao_12/script/src/extract.py diff --git a/tetras_extraction/macao_12/script/extract_mosetp.py b/tetras_extraction/macao_12/script/src/extract_mosetp.py similarity index 98% rename from tetras_extraction/macao_12/script/extract_mosetp.py rename to tetras_extraction/macao_12/script/src/extract_mosetp.py index 426a5c9701f28bf2f87ecffecaacf50e77c62693..3bb26f155952eec5c6f5b54b13266eb5926ce51c 100644 --- a/tetras_extraction/macao_12/script/extract_mosetp.py +++ b/tetras_extraction/macao_12/script/src/extract_mosetp.py @@ -1,8 +1,8 @@ -from os import path import re import subprocess +from os import path -from rdflib import Graph, Literal, OWL, RDF, RDFS +from rdflib import OWL, RDF, RDFS, Graph, Literal from common import * from extract_page import parse_page diff --git a/tetras_extraction/macao_12/script/extract_page.py b/tetras_extraction/macao_12/script/src/extract_page.py similarity index 99% rename from tetras_extraction/macao_12/script/extract_page.py rename to tetras_extraction/macao_12/script/src/extract_page.py index d69f0e4a3a9eefe80fe128396336f7086d3c1829..038f698f40c3296e5a1295410d1b0f878987cc1a 100644 --- a/tetras_extraction/macao_12/script/extract_page.py +++ b/tetras_extraction/macao_12/script/src/extract_page.py @@ -1,13 +1,13 @@ +import re from abc import abstractmethod from dataclasses import dataclass -import re from typing import Any, Callable import esprima as es from lxml import etree, html from lxml.etree import _Element from lxml.html import HtmlElement -from rdflib import Graph, Literal, RDF +from rdflib import RDF, Graph, Literal from typing_extensions import override from common import * diff --git a/tetras_extraction/macao_12/script/src/main.py b/tetras_extraction/macao_12/script/src/main.py new file mode 100644 index 0000000000000000000000000000000000000000..c276a20a5d61d9a65f488c886594e6e4a70427b6 --- /dev/null +++ b/tetras_extraction/macao_12/script/src/main.py @@ -0,0 +1,11 @@ +import extract + + +def main(): + extract.main() + # transform.main() + # export.main() + + +if __name__ == "__main__": + main()