diff --git a/tetras_extraction/macao_12/script/.vscode/launch.json b/tetras_extraction/macao_12/script/.vscode/launch.json index 633afdf8f599c12b70ddb819c901c7d0d0d9411d..15f1806c1d56e6db5b32929efc187b098e546414 100644 --- a/tetras_extraction/macao_12/script/.vscode/launch.json +++ b/tetras_extraction/macao_12/script/.vscode/launch.json @@ -5,7 +5,28 @@ "version": "0.2.0", "configurations": [ { - "name": "Python Debugger: Current File", + "name": "Python: extract", + "type": "debugpy", + "request": "launch", + "program": "src/extract.py", + "console": "integratedTerminal" + }, + { + "name": "Python: transform", + "type": "debugpy", + "request": "launch", + "program": "src/transform.py", + "console": "integratedTerminal" + }, + { + "name": "Python: export", + "type": "debugpy", + "request": "launch", + "program": "src/export.py", + "console": "integratedTerminal" + }, + { + "name": "Python: main", "type": "debugpy", "request": "launch", "program": "src/main.py", diff --git a/tetras_extraction/macao_12/script/src/extract.py b/tetras_extraction/macao_12/script/src/extract.py index 161018b0e91ab50c7653712692094fb4f2dc4859..e092c8149e143354043f9057d6ef94a5b5f30e3d 100644 --- a/tetras_extraction/macao_12/script/src/extract.py +++ b/tetras_extraction/macao_12/script/src/extract.py @@ -1,5 +1,4 @@ import filecmp -from re import sub from lxml import etree from rdflib import RDFS, Graph, Literal, URIRef diff --git a/tetras_extraction/macao_12/script/src/extract_mosetp.py b/tetras_extraction/macao_12/script/src/extract_mosetp.py index 13a1a691aa094d724fc2e79170ac34bac6602e61..31e6c6b969e5daf7093850533eae9915d9087686 100644 --- a/tetras_extraction/macao_12/script/src/extract_mosetp.py +++ b/tetras_extraction/macao_12/script/src/extract_mosetp.py @@ -1,6 +1,5 @@ import re import subprocess -from os import path from rdflib import OWL, RDF, RDFS, Graph, Literal @@ -46,7 +45,6 @@ def parse_mosetp(graph: Graph, filepath: str, id: str): :param filepath: path to the MosEtp file :param id: text identifier of the subsection """ - filename = path.basename(filepath) # Prepare regex with capturing groups to match lines regex = re.compile(r'.*new PageContenu\("(.*)", "(.*)", "(.*)", ""\);') # The lines we need are fairly basic, grep is much faster diff --git a/tetras_extraction/macao_12/script/src/transform.py b/tetras_extraction/macao_12/script/src/transform.py index 43147bb93035a8e9a314129f5d20a971d11e1a8d..387a59fd30614eb7413c865b100e18bbc08b13d6 100644 --- a/tetras_extraction/macao_12/script/src/transform.py +++ b/tetras_extraction/macao_12/script/src/transform.py @@ -19,6 +19,7 @@ def construct_while(g: Graph, query: str): while True: res = g.query(query) if res.graph is not None and len(res.graph - g) > 0: + print(f"Constructed {len(res.graph - g)} triples") g += res.graph else: break @@ -92,3 +93,7 @@ def main(): graph.add((onto_uri, RDF.type, OWL.Ontology)) graph.add((onto_uri, RDFS.label, Literal("macao-full"))) graph.serialize(RDF_FULL_FILE, "turtle", base=NS) + + +if __name__ == "__main__": + main()