diff --git a/tetras_extraction/macao_schema.ttl b/tetras_extraction/macao_schema.ttl deleted file mode 100644 index db69207ecafeace6034ce0dcf9cbd729d5dd4c19..0000000000000000000000000000000000000000 --- a/tetras_extraction/macao_schema.ttl +++ /dev/null @@ -1,282 +0,0 @@ -@prefix : <http://www.semanticweb.org/eliott/ontologies/2024/4/macao/> . -@prefix owl: <http://www.w3.org/2002/07/owl#> . -@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . -@prefix xml: <http://www.w3.org/XML/1998/namespace#> . -@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . -@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . -@base <http://www.semanticweb.org/eliott/ontologies/2024/4/macao/> . - -<http://www.semanticweb.org/eliott/ontologies/2024/4/macao> rdf:type owl:Ontology ; - rdfs:label "macao-schema"@fr ; - owl:versionInfo 1.1 . - -################################################################# -# Object Properties -################################################################# - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/aReponse -:aReponse rdf:type owl:ObjectProperty ; - rdfs:range :Reponse . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/aReponseCorrecte -:aReponseCorrecte rdf:type owl:ObjectProperty ; - rdfs:domain :Exercice ; - rdfs:range :Reponse . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/aReponseIncorrecte -:aReponseIncorrecte rdf:type owl:ObjectProperty ; - rdfs:domain :Exercice ; - rdfs:range :Reponse . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contenuDans -:contenuDans rdf:type owl:ObjectProperty ; - owl:inverseOf :contient . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contient -:contient rdf:type owl:ObjectProperty , - owl:TransitiveProperty ; - rdfs:domain :MacaoObject ; - rdfs:range :MacaoObject . 
- - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientActivite -:contientActivite rdf:type owl:ObjectProperty ; - rdfs:subPropertyOf :contient ; - rdfs:domain :SousPartie ; - rdfs:range :Activite . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientModule -:contientModule rdf:type owl:ObjectProperty ; - rdfs:subPropertyOf :contient ; - rdfs:domain :Module ; - rdfs:range :Module . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientSousPartie -:contientSousPartie rdf:type owl:ObjectProperty ; - rdfs:subPropertyOf :contientModule ; - rdfs:domain :Module ; - rdfs:range :SousPartie . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/incluseDans -:incluseDans rdf:type owl:ObjectProperty ; - owl:inverseOf :inclutRessource ; - rdfs:domain :MacaoRessource ; - rdfs:range :Activite . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/inclutRessource -:inclutRessource rdf:type owl:ObjectProperty ; - rdfs:domain :Activite ; - rdfs:range :MacaoRessource . - - -################################################################# -# Data properties -################################################################# - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao#html_md -:html_md rdf:type owl:DatatypeProperty . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/cheminFichier -:cheminFichier rdf:type owl:DatatypeProperty ; - rdfs:domain :MacaoRessource ; - rdfs:range xsd:anyURI . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireInfo -:commentaireInfo rdf:type owl:DatatypeProperty ; - rdfs:domain :Activite . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireInfo_html -:commentaireInfo_html rdf:type owl:DatatypeProperty ; - rdfs:subPropertyOf :commentaireInfo ; - rdfs:range rdf:XMLLiteral . 
- - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireInfo_md -:commentaireInfo_md rdf:type owl:DatatypeProperty ; - rdfs:subPropertyOf :commentaireInfo . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSucces -:commentaireSucces rdf:type owl:DatatypeProperty ; - rdfs:domain :Activite . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSucces_html -:commentaireSucces_html rdf:type owl:DatatypeProperty ; - rdfs:subPropertyOf :commentaireSucces ; - rdfs:range rdf:XMLLiteral . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSucces_md -:commentaireSucces_md rdf:type owl:DatatypeProperty ; - rdfs:subPropertyOf :commentaireSucces . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSugg -:commentaireSugg rdf:type owl:DatatypeProperty ; - rdfs:domain :Activite , - :Reponse . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSugg_html -:commentaireSugg_html rdf:type owl:DatatypeProperty ; - rdfs:subPropertyOf :commentaireSugg ; - rdfs:range rdf:XMLLiteral . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSugg_md -:commentaireSugg_md rdf:type owl:DatatypeProperty ; - rdfs:subPropertyOf :commentaireSugg . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/correct -:correct rdf:type owl:DatatypeProperty ; - rdfs:range xsd:boolean . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/description_md -:description_md rdf:type owl:DatatypeProperty . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/html -:html rdf:type owl:DatatypeProperty ; - rdfs:range rdf:XMLLiteral . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/id -:id rdf:type owl:DatatypeProperty ; - rdfs:subPropertyOf owl:topDataProperty ; - rdf:type owl:FunctionalProperty ; - rdfs:domain :MacaoContenu ; - rdfs:range xsd:positiveInteger . 
- - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/titre -:titre rdf:type owl:DatatypeProperty , - owl:FunctionalProperty ; - rdfs:domain :MacaoContenu ; - rdfs:range rdf:PlainLiteral . - - -################################################################# -# Classes -################################################################# - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Activite -:Activite rdf:type owl:Class ; - rdfs:subClassOf :MacaoContenu . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Cours -:Cours rdf:type owl:Class ; - rdfs:subClassOf :Activite . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Exercice -:Exercice rdf:type owl:Class ; - rdfs:subClassOf :Activite . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/ExerciceGD -:ExerciceGD rdf:type owl:Class ; - rdfs:subClassOf :Exercice . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/ExerciceQC -:ExerciceQC rdf:type owl:Class ; - rdfs:subClassOf :Exercice . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/ExerciceQC_QCM -:ExerciceQC_QCM rdf:type owl:Class ; - rdfs:subClassOf :ExerciceQC . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/ExerciceQC_QCU -:ExerciceQC_QCU rdf:type owl:Class ; - rdfs:subClassOf :ExerciceQC . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/ExerciceQM -:ExerciceQM rdf:type owl:Class ; - rdfs:subClassOf :Exercice . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/ExerciceTAT -:ExerciceTAT rdf:type owl:Class ; - rdfs:subClassOf :Exercice . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/FlashObject -:FlashObject rdf:type owl:Class ; - rdfs:subClassOf :MacaoRessource . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Image -:Image rdf:type owl:Class ; - rdfs:subClassOf :MacaoRessource . 
- - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/MacaoContenu -:MacaoContenu rdf:type owl:Class ; - rdfs:subClassOf :MacaoObject . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/MacaoObject -:MacaoObject rdf:type owl:Class . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/MacaoRessource -:MacaoRessource rdf:type owl:Class ; - rdfs:subClassOf :MacaoObject . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/MacaoRoot -:MacaoRoot rdf:type owl:Class . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Module -:Module rdf:type owl:Class ; - rdfs:subClassOf :MacaoContenu . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Reponse -:Reponse rdf:type owl:Class ; - rdfs:subClassOf :MacaoContenu . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/SimpleFlash -:SimpleFlash rdf:type owl:Class ; - rdfs:subClassOf :FlashObject . - - -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/SousPartie -:SousPartie rdf:type owl:Class ; - rdfs:subClassOf :Module . - - -################################################################# -# General axioms -################################################################# - -[ rdf:type owl:AllDisjointClasses ; - owl:members ( :ExerciceGD - :ExerciceQC - :ExerciceQM - :ExerciceTAT - ) -] . 
- - -### Generated by the OWL API (version 4.5.29.2024-05-13T12:11:03Z) https://github.com/owlcs/owlapi diff --git a/tetras_extraction/schemes/macao-manual-edition.ttl b/tetras_extraction/schemes/macao-manual-edition.ttl new file mode 100644 index 0000000000000000000000000000000000000000..0f1f2a13b66e861b304442f509bd78ce7bed2909 --- /dev/null +++ b/tetras_extraction/schemes/macao-manual-edition.ttl @@ -0,0 +1,21 @@ +# baseURI: http://www.semanticweb.org/eliott/ontologies/2024/10/macao-manual-edition +# imports: http://www.semanticweb.org/eliott/ontologies/2024/4/macao-full +# imports: http://www.semanticweb.org/eliott/ontologies/2024/4/macao/ +# prefix: macao-manual-edition + +@prefix macao: <http://www.semanticweb.org/eliott/ontologies/2024/4/macao/> . +@prefix macao-manual-edition: <http://www.semanticweb.org/eliott/ontologies/2024/10/macao-manual-edition#> . +@prefix owl: <http://www.w3.org/2002/07/owl#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +<http://www.semanticweb.org/eliott/ontologies/2024/10/macao-manual-edition> + rdf:type owl:Ontology ; + owl:imports <http://www.semanticweb.org/eliott/ontologies/2024/4/macao-full> ; + owl:imports macao: ; + owl:versionInfo "Created with TopBraid Composer" ; +. +macao:pg262 + macao:description_md_manual_edition "TEST MANUAL EDITION" ; +. diff --git a/tetras_extraction/schemes/macao_schema.ttl b/tetras_extraction/schemes/macao_schema.ttl new file mode 100644 index 0000000000000000000000000000000000000000..02fb473a9f2944c83d28a5606f71717b5818f7f8 --- /dev/null +++ b/tetras_extraction/schemes/macao_schema.ttl @@ -0,0 +1,251 @@ +# baseURI: http://www.semanticweb.org/eliott/ontologies/2024/4/macao/ + +@prefix : <http://www.semanticweb.org/eliott/ontologies/2024/4/macao/> . +@prefix owl: <http://www.w3.org/2002/07/owl#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . 
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix xml: <http://www.w3.org/XML/1998/namespace#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +<http://www.semanticweb.org/eliott/ontologies/2024/4/macao> + rdf:type owl:Ontology ; + rdfs:label "macao-schema"@fr ; + owl:versionInfo 1.1 ; +. +:Activite + rdf:type owl:Class ; + rdfs:subClassOf :MacaoContenu ; +. +:Cours + rdf:type owl:Class ; + rdfs:subClassOf :Activite ; +. +:Exercice + rdf:type owl:Class ; + rdfs:subClassOf :Activite ; +. +:ExerciceGD + rdf:type owl:Class ; + rdfs:subClassOf :Exercice ; +. +:ExerciceQC + rdf:type owl:Class ; + rdfs:subClassOf :Exercice ; +. +:ExerciceQC_QCM + rdf:type owl:Class ; + rdfs:subClassOf :ExerciceQC ; +. +:ExerciceQC_QCU + rdf:type owl:Class ; + rdfs:subClassOf :ExerciceQC ; +. +:ExerciceQM + rdf:type owl:Class ; + rdfs:subClassOf :Exercice ; +. +:ExerciceTAT + rdf:type owl:Class ; + rdfs:subClassOf :Exercice ; +. +:FlashObject + rdf:type owl:Class ; + rdfs:subClassOf :MacaoRessource ; +. +:Image + rdf:type owl:Class ; + rdfs:subClassOf :MacaoRessource ; +. +:MacaoContenu + rdf:type owl:Class ; + rdfs:subClassOf :MacaoObject ; +. +:MacaoObject + rdf:type owl:Class ; +. +:MacaoRessource + rdf:type owl:Class ; + rdfs:subClassOf :MacaoObject ; +. +:MacaoRoot + rdf:type owl:Class ; +. +:Module + rdf:type owl:Class ; + rdfs:subClassOf :MacaoContenu ; +. +:Reponse + rdf:type owl:Class ; + rdfs:subClassOf :MacaoContenu ; +. +:SimpleFlash + rdf:type owl:Class ; + rdfs:subClassOf :FlashObject ; +. +:SousPartie + rdf:type owl:Class ; + rdfs:subClassOf :Module ; +. +:aReponse + rdf:type owl:ObjectProperty ; + rdfs:range :Reponse ; +. +:aReponseCorrecte + rdf:type owl:ObjectProperty ; + rdfs:domain :Exercice ; + rdfs:range :Reponse ; +. +:aReponseIncorrecte + rdf:type owl:ObjectProperty ; + rdfs:domain :Exercice ; + rdfs:range :Reponse ; +. +:cheminFichier + rdf:type owl:DatatypeProperty ; + rdfs:domain :MacaoRessource ; + rdfs:range xsd:anyURI ; +. 
+:commentaireInfo + rdf:type owl:DatatypeProperty ; + rdfs:domain :Activite ; +. +:commentaireInfo_html + rdf:type owl:DatatypeProperty ; + rdfs:range rdf:XMLLiteral ; + rdfs:subPropertyOf :commentaireInfo ; +. +:commentaireInfo_md + rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireInfo ; +. +:commentaireInfo_md_manual_edition + rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireInfo ; +. +:commentaireSucces + rdf:type owl:DatatypeProperty ; + rdfs:domain :Activite ; +. +:commentaireSucces_html + rdf:type owl:DatatypeProperty ; + rdfs:range rdf:XMLLiteral ; + rdfs:subPropertyOf :commentaireSucces ; +. +:commentaireSucces_md + rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireSucces ; +. +:commentaireSucces_md_manual_edition + rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireSucces ; +. +:commentaireSugg + rdf:type owl:DatatypeProperty ; + rdfs:domain :Activite ; + rdfs:domain :Reponse ; +. +:commentaireSugg_html + rdf:type owl:DatatypeProperty ; + rdfs:range rdf:XMLLiteral ; + rdfs:subPropertyOf :commentaireSugg ; +. +:commentaireSugg_md + rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireSugg ; +. +:commentaireSugg_md_manual_edition + rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireSugg ; +. +:contenuDans + rdf:type owl:ObjectProperty ; + owl:inverseOf :contient ; +. +:contient + rdf:type owl:ObjectProperty ; + rdf:type owl:TransitiveProperty ; + rdfs:domain :MacaoObject ; + rdfs:range :MacaoObject ; +. +:contientActivite + rdf:type owl:ObjectProperty ; + rdfs:domain :SousPartie ; + rdfs:range :Activite ; + rdfs:subPropertyOf :contient ; +. +:contientModule + rdf:type owl:ObjectProperty ; + rdfs:domain :Module ; + rdfs:range :Module ; + rdfs:subPropertyOf :contient ; +. +:contientSousPartie + rdf:type owl:ObjectProperty ; + rdfs:domain :Module ; + rdfs:range :SousPartie ; + rdfs:subPropertyOf :contientModule ; +. 
+:correct + rdf:type owl:DatatypeProperty ; + rdfs:range xsd:boolean ; +. +:description + rdf:type owl:DatatypeProperty ; + rdfs:domain :Activite ; + rdfs:domain :Reponse ; +. +:description_html + rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :description ; +. +:description_md + rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :description ; +. +:description_md_manual_edition + rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :description ; +. +:html + rdf:type owl:DatatypeProperty ; + rdfs:range rdf:XMLLiteral ; +. +:html_md + rdf:type owl:DatatypeProperty ; +. +:html_md_manual_edition + rdf:type owl:DatatypeProperty ; +. +:id + rdf:type owl:DatatypeProperty ; + rdf:type owl:FunctionalProperty ; + rdfs:domain :MacaoContenu ; + rdfs:range xsd:positiveInteger ; + rdfs:subPropertyOf owl:topDataProperty ; +. +:incluseDans + rdf:type owl:ObjectProperty ; + rdfs:domain :MacaoRessource ; + rdfs:range :Activite ; + owl:inverseOf :inclutRessource ; +. +:inclutRessource + rdf:type owl:ObjectProperty ; + rdfs:domain :Activite ; + rdfs:range :MacaoRessource ; +. +:titre + rdf:type owl:DatatypeProperty ; + rdf:type owl:FunctionalProperty ; + rdfs:domain :MacaoContenu ; + rdfs:range rdf:PlainLiteral ; +. +[ + rdf:type owl:AllDisjointClasses ; + owl:members ( + :ExerciceGD + :ExerciceQC + :ExerciceQM + :ExerciceTAT + ) ; +]. 
diff --git a/tetras_extraction/script/src/common.py b/tetras_extraction/script/src/common.py index 3bbf4da2d87dde822b000db0b2b64441b434cc9a..99e499fd55fb579c0cf2db91deeb502103722eaa 100644 --- a/tetras_extraction/script/src/common.py +++ b/tetras_extraction/script/src/common.py @@ -33,7 +33,9 @@ RDF_CONTENT_FILE = env_path_or_rel_default( "RDF_CONTENT_FILE", RESULT_DIR + "/macao_content.ttl" ) """Path to the file containing the extracted content""" -RDF_SCHEMA_FILE = env_path_or_rel_default("RDF_SCHEMA_FILE", "../../macao_schema.ttl") +RDF_SCHEMA_FILE = env_path_or_rel_default("RDF_SCHEMA_FILE", "../../schemes/macao_schema.ttl") """Path to the schema file""" +RDF_MANUAL_EDITION_FILE = env_path_or_rel_default("RDF_MANUAL_EDITION_FILE", "../../schemes/macao-manual-edition.ttl") +"""Path to the file containing manual editions (parsed into the graph alongside the schema)""" RDF_FULL_FILE = env_path_or_rel_default("RDF_FULL_FILE", RESULT_DIR + "/macao_full.ttl") """Path to the full RDF file, including schema, extracted content and inferences""" diff --git a/tetras_extraction/script/src/transform.py b/tetras_extraction/script/src/transform.py index 084856ed643491fca9991d7c3a8cabb5befbfb0b..b94b6def6a208eef79d72c869d460057d24ea356 100644 --- a/tetras_extraction/script/src/transform.py +++ b/tetras_extraction/script/src/transform.py @@ -80,15 +80,22 @@ def transform_html(graph: Graph): # Process all html content through Markitdown for prop in html_properties: for t in graph.triples((None, NS[prop], None)) : + desc_str = prepareHTMLforMD(t[2]) tmp = tempfile.NamedTemporaryFile(suffix=".html") with open(tmp.name, 'w') as f: f.write(desc_str) mid = MarkItDown() - desc_md = postEditMD(mid.convert(tmp.name).text_content) + #desc_md = postEditMD(mid.convert(tmp.name).text_content) + desc_md = mid.convert(tmp.name).text_content l = list(t) l[2] = Literal(desc_md) l[1] = NS[prop+'_md'] + manual_edition_list = list(graph.triples((l[0], NS[prop+"_md_manual_edition"], None))) + if len(manual_edition_list)>=1: + 
print(manual_edition_list) + t_manual_edition = manual_edition_list[0] + l[2] = t_manual_edition[2] graph.add(tuple(l)) @@ -97,8 +104,10 @@ def main(): graph = Graph() graph.bind("", NS) graph.parse(RDF_SCHEMA_FILE) + graph.parse(RDF_MANUAL_EDITION_FILE) graph.parse(RDF_CONTENT_FILE) + transform_html(graph) # Apply property 'subClassOf' transitively, except on the "fake" class @@ -145,6 +154,8 @@ def main(): """, ) + # A + # ==> Save # Remove dependency on previous ontologies for ontology in graph.subjects(RDF.type, OWL.Ontology):