diff --git a/tetras_extraction/macao_schema.ttl b/tetras_extraction/macao_schema.ttl index dd1d3d8dd4a35536d35d779043f97ca7b75dcfe3..db69207ecafeace6034ce0dcf9cbd729d5dd4c19 100644 --- a/tetras_extraction/macao_schema.ttl +++ b/tetras_extraction/macao_schema.ttl @@ -43,6 +43,13 @@ rdfs:range :MacaoObject . +### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientActivite +:contientActivite rdf:type owl:ObjectProperty ; + rdfs:subPropertyOf :contient ; + rdfs:domain :SousPartie ; + rdfs:range :Activite . + + ### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientModule :contientModule rdf:type owl:ObjectProperty ; rdfs:subPropertyOf :contient ; @@ -50,13 +57,6 @@ rdfs:range :Module . -### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientPage -:contientActivite rdf:type owl:ObjectProperty ; - rdfs:subPropertyOf :contient ; - rdfs:domain :SousPartie ; - rdfs:range :Activite . - - ### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientSousPartie :contientSousPartie rdf:type owl:ObjectProperty ; rdfs:subPropertyOf :contientModule ; @@ -81,6 +81,10 @@ # Data properties ################################################################# +### http://www.semanticweb.org/eliott/ontologies/2024/4/macao#html_md +:html_md rdf:type owl:DatatypeProperty . + + ### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/cheminFichier :cheminFichier rdf:type owl:DatatypeProperty ; rdfs:domain :MacaoRessource ; @@ -89,21 +93,51 @@ ### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireInfo :commentaireInfo rdf:type owl:DatatypeProperty ; - rdfs:domain :Activite ; - rdfs:range rdf:XMLLiteral . + rdfs:domain :Activite . + + +### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireInfo_html +:commentaireInfo_html rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireInfo ; + rdfs:range rdf:XMLLiteral . 
+ + +### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireInfo_md +:commentaireInfo_md rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireInfo . ### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSucces :commentaireSucces rdf:type owl:DatatypeProperty ; - rdfs:domain :Activite ; - rdfs:range rdf:XMLLiteral . + rdfs:domain :Activite . + + +### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSucces_html +:commentaireSucces_html rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireSucces ; + rdfs:range rdf:XMLLiteral . + + +### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSucces_md +:commentaireSucces_md rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireSucces . ### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSugg :commentaireSugg rdf:type owl:DatatypeProperty ; - rdfs:domain :Activite, - :Reponse ; - rdfs:range rdf:XMLLiteral . + rdfs:domain :Activite , + :Reponse . + + +### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSugg_html +:commentaireSugg_html rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireSugg ; + rdfs:range rdf:XMLLiteral . + + +### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSugg_md +:commentaireSugg_md rdf:type owl:DatatypeProperty ; + rdfs:subPropertyOf :commentaireSugg . ### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/correct @@ -111,6 +145,10 @@ rdfs:range xsd:boolean . +### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/description_md +:description_md rdf:type owl:DatatypeProperty . + + ### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/html :html rdf:type owl:DatatypeProperty ; rdfs:range rdf:XMLLiteral . 
def transform_html(graph: Graph):
    """Add a Markdown counterpart for every HTML-valued triple of `graph`.

    For each triple whose predicate is `NS[<prop>]`, with `<prop>` one of the
    HTML-carrying properties listed below, the object is read as HTML by
    Pandoc, written back as Markdown, and a new triple
    `(subject, NS[<prop>_md], Literal(markdown))` is added to the graph.
    The original HTML triples are left untouched.

    Args:
        graph: The graph to enrich. It is modified in place.
    """
    html_properties = (
        'commentaireInfo',
        'commentaireSucces',
        'commentaireSugg',
        'html',
        'description',
    )

    # NOTE: a first attempt (never enabled) cleaned up the audio <script>
    # tags of `description` values by hand with lxml: each <script> element
    # was replaced by its text, relying on `HtmlElement.drop_tree()` keeping
    # the tail text of the removed element, so that prepending the
    # replacement string to the tail inserts it at the right place.
    # All HTML content now simply goes through Pandoc instead.

    for prop in html_properties:
        # The target predicate only depends on the property, not on the triple.
        md_predicate = NS[prop + '_md']
        # Snapshot the matching triples before adding anything, so that the
        # additions below can never interfere with an iteration that is still
        # in progress over the same graph.
        matches = list(graph.triples((None, NS[prop], None)))
        for subject, _, html_value in matches:
            document = pandoc.read(html_value, format="html")
            markdown = pandoc.write(document, format="markdown")
            graph.add((subject, md_predicate, Literal(markdown)))
optional { # Turn the page index into a Hugo weight: increment and add a zero, to # leave room for adding new pages in-between later diff --git a/tetras_extraction/script/templates/qcu.rq b/tetras_extraction/script/templates/qcu.rq index e55e895f2481983d6fe845bbb667420ac8b1155c..c6fa414eed84df23c779e1c89707608e937e8148 100644 --- a/tetras_extraction/script/templates/qcu.rq +++ b/tetras_extraction/script/templates/qcu.rq @@ -31,7 +31,7 @@ where { optional { ?qcu :aReponse ?choice . ?choice :index ?choice_index . - ?choice :html ?choice_html + ?choice :html_md ?choice_html } optional { ?qcu :aReponse ?correct_choice . diff --git a/tetras_extraction/script/templates/quiz.rq b/tetras_extraction/script/templates/quiz.rq index 95f4af238877b0c023aa8e5bd47b2825db4aa965..76b5aba7291fbec410c6ddb5c110dec55ed63395 100644 --- a/tetras_extraction/script/templates/quiz.rq +++ b/tetras_extraction/script/templates/quiz.rq @@ -39,10 +39,10 @@ where { ?quiz a :Exercice . ?quiz :id ?id . optional { - ?quiz :commentaireSucces ?correct_comment . + ?quiz :commentaireSucces_md ?correct_comment . } optional { - ?quiz :commentaireSugg ?incorrect_comment . + ?quiz :commentaireSugg_md ?incorrect_comment . } } # FIXME: some exercises have multiple incorrect comments, almost identical,