Skip to content
Snippets Groups Projects
Commit 91030a07 authored by David Rouquet's avatar David Rouquet
Browse files

Merge branch 'daxid_html2md' into 'main'

Daxid html2md

See merge request !3
parents c0b2b8df ab2886ee
Branches
No related tags found
2 merge requests!4Main,!3Daxid html2md
......@@ -43,6 +43,13 @@
rdfs:range :MacaoObject .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientActivite
:contientActivite rdf:type owl:ObjectProperty ;
rdfs:subPropertyOf :contient ;
rdfs:domain :SousPartie ;
rdfs:range :Activite .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientModule
:contientModule rdf:type owl:ObjectProperty ;
rdfs:subPropertyOf :contient ;
......@@ -50,13 +57,6 @@
rdfs:range :Module .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientPage
:contientActivite rdf:type owl:ObjectProperty ;
rdfs:subPropertyOf :contient ;
rdfs:domain :SousPartie ;
rdfs:range :Activite .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/contientSousPartie
:contientSousPartie rdf:type owl:ObjectProperty ;
rdfs:subPropertyOf :contientModule ;
......@@ -81,6 +81,10 @@
# Data properties
#################################################################
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao#html_md
:html_md rdf:type owl:DatatypeProperty .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/cheminFichier
:cheminFichier rdf:type owl:DatatypeProperty ;
rdfs:domain :MacaoRessource ;
......@@ -89,28 +93,62 @@
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireInfo
:commentaireInfo rdf:type owl:DatatypeProperty ;
rdfs:domain :Activite ;
rdfs:domain :Activite .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireInfo_html
:commentaireInfo_html rdf:type owl:DatatypeProperty ;
rdfs:subPropertyOf :commentaireInfo ;
rdfs:range rdf:XMLLiteral .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireInfo_md
:commentaireInfo_md rdf:type owl:DatatypeProperty ;
rdfs:subPropertyOf :commentaireInfo .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSucces
:commentaireSucces rdf:type owl:DatatypeProperty ;
rdfs:domain :Activite ;
rdfs:domain :Activite .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSucces_html
:commentaireSucces_html rdf:type owl:DatatypeProperty ;
rdfs:subPropertyOf :commentaireSucces ;
rdfs:range rdf:XMLLiteral .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSucces_md
:commentaireSucces_md rdf:type owl:DatatypeProperty ;
rdfs:subPropertyOf :commentaireSucces .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSugg
:commentaireSugg rdf:type owl:DatatypeProperty ;
rdfs:domain :Activite ,
:Reponse ;
:Reponse .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSugg_html
:commentaireSugg_html rdf:type owl:DatatypeProperty ;
rdfs:subPropertyOf :commentaireSugg ;
rdfs:range rdf:XMLLiteral .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/commentaireSugg_md
:commentaireSugg_md rdf:type owl:DatatypeProperty ;
rdfs:subPropertyOf :commentaireSugg .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/correct
:correct rdf:type owl:DatatypeProperty ;
rdfs:range xsd:boolean .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/description_md
:description_md rdf:type owl:DatatypeProperty .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/html
:html rdf:type owl:DatatypeProperty ;
rdfs:range rdf:XMLLiteral .
......@@ -135,6 +173,11 @@
# Classes
#################################################################
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Activite
:Activite rdf:type owl:Class ;
rdfs:subClassOf :MacaoContenu .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Cours
:Cours rdf:type owl:Class ;
rdfs:subClassOf :Activite .
......@@ -208,11 +251,6 @@
rdfs:subClassOf :MacaoContenu .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Page
:Activite rdf:type owl:Class ;
rdfs:subClassOf :MacaoContenu .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Reponse
:Reponse rdf:type owl:Class ;
rdfs:subClassOf :MacaoContenu .
......@@ -241,4 +279,4 @@
] .
### Generated by the OWL API (version 4.5.26.2023-07-17T20:34:13Z) https://github.com/owlcs/owlapi
### Generated by the OWL API (version 4.5.29.2024-05-13T12:11:03Z) https://github.com/owlcs/owlapi
......@@ -10,3 +10,4 @@ types-beautifulsoup4==4.12.0.20240511
types-html5lib==1.1.11.20240228
types-lxml==2024.4.14
typing_extensions==4.12.1
pandoc
from rdflib import OWL, RDF, Graph
from rdflib import OWL, RDF, Graph, Literal
from lxml import html
import pandoc
from common import *
......@@ -29,6 +31,41 @@ def construct_while(g: Graph, query: str):
pass
def transform_html(graph: Graph) -> None:
    """Add a Markdown counterpart for every HTML-valued triple in `graph`.

    For each property name in `html_properties`, every triple
    `(s, NS[prop], o)` has its object read by Pandoc as HTML and written
    back as Markdown; the result is stored as a new triple
    `(s, NS[prop + '_md'], Literal(markdown))`. Only `graph.add` is called,
    so the original HTML triples stay in the graph.

    Args:
        graph: The RDF graph to enrich. It is modified in place.
    """
    html_properties = (
        'commentaireInfo',
        'commentaireSucces',
        'commentaireSugg',
        'html',
        'description',
    )

    # NOTE: a first attempt that cleaned up audio <script> tags with lxml
    # used to sit here as a disabled string literal. It was dead code and has
    # been removed; all HTML content now goes through Pandoc instead.

    for prop in html_properties:
        md_predicate = NS[prop + '_md']
        # Snapshot the matching triples before adding anything:
        # `graph.triples` is a lazy generator over the store, and inserting
        # into the same graph while it is being consumed is not safe.
        matches = list(graph.triples((None, NS[prop], None)))
        for subject, _, html_value in matches:
            document = pandoc.read(html_value, format="html")
            markdown = pandoc.write(document, format="markdown")
            graph.add((subject, md_predicate, Literal(markdown)))
def main():
# Load graph
graph = Graph()
......@@ -36,6 +73,8 @@ def main():
graph.parse(RDF_SCHEMA_FILE)
graph.parse(RDF_CONTENT_FILE)
transform_html(graph)
# Apply property 'subClassOf' transitively, except on the "fake" class
# hierarchy based on MacaoRoot
log.info("Adding transitive subclasses...")
......
......@@ -23,7 +23,7 @@ where {
?act a :Activite .
?act :id ?id .
?act :titre ?title .
?act :description ?desc .
?act :description_md ?desc .
optional {
# Turn the page index into a Hugo weight: increment and add a zero, to
# leave room for adding new pages in-between later
......
......@@ -31,7 +31,7 @@ where {
optional {
?qcu :aReponse ?choice .
?choice :index ?choice_index .
?choice :html ?choice_html
?choice :html_md ?choice_html
}
optional {
?qcu :aReponse ?correct_choice .
......
......@@ -39,10 +39,10 @@ where {
?quiz a :Exercice .
?quiz :id ?id .
optional {
?quiz :commentaireSucces ?correct_comment .
?quiz :commentaireSucces_md ?correct_comment .
}
optional {
?quiz :commentaireSugg ?incorrect_comment .
?quiz :commentaireSugg_md ?incorrect_comment .
}
}
# FIXME: some exercises have multiple incorrect comments, almost identical,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment