diff --git a/.gitignore b/.gitignore index b79602f862f14c03ae8976b274eb09cf3e7b8e51..d992573007b93024a2f8bc2566afe06e7c0597d1 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ +# JetBrains IDE config **/.idea +# Protégé catalog file (unreliable changes) +**/catalog-v001.xml diff --git a/tetras_extraction/macao_12/result/catalog-v001.xml b/tetras_extraction/macao_12/result/catalog-v001.xml deleted file mode 100644 index 5cbe3cf26a9fb157a705dc783f85fa69fdab420b..0000000000000000000000000000000000000000 --- a/tetras_extraction/macao_12/result/catalog-v001.xml +++ /dev/null @@ -1,4 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" standalone="no"?> -<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog"> - <group id="Folder Repository, directory=, recursive=true, Auto-Update=true, version=2" prefer="public" xml:base=""/> -</catalog> diff --git a/tetras_extraction/macao_12/result/macao_content.ttl b/tetras_extraction/macao_12/result/macao_content.ttl index d2d6759007e7fa80cd688e0467bb0a09ac54e209..173cf3dbb63815fcce5108369546dd9d3fa29ec1 100644 --- a/tetras_extraction/macao_12/result/macao_content.ttl +++ b/tetras_extraction/macao_12/result/macao_content.ttl @@ -3,6 +3,10 @@ @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . +<http://www.semanticweb.org/eliott/ontologies/2024/4/macao-content> a owl:Ontology ; + rdfs:label "macao-content" ; + owl:imports <http://www.semanticweb.org/eliott/ontologies/2024/4/macao> . + :MosMod951 a :Module, owl:NamedIndividual ; rdfs:label "Présentation" ; @@ -2995,6 +2999,3 @@ :titre "> Utiliser les symboles phonétiques" ; rdfs:subClassOf :MosMod341 . -[] a owl:Ontology ; - owl:imports : . - diff --git a/tetras_extraction/macao_12/script/.gitignore b/tetras_extraction/macao_12/script/.gitignore index 92afa22fd84964797c24ac50c6a69227c2534b99..7e7d3fc524239e8d11aa5c39a47b38f04c96ccf1 100644 --- a/tetras_extraction/macao_12/script/.gitignore +++ b/tetras_extraction/macao_12/script/.gitignore @@ -1,2 +1,3 @@ +# Python cache and runtime env __pycache__/ venv/ diff --git a/tetras_extraction/macao_12/script/extract.py b/tetras_extraction/macao_12/script/extract.py index 94edf47344f00cd99a01259312b1356771f05339..45231b0aeef5b8dfbb2715ba81e56facc37c66c5 100644 --- a/tetras_extraction/macao_12/script/extract.py +++ b/tetras_extraction/macao_12/script/extract.py @@ -1,15 +1,22 @@ from pprint import pprint from typing import Optional -from rdflib import RDFS, Graph, BNode, URIRef, Literal -from rdflib.namespace import OWL, RDF + from lxml import etree +from rdflib import RDFS, Graph, Literal, URIRef +from rdflib.namespace import OWL, RDF + +import extract_mosetp +from common import * # All common constants are in a dedicated module from constants import * -from common import * -import extract_mosetp -import extract_page +schema_ontology_uri = URIRef( + "http://www.semanticweb.org/eliott/ontologies/2024/4/macao" +) +content_ontology_uri = URIRef( + "http://www.semanticweb.org/eliott/ontologies/2024/4/macao-content" +) def dump_graph(g: Graph): @@ -28,9 +35,10 @@ def export_graph(g: Graph): """Exports the graph to `OUT_FILE`, with OWL imports to include the schema file when loaded as an ontology """ - imports = BNode() - g.add((imports, RDF.type, OWL.Ontology)) - g.add((imports, OWL.imports, URIRef(NS))) + this_onto = content_ontology_uri + g.add((this_onto, RDF.type, OWL.Ontology)) + g.add((this_onto, RDFS.label, Literal("macao-content"))) + g.add((this_onto, OWL.imports, schema_ontology_uri)) g.serialize(OUT_FILE, "turtle", base=NS) print(f"Exported {len(g)} triples to {OUT_FILE}.")