Skip to content
Snippets Groups Projects
Select Git revision
  • 90d3fcedfbb64a29562946b8c6fea9f3413fb32b
  • mui5-annotation-on-video-stable default
  • get_setter_canvasSizeInformations
  • fix-error-div-into-p
  • annotation-on-video-v2
  • detached
  • annotation-on-video-r17
  • mui5
  • mui5-react-18
  • jacob-test
  • annotation-on-video protected
  • master
  • test-antoinev1
  • 20-fetch-thumbnail-on-annotation
  • add-research-field
  • Save
  • add-plugin
  • 14-wip-no-seek-to
  • 14-bug-on-video-time-control
  • 9_wip_videotests
  • _upgrade_material_ui
  • latest-tetras-16
  • v3.3.0
  • v3.2.0
  • v3.1.1
  • v3.1.0
  • v3.0.0
  • v3.0.0-rc.7
  • v3.0.0-rc.6
  • v3.0.0-rc.5
  • v3.0.0-rc.4
  • v3.0.0-rc.3
  • v3.0.0-rc.2
  • v3.0.0-rc.1
  • v3.0.0-beta.10
  • v3.0.0-beta.9
  • v3.0.0-beta.8
  • v3.0.0-beta.7
  • v3.0.0-beta.6
  • v3.0.0-beta.5
  • v3.0.0-beta.3
41 results

ErrorDialog.js

Blame
  • extract.py 4.15 KiB
    from pprint import pprint
    from typing import Optional
    
    from lxml import etree
    from rdflib import RDFS, Graph, Literal, URIRef
    from rdflib.namespace import OWL, RDF
    
    import extract_mosetp
    from common import *
    
    # All common constants are in a dedicated module
    from constants import *
    
    # IRI of the schema ontology; `export_graph` declares it as an OWL import
    # of the exported content ontology.
    schema_ontology_uri = URIRef(
        "http://www.semanticweb.org/eliott/ontologies/2024/4/macao"
    )
    # IRI under which `export_graph` declares the exported graph as an
    # ontology (labelled "macao-content").
    content_ontology_uri = URIRef(
        "http://www.semanticweb.org/eliott/ontologies/2024/4/macao-content"
    )
    
    
    def dump_graph(g: Graph):
        """Write every triple of `g` to standard output, one triple per line,
        as space-separated subject, predicate and object.
        """
        for triple in g:
            s, p, o = triple
            print(s, p, o)
    
    
    def create_graph() -> Graph:
        """Return a new, empty graph in which `NS` is bound to the empty prefix."""
        graph = Graph()
        # Binding to "" makes `NS` the default (unprefixed) namespace
        graph.bind("", NS)
        return graph
    
    
    def export_graph(g: Graph):
        """Serialize `g` as Turtle into `OUT_FILE`.

        Before writing, the graph is declared as the `macao-content` ontology
        and given an OWL import of the schema ontology, so that the schema file
        is included when the output is loaded as an ontology. These header
        triples are added to `g` itself and are counted in the printed total.
        """
        ontology = content_ontology_uri
        header_triples = (
            (ontology, RDF.type, OWL.Ontology),
            (ontology, RDFS.label, Literal("macao-content")),
            (ontology, OWL.imports, schema_ontology_uri),
        )
        for triple in header_triples:
            g.add(triple)
        g.serialize(destination=OUT_FILE, format="turtle", base=NS)
        print(f"Exported {len(g)} triples to {OUT_FILE}.")
    
    
    def ns_find(elem: etree.ElementBase, query: str):
        """Wrapper for lxml's `find()` function that automatically uses the default
        namespace for all unprefixed tag names.

        If no default namespace is in scope for `elem`, the query is run as-is,
        so unprefixed names match un-namespaced tags instead of raising
        `KeyError`.

        :param elem: element to search from
        :param query: ElementPath expression, written without namespace prefixes
        :return: whatever `elem.find()` returns for the query
        """
        default_ns = elem.nsmap.get(None)
        if default_ns is None:
            # Nothing to map the unprefixed names to: plain lookup
            return elem.find(query)
        return elem.find(query, namespaces={"": default_ns})
    
    
    def ns_findall(elem: etree.ElementBase, query: str):
        """Wrapper for lxml's `findall()` function that automatically uses the default
        namespace for all unprefixed tag names.

        If no default namespace is in scope for `elem`, the query is run as-is,
        so unprefixed names match un-namespaced tags instead of raising
        `KeyError`.

        :param elem: element to search from
        :param query: ElementPath expression, written without namespace prefixes
        :return: whatever `elem.findall()` returns for the query
        """
        default_ns = elem.nsmap.get(None)
        if default_ns is None:
            # Nothing to map the unprefixed names to: plain lookup
            return elem.findall(query)
        return elem.findall(query, namespaces={"": default_ns})
    
    
    def ns_localname(elem: etree.ElementBase) -> str:
        """Return the tag name of `elem` with the namespace part stripped."""
        qualified_name = etree.QName(elem)
        return qualified_name.localname
    
    
    def parse_manifest(graph: Graph):
        """Parses the `imsmanifest.xml` and populates the `graph` with the
        modules hierarchy.

        The manifest is read from `SOURCE_DIR`. Every `item` directly under the
        first `organization` element found is handled as a top-level module: it
        is parsed recursively, declared `rdfs:subClassOf` of `MacaoRoot`, and
        passed to `add_index` with its position among its siblings.

        :param graph: graph receiving the extracted triples
        :raises ValueError: if the manifest has no `organization` element
        """
        manifest_path = SOURCE_DIR + "/imsmanifest.xml"
        # Parse with lxml
        root = etree.parse(manifest_path, None).getroot()
        org = ns_find(root, ".//organization")
        if org is None:
            # Fail with a clear message rather than an AttributeError on None
            raise ValueError(
                f"No <organization> element found in {manifest_path}"
            )
        # For all top-level modules
        for position, item in enumerate(ns_findall(org, "item")):
            module = NS[item.get("identifier")]
            parse_manifest_rec(graph, item)
            graph.add((module, RDFS.subClassOf, NS["MacaoRoot"]))
            add_index(graph, module, position)
    
    
    def parse_manifest_rec(
        graph: Graph,
        elem,
        parentResource: Optional[URIRef] = None,
        index: Optional[int] = None,
    ):
        """Parses a manifest `item` recursively, adding it and all its
        descendants to the `graph`.

        Items whose identifier starts with `MosMod` are modules: their child
        `item` elements are parsed recursively. Any other item is a subsection
        (`SousPartie`): its pages are read from `sco/<identifier>.html` under
        `SOURCE_DIR` by `extract_mosetp.parse_mosetp`.

        :param graph: graph receiving the extracted triples
        :param elem: `item` element of the manifest
        :param parentResource: parent element in the tree, as a `rdflib` resource
        :param index: index (order) among sibling elements
        :raises ValueError: if `elem` has no `identifier` attribute
        """
        # Named `identifier` rather than `id` to avoid shadowing the builtin
        identifier = elem.get("identifier")
        if identifier is None:
            # Fail before touching the graph, with an explicit message
            raise ValueError("Manifest item has no 'identifier' attribute")
        title: str = ns_find(elem, "title").text

        # The two kinds of item differ only in their type, in the predicate
        # linking them to their parent, and in how their content is parsed
        is_module = identifier.startswith("MosMod")
        if is_module:
            type_name, parent_predicate = "Module", "contientModule"
        else:
            type_name, parent_predicate = "SousPartie", "contientSousPartie"

        # Declare RDF resource and simple properties
        subject = NS[identifier]
        graph.add((subject, RDF.type, OWL.NamedIndividual))
        add_title(graph, subject, title)
        graph.add((subject, RDF.type, NS[type_name]))

        # Add parent properties if necessary
        if parentResource is not None:
            graph.add((parentResource, NS[parent_predicate], subject))
            graph.add((subject, RDFS.subClassOf, parentResource))
            if index is not None:
                add_index(graph, subject, index)

        if is_module:
            # Recurse on child items
            for child_index, child in enumerate(ns_findall(elem, "item")):
                parse_manifest_rec(graph, child, subject, child_index)
        else:
            # Parse list of pages
            extract_mosetp.parse_mosetp(
                graph, f"{SOURCE_DIR}/sco/{identifier}.html", identifier
            )
    
    
    def main():
        """Build a fresh graph from the manifest and export it."""
        graph = create_graph()
        parse_manifest(graph)
        export_graph(graph)
    
    
    # Run the extraction only when this file is executed as a script
    if __name__ == "__main__":
        main()