Skip to content
Snippets Groups Projects
Commit 852c7d08 authored by Eliott Sammier
Browse files

Fix SOURCE_DIR issues depending on the Macao version

parent 4ac6fc38
No related branches found
No related tags found
1 merge request: !1 Main
...@@ -38,6 +38,15 @@ RDF_SCHEMA_FILE = env_path_or_rel_default("RDF_SCHEMA_FILE", "../../macao_schema ...@@ -38,6 +38,15 @@ RDF_SCHEMA_FILE = env_path_or_rel_default("RDF_SCHEMA_FILE", "../../macao_schema
RDF_FULL_FILE = env_path_or_rel_default("RDF_FULL_FILE", RESULT_DIR + "/macao_full.ttl") RDF_FULL_FILE = env_path_or_rel_default("RDF_FULL_FILE", RESULT_DIR + "/macao_full.ttl")
"""Path to the full RDF file, including schema, extracted content and inferences""" """Path to the full RDF file, including schema, extracted content and inferences"""
class Context:
    """Hold the runtime-mutable copies of otherwise constant settings.

    Some global values, such as paths, are not truly constant and may be
    changed while the program runs. This class is used as a singleton:
    its attributes are read and reassigned directly on the class, never
    on an instance.
    """

    # Runtime copy of SOURCE_DIR; may be reassigned while parsing.
    source_dir = SOURCE_DIR
    # Runtime copy of MACAO_VERSION; may be reassigned while parsing.
    version = MACAO_VERSION
NS = Namespace("http://www.semanticweb.org/eliott/ontologies/2024/4/macao/") NS = Namespace("http://www.semanticweb.org/eliott/ontologies/2024/4/macao/")
"""The rdflib base Namespace for our ontology""" """The rdflib base Namespace for our ontology"""
......
...@@ -66,7 +66,7 @@ def parse_manifest(graph: Graph): ...@@ -66,7 +66,7 @@ def parse_manifest(graph: Graph):
modules hierarchy. modules hierarchy.
""" """
# Parse with lxml # Parse with lxml
root = etree.parse(source_dir + "/imsmanifest.xml", None).getroot() root = etree.parse(Context.source_dir + "/imsmanifest.xml", None).getroot()
org = ns_find(root, ".//organization") org = ns_find(root, ".//organization")
if org is None: if org is None:
raise ParseError("Missing node <organization> in manifest") raise ParseError("Missing node <organization> in manifest")
...@@ -122,9 +122,7 @@ def parse_manifest_rec( ...@@ -122,9 +122,7 @@ def parse_manifest_rec(
if index is not None: if index is not None:
add_index(graph, subject, index) add_index(graph, subject, index)
# Parse list of pages # Parse list of pages
extract_mosetp.parse_mosetp( extract_mosetp.parse_mosetp(graph, f"{Context.source_dir}/sco/{id}.html", id)
graph, f"{source_dir}/sco/{id}.html", id, f"{source_dir}/contenu/pages"
)
def compare_files(f1: str, f2: str): def compare_files(f1: str, f2: str):
...@@ -135,9 +133,6 @@ def compare_files(f1: str, f2: str): ...@@ -135,9 +133,6 @@ def compare_files(f1: str, f2: str):
) )
source_dir = SOURCE_DIR
def main(): def main():
g = create_graph() g = create_graph()
...@@ -150,9 +145,9 @@ def main(): ...@@ -150,9 +145,9 @@ def main():
print("", file=f) print("", file=f)
if MACAO_VERSION == "full": if MACAO_VERSION == "full":
for version in [12, 3]: # Run the parser once for each version, but with the same RDF graph
global source_dir for Context.version in ["12", "3"]:
source_dir = f"{SOURCE_DIR}/macao_{version}" Context.source_dir = f"{SOURCE_DIR}/macao_{Context.version}"
parse_manifest(g) parse_manifest(g)
else: else:
parse_manifest(g) parse_manifest(g)
......
...@@ -37,7 +37,7 @@ def generate_triples( ...@@ -37,7 +37,7 @@ def generate_triples(
graph.add((mosetp, NS["contientActivite"], page)) graph.add((mosetp, NS["contientActivite"], page))
def parse_mosetp(graph: Graph, filepath: str, id: str, pages_dir: str): def parse_mosetp(graph: Graph, filepath: str, id: str):
"""Parse a subsection (`MosEtp###.html`) into the `graph`, creating """Parse a subsection (`MosEtp###.html`) into the `graph`, creating
the child pages. the child pages.
...@@ -64,7 +64,9 @@ def parse_mosetp(graph: Graph, filepath: str, id: str, pages_dir: str): ...@@ -64,7 +64,9 @@ def parse_mosetp(graph: Graph, filepath: str, id: str, pages_dir: str):
page_id = m.group(2) page_id = m.group(2)
generate_triples(graph, id, page_id, m.group(1), index) generate_triples(graph, id, page_id, m.group(1), index)
# Call the page parser # Call the page parser
parse_page(graph, f"{pages_dir}/{page_id}.html", page_id) parse_page(
graph, f"{Context.source_dir}/contenu/pages/{page_id}.html", page_id
)
else: else:
log.warning(f"skipping page: regex found no match on line '{line}'") log.warning(f"skipping page: regex found no match on line '{line}'")
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment