Skip to content
Snippets Groups Projects
Commit 27ce9e5c authored by Eliott Sammier
Browse files

Improve types & error handling

parent 25f3425a
Branches
No related tags found
No related merge requests found
......@@ -73,3 +73,10 @@ def add_index(g: Graph, subject: URIRef, index: int):
Literal(f"{index:02} | {name} | ") + title,
)
)
# Exceptions ###################################################################
class ParseError(Exception):
    """Signal that source content could not be parsed.

    Raised, for example, when a required node such as ``<organization>``
    is missing from the manifest. The message passed to the constructor
    describes what was wrong with the input.
    """
......@@ -66,9 +66,11 @@ def parse_manifest(graph: Graph):
# Parse with lxml
root = etree.parse(SOURCE_DIR + "/imsmanifest.xml", None).getroot()
org = ns_find(root, ".//organization")
if org is None:
raise ParseError("Missing node <organization> in manifest")
# For all top-level modules
for i, e in enumerate(ns_findall(org, "item")):
module = NS[e.get("identifier")]
module = NS[e.get("identifier", default="None")]
parse_manifest_rec(graph, e)
graph.add((module, RDFS.subClassOf, NS["MacaoRoot"]))
add_index(graph, module, i)
......@@ -76,7 +78,7 @@ def parse_manifest(graph: Graph):
def parse_manifest_rec(
graph: Graph,
elem,
elem: etree._Element,
parentResource: Optional[URIRef] = None,
index: Optional[int] = None,
):
......@@ -87,12 +89,13 @@ def parse_manifest_rec(
"""
# Get title and ID
title: str = ns_find(elem, "title").text
id: str = elem.get("identifier")
title = ns_find(elem, "title")
title = title.text if title is not None else "None" # safe default value
id: str = elem.get("identifier", default="None")
# Declare RDF resource and simple properties
subject = NS[id]
graph.add((subject, RDF.type, OWL.NamedIndividual))
add_title(graph, subject, title)
add_title(graph, subject, str(title))
if id.startswith("MosMod"):
# It's a Module:
graph.add((subject, RDF.type, NS["Module"]))
......
......@@ -38,10 +38,6 @@ class Page:
return str(self.__dict__)
class ParseError(Exception):
pass
class RegexParser:
def parse(self, js, output=sys.stdout):
# Find function declaration and only keep code after it
......@@ -293,7 +289,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
# Collect all inline scripts (no external 'src') and join them in a
# block of JS code
# scripts = root.cssselect('script[type="text/javascript"]:not([src])')
scripts: List[_Element] = root.xpath(
scripts: List[html.HtmlElement] = root.xpath(
'/html/head/script[@type="text/javascript" and not(@src)]'
)
js = "\n".join((s.text_content() for s in scripts))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment