Skip to content
Snippets Groups Projects
Commit 27ce9e5c authored by Eliott Sammier
Browse files

Improve types & error handling

parent 25f3425a
No related branches found
No related tags found
No related merge requests found
...@@ -73,3 +73,10 @@ def add_index(g: Graph, subject: URIRef, index: int): ...@@ -73,3 +73,10 @@ def add_index(g: Graph, subject: URIRef, index: int):
Literal(f"{index:02} | {name} | ") + title, Literal(f"{index:02} | {name} | ") + title,
) )
) )
# Exceptions ###################################################################
class ParseError(Exception):
pass
...@@ -66,9 +66,11 @@ def parse_manifest(graph: Graph): ...@@ -66,9 +66,11 @@ def parse_manifest(graph: Graph):
# Parse with lxml # Parse with lxml
root = etree.parse(SOURCE_DIR + "/imsmanifest.xml", None).getroot() root = etree.parse(SOURCE_DIR + "/imsmanifest.xml", None).getroot()
org = ns_find(root, ".//organization") org = ns_find(root, ".//organization")
if org is None:
raise ParseError("Missing node <organization> in manifest")
# For all top-level modules # For all top-level modules
for i, e in enumerate(ns_findall(org, "item")): for i, e in enumerate(ns_findall(org, "item")):
module = NS[e.get("identifier")] module = NS[e.get("identifier", default="None")]
parse_manifest_rec(graph, e) parse_manifest_rec(graph, e)
graph.add((module, RDFS.subClassOf, NS["MacaoRoot"])) graph.add((module, RDFS.subClassOf, NS["MacaoRoot"]))
add_index(graph, module, i) add_index(graph, module, i)
...@@ -76,7 +78,7 @@ def parse_manifest(graph: Graph): ...@@ -76,7 +78,7 @@ def parse_manifest(graph: Graph):
def parse_manifest_rec( def parse_manifest_rec(
graph: Graph, graph: Graph,
elem, elem: etree._Element,
parentResource: Optional[URIRef] = None, parentResource: Optional[URIRef] = None,
index: Optional[int] = None, index: Optional[int] = None,
): ):
...@@ -87,12 +89,13 @@ def parse_manifest_rec( ...@@ -87,12 +89,13 @@ def parse_manifest_rec(
""" """
# Get title and ID # Get title and ID
title: str = ns_find(elem, "title").text title = ns_find(elem, "title")
id: str = elem.get("identifier") title = title.text if title is not None else "None" # safe default value
id: str = elem.get("identifier", default="None")
# Declare RDF resource and simple properties # Declare RDF resource and simple properties
subject = NS[id] subject = NS[id]
graph.add((subject, RDF.type, OWL.NamedIndividual)) graph.add((subject, RDF.type, OWL.NamedIndividual))
add_title(graph, subject, title) add_title(graph, subject, str(title))
if id.startswith("MosMod"): if id.startswith("MosMod"):
# It's a Module: # It's a Module:
graph.add((subject, RDF.type, NS["Module"])) graph.add((subject, RDF.type, NS["Module"]))
......
...@@ -38,10 +38,6 @@ class Page: ...@@ -38,10 +38,6 @@ class Page:
return str(self.__dict__) return str(self.__dict__)
class ParseError(Exception):
pass
class RegexParser: class RegexParser:
def parse(self, js, output=sys.stdout): def parse(self, js, output=sys.stdout):
# Find function declaration and only keep code after it # Find function declaration and only keep code after it
...@@ -293,7 +289,7 @@ def parse_page(graph: Graph, filepath: str, id: str): ...@@ -293,7 +289,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
# Collect all inline scripts (no external 'src') and join them in a # Collect all inline scripts (no external 'src') and join them in a
# block of JS code # block of JS code
# scripts = root.cssselect('script[type="text/javascript"]:not([src])') # scripts = root.cssselect('script[type="text/javascript"]:not([src])')
scripts: List[_Element] = root.xpath( scripts: List[html.HtmlElement] = root.xpath(
'/html/head/script[@type="text/javascript" and not(@src)]' '/html/head/script[@type="text/javascript" and not(@src)]'
) )
js = "\n".join((s.text_content() for s in scripts)) js = "\n".join((s.text_content() for s in scripts))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment