diff --git a/tetras_extraction/macao_12/script/extract_page.py b/tetras_extraction/macao_12/script/extract_page.py
index 86f9a782f1d5de6351b4243d7b51c3209368c643..dc3d171265fc03cceb52556e5b84879b7619560d 100644
--- a/tetras_extraction/macao_12/script/extract_page.py
+++ b/tetras_extraction/macao_12/script/extract_page.py
@@ -39,6 +39,10 @@ class Page:
 
 
 class RegexParser:
+    def __init__(self, graph: Graph, act_id: str) -> None:
+        self.graph = graph
+        self.act_id = act_id
+
     def parse(self, js, output=sys.stdout):
         # Find function declaration and only keep code after it
         func_split = re.split(r"\s*?function entrerDonnees\(\s*?\)\s*?{", js)
@@ -47,6 +51,7 @@ class RegexParser:
         body = func_split[1]
 
         activity_type, activity_var_name = self.parse_activity_constructor(body)
+        self.graph.add((NS[self.act_id], RDF.type, NS[activity_type]))
         print(activity_type, file=output)
 
         for line in body.splitlines():
@@ -295,7 +300,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
     js = "\n".join((s.text_content() for s in scripts))
 
     # Try different parsers, each writing to a different file to compare their results
-    for parser in [XpathParser(), MatchParser(graph, id), RegexParser()]:
+    for parser in [XpathParser(), MatchParser(graph, id), RegexParser(graph, id)]:
         with open(f"/tmp/{str(parser)}.txt", "a") as f:
             print(f"{id:8}", end="", file=f)
             try: