From d868282fdf3df183a397ae41d3a39cc61bf93074 Mon Sep 17 00:00:00 2001
From: eliott <eliott.sammier@tetras-libre.fr>
Date: Thu, 6 Jun 2024 10:22:53 +0200
Subject: [PATCH] RegexParser now adds activity type to the graph

---
 tetras_extraction/macao_12/script/extract_page.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tetras_extraction/macao_12/script/extract_page.py b/tetras_extraction/macao_12/script/extract_page.py
index 86f9a782..dc3d1712 100644
--- a/tetras_extraction/macao_12/script/extract_page.py
+++ b/tetras_extraction/macao_12/script/extract_page.py
@@ -39,6 +39,10 @@ class Page:
 
 
 class RegexParser:
+    def __init__(self, graph: Graph, act_id: str) -> None:
+        self.graph = graph
+        self.act_id = act_id
+
     def parse(self, js, output=sys.stdout):
         # Find function declaration and only keep code after it
         func_split = re.split(r"\s*?function entrerDonnees\(\s*?\)\s*?{", js)
@@ -47,6 +51,7 @@ class RegexParser:
         body = func_split[1]
 
         activity_type, activity_var_name = self.parse_activity_constructor(body)
+        self.graph.add((NS[self.act_id], RDF.type, NS[activity_type]))
         print(activity_type, file=output)
 
         for line in body.splitlines():
@@ -295,7 +300,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
     js = "\n".join((s.text_content() for s in scripts))
 
     # Try different parsers, each writing to a different file to compare their results
-    for parser in [XpathParser(), MatchParser(graph, id), RegexParser()]:
+    for parser in [XpathParser(), MatchParser(graph, id), RegexParser(graph, id)]:
         with open(f"/tmp/{str(parser)}.txt", "a") as f:
             print(f"{id:8}", end="", file=f)
             try:
-- 
GitLab