From d868282fdf3df183a397ae41d3a39cc61bf93074 Mon Sep 17 00:00:00 2001
From: eliott <eliott.sammier@tetras-libre.fr>
Date: Thu, 6 Jun 2024 10:22:53 +0200
Subject: [PATCH] RegexParser now adds activity type to the graph

---
 tetras_extraction/macao_12/script/extract_page.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tetras_extraction/macao_12/script/extract_page.py b/tetras_extraction/macao_12/script/extract_page.py
index 86f9a782..dc3d1712 100644
--- a/tetras_extraction/macao_12/script/extract_page.py
+++ b/tetras_extraction/macao_12/script/extract_page.py
@@ -39,6 +39,10 @@ class Page:
 
 
 class RegexParser:
+    def __init__(self, graph: Graph, act_id: str) -> None:
+        self.graph = graph
+        self.act_id = act_id
+
     def parse(self, js, output=sys.stdout):
         # Find function declaration and only keep code after it
         func_split = re.split(r"\s*?function entrerDonnees\(\s*?\)\s*?{", js)
@@ -47,6 +51,7 @@ class RegexParser:
         body = func_split[1]
 
         activity_type, activity_var_name = self.parse_activity_constructor(body)
+        self.graph.add((NS[self.act_id], RDF.type, NS[activity_type]))
         print(activity_type, file=output)
 
         for line in body.splitlines():
@@ -295,7 +300,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
     js = "\n".join((s.text_content() for s in scripts))
 
     # Try different parsers, each writing to a different file to compare their results
-    for parser in [XpathParser(), MatchParser(graph, id), RegexParser()]:
+    for parser in [XpathParser(), MatchParser(graph, id), RegexParser(graph, id)]:
         with open(f"/tmp/{str(parser)}.txt", "a") as f:
             print(f"{id:8}", end="", file=f)
             try:
-- 
GitLab