Skip to content
Snippets Groups Projects
Commit d868282f authored by Eliott Sammier
Browse files

RegexParser now adds activity type to the graph

parent 27ce9e5c
No related branches found
No related tags found
No related merge requests found
......@@ -39,6 +39,10 @@ class Page:
class RegexParser:
def __init__(self, graph: Graph, act_id: str) -> None:
    """Remember the target graph and the ID of the activity being parsed.

    Args:
        graph: Graph that ``parse`` adds the activity's type triple to.
        act_id: Key looked up in ``NS`` to name the activity node.
    """
    self.graph, self.act_id = graph, act_id
def parse(self, js, output=sys.stdout):
# Find function declaration and only keep code after it
func_split = re.split(r"\s*?function entrerDonnees\(\s*?\)\s*?{", js)
......@@ -47,6 +51,7 @@ class RegexParser:
body = func_split[1]
activity_type, activity_var_name = self.parse_activity_constructor(body)
self.graph.add((NS[self.act_id], RDF.type, NS[activity_type]))
print(activity_type, file=output)
for line in body.splitlines():
......@@ -295,7 +300,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
js = "\n".join((s.text_content() for s in scripts))
# Try different parsers, each writing to a different file to compare their results
for parser in [XpathParser(), MatchParser(graph, id), RegexParser()]:
for parser in [XpathParser(), MatchParser(graph, id), RegexParser(graph, id)]:
with open(f"/tmp/{str(parser)}.txt", "a") as f:
print(f"{id:8}", end="", file=f)
try:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment