diff --git a/tetras_extraction/script/src/extract_mosetp.py b/tetras_extraction/script/src/extract_mosetp.py index 7ac4c9da01c965ca06fc90629b6c5874f598aa95..96ae78331894d787aeb1753fe00825a827d382f2 100644 --- a/tetras_extraction/script/src/extract_mosetp.py +++ b/tetras_extraction/script/src/extract_mosetp.py @@ -46,7 +46,12 @@ def parse_mosetp(graph: Graph, filepath: str, id: str): :param id: text identifier of the subsection """ # Prepare regex with capturing groups to match lines - regex = re.compile(r'.*new PageContenu\("(.*)", "(.*)", "(.*)", ""\);') + if Context.version == "macao_3": + regex = re.compile( + r'.*new PageContenu\("(.*)", "(.*)", "(.*)", "", "", "", "(.*)"\);' + ) + else: + regex = re.compile(r'.*new PageContenu\("(.*)", "(.*)", "(.*)", ""\);') # The lines we need are fairly basic, grep is much faster # than a Python HTML parser to filter them cmd_array = ["grep", "new PageContenu(", filepath]