Skip to content
Snippets Groups Projects
Commit 7364be88 authored by Eliott Sammier's avatar Eliott Sammier
Browse files

Remove unused JS parsers in favor of RegexParser

parent 55c63dcb
Branches
No related tags found
1 merge request!1Main
......@@ -148,14 +148,6 @@ def compare_files(f1: str, f2: str):
def main():
g = create_graph()
# Create or reset debug log files for all activity parsers, to compare their
# results afterwards
parsers = ("Match", "Xpath", "Regex")
logfiles = [f"/tmp/{p}Parser_debuglog.txt" for p in parsers]
for logfile in logfiles:
with open(logfile, "w") as f:
print("", file=f)
if MACAO_VERSION == "full":
# Run the parser once for each version, but with the same RDF graph
for Context.version in ["macao_12", "macao_3"]:
......@@ -163,12 +155,8 @@ def main():
parse_manifest(g)
else:
parse_manifest(g)
export_graph(g)
# Compare log files 2 by 2
compare_files(logfiles[0], logfiles[1])
compare_files(logfiles[0], logfiles[2])
compare_files(logfiles[1], logfiles[2])
export_graph(g)
if __name__ == "__main__":
......
import re
from abc import abstractmethod
from dataclasses import dataclass
from typing import Any, Callable
from typing import Any
import esprima as es
from lxml import etree, html
from lxml import html
from lxml.etree import _Element
from lxml.html import HtmlElement
from rdflib import RDF, Graph, Literal
......@@ -428,210 +427,6 @@ class RegexParser(JSParser):
raise exception from e
class XpathParser(JSParser):
"""A parser for the JS portion of an activity, that uses XPath to query
an XML representation of Esprima's abstract syntax tree (AST)"""
# XPath requests pre-compiled as functions
request_function = etree.XPath(
'//FunctionDeclaration[id/Identifier[@name="entrerDonnees"]]'
)
request_index_and_values = etree.XPath(
'*//VariableDeclarator[id//*[@name="nr"]]/init/Literal | *//AssignmentExpression//Identifier[starts-with(@name,"CODE_")]'
)
request_constructor_id = etree.XPath(
'*//NewExpression/callee/Identifier[@name="Cours" or starts-with(@name, "Exercice")]'
)
def __init__(self) -> None:
self.fun: Any
"""AST element corresponding to the function we're interested in.
Initialised in `self.parse()`."""
@override
def parse(self, js: str) -> Activity:
jstree: Any = es.parseScript(js, None)
# Convert Esprima object tree to XML etree
xml = self.to_xml(jstree.toDict(), "jstree")
try:
self.fun = self.request_function(xml)[0]
activity = self._parse_activity_type()
if isinstance(activity, ExerciceQC):
self._parse_qc_answers(activity)
return activity
except Exception as e:
raise ParseError(e)
def _parse_activity_type(self) -> Activity:
constructor_id = self.request_constructor_id(self.fun)[0]
match constructor_id.get("name"):
case "ExerciceQC":
arg = constructor_id.xpath("../../arguments/Literal/@value")[0]
if arg == "QCM":
return ExerciceQC(is_qcm=True)
elif arg == "QCU":
return ExerciceQC()
else:
raise ParseError(f"ExerciceQC: invalid argument '{arg}'")
case other:
return Activity.from_typename(other)
def _parse_qc_answers(self, activity: ExerciceQC) -> None:
"""Parse the correct answers for a QC activity"""
indexes_and_values = self.request_index_and_values(self.fun)
choice_id = "0"
for e in indexes_and_values:
value = e.xpath("@value")
if len(value) != 0:
# "index line"
choice_id = value[0]
else:
# "correct" or "incorrect" line
activity.set_correct(choice_id, e.get("name") == "CODE_V")
def to_xml(self, obj: Any, tag_name: str | None = None):
"""Recursively convert an object structure to an XML `ElementTree`.
Structures are expected to be Python dictionaries.
Converting a dictionary produces a tag named after the "type" attribute (if present).
- A primitive attribute (i.e. not list nor dict) becomes a tag attribute.
- A list attribute becomes a tag with its contents as sub-tags.
- A dictionary attribute becomes a tag (named like the attribute's key)
containing a sub-tag for the dictionary itself
"""
if isinstance(obj, dict):
# Dictionary (or object):
# - if it has a "type" key, the dict represents an object -> use its value as the tag name
# - if a tag_name is specified as well, it's probably important (like an attribute name),
# so we keep both, as 2 nested tags (tag_name for the outer tag, type for the inner tag)
inner_tag = None
outer_tag = None
has_inner = "type" in obj.keys()
if has_inner:
inner_tag = etree.Element(obj["type"], None, None)
else:
inner_tag = etree.Element("_dict", None, None)
if tag_name is not None:
outer_tag = etree.Element(tag_name)
if has_inner:
outer_tag.append(inner_tag)
else:
inner_tag = outer_tag
else:
outer_tag = inner_tag
# Recurse on dictionary items
for key, val in obj.items():
if key != "type": # exception for 'type', handled as attribute
if isinstance(val, (list, dict)):
# Structured attributes become child tags
inner_tag.append(self.to_xml(val, key))
else:
# Primitive attributes become tag attributes
inner_tag.set(key, str(val))
return outer_tag
elif isinstance(obj, list):
tag_name = tag_name or "_list"
list_tag = etree.Element(tag_name)
for e in obj:
list_tag.append(self.to_xml(e))
return list_tag
else:
tag_name = tag_name or "_literal"
leaf_tag = etree.Element(tag_name)
leaf_tag.text = str(obj)
return leaf_tag
class MatchParser(JSParser):
"""A parser for the JS portion of an activity, that uses Python match statements
to navigate the abstract syntax tree (AST) produced by Esprima"""
def __init__(self, graph: Graph, act_id: str) -> None:
self.graph = graph
self.act_id = act_id
self.activity: Activity | None = None
@override
def parse(self, js: str) -> Activity:
jstree = es.parseScript(js, None)
# Try to match our template with one of the top-level statements
for statement in jstree.body:
self.match_function(statement.toDict())
if self.activity is not None:
return self.activity
else:
raise ParseError("No activity constructor found")
def match_constructor_call(self, new_expr: dict[str, Any]):
if self.activity is not None: # Ignore anything after the first match
return
match new_expr:
case {
"type": "NewExpression",
"callee": {
"type": "Identifier",
"name": typ,
},
"arguments": [*args],
}:
match typ:
case "Cours" | "ExerciceQM" | "ExerciceTAT" | "ExerciceGD":
self.activity = Activity.from_typename(typ)
case "ExerciceQC":
match args:
case [{"type": "Literal", "value": "QCU"}, *_]:
typ += "_QCU"
self.activity = ExerciceQC()
case [{"type": "Literal", "value": "QCM"}, *_]:
typ += "_QCM"
self.activity = ExerciceQC(is_qcm=True)
case _:
raise ParseError(
f"ExerciceQC: Invalid argument '{args}'"
)
case _:
raise ParseError(f"Unknown activity type '{typ}'")
case _:
pass
def match_function(self, func: dict[str, Any]):
"""Checks if `func` matches a function declaration named `entrerDonnees`,
and search its body if successful
"""
match func:
case {
"type": "FunctionDeclaration",
"id": {"name": "entrerDonnees"},
"body": {"type": "BlockStatement", "body": body},
}:
# Matched a function declaration and captured its `body` attr
for statement in body:
# Find constructor calls (e.g. `new Thing()`) recursively
recurse_prefix(statement, self.match_constructor_call)
case _:
pass
def recurse_prefix(t: Any, f: Callable[[Any], None]):
    """Pre-order traversal: apply `f` to `t` first, then to every element
    nested inside it, depth-first.

    Only lists (their items) and dictionaries (their values) are descended
    into; any other object is treated as a leaf.

    :param t: The object to walk through
    :param f: The function called on each visited object
    """
    # Visit the node itself before any of its children
    f(t)
    if isinstance(t, dict):
        children = t.values()
    elif isinstance(t, list):
        children = t
    else:
        # Leaf: nothing to descend into
        return
    for child in children:
        recurse_prefix(child, f)
def decode_answer_id(id: str):
"""
Decode an obfuscated answer ID, just like the `decodeX()` function
......@@ -681,13 +476,9 @@ def parse_page(graph: Graph, filepath: str, id: str):
js = "\n".join((s.text_content() for s in scripts))
activity = Activity()
# Try different parsers, each writing to a different file to compare their results
for parser in [XpathParser(), MatchParser(graph, id), RegexParser(graph, id)]:
with open(f"/tmp/{str(parser)}_debuglog.txt", "a") as f:
print(f"\n{id:8}", end="", file=f)
parser = RegexParser(graph, id)
try:
activity: Activity = parser.parse(js)
print(activity, end="", file=f)
except ParseError as e:
log.error(
f"{parser} -> {id}: Parsing error: {e}. Treating this as a generic Activity."
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment