Skip to content
Snippets Groups Projects
Commit 90495a98 authored by Eliott Sammier's avatar Eliott Sammier
Browse files

Parse & decode TAT choices for macao3

parent 229bc46a
No related branches found
No related tags found
1 merge request: !5 Resolve "Parseur par type d'activité"
......@@ -84,12 +84,13 @@ class Activity:
self.title = root.xpath("/html/head/title")[0].text
# => Comments
zi = root.get_element_by_id("zoneInvisible")
# Regex to separate non-digits and digits
for cmt_div in zi:
comment = Comment(cmt_div.get("id") or "")
comment.text = cmt_div.text_content()
comment.html = to_html(cmt_div)
# Split id in two parts (non-digits and digits), then match on these parts
m = regex_comment.match(comment.id)
m = re.match(r"(\D*)(\d*)", comment.id)
if m is not None:
match m.groups():
case ["divCmt", num]:
......@@ -268,6 +269,8 @@ class ExerciceQM(Exercice):
class ExerciceTAT(Exercice):
segments: list[str | Gap] = field(default_factory=list)
"""The segments (text or gap) that make up the exercise text, in order"""
gaps: dict[str, Gap] = field(default_factory=dict)
"""Only the gaps, keyed by ID, useful during parsing"""
@override
def parse_html(self, root: HtmlElement):
......@@ -287,7 +290,7 @@ class ExerciceTAT(Exercice):
self.segments.append(text_segment_buf)
# Add the gap
gap_id = elem.attrib["id"].replace("champTrou", "")
self.segments.append(Gap(gap_id))
self.segments.append(self.get_or_create_gap(gap_id))
# New text segment starts with the tail text of this element
text_segment_buf = elem.tail or ""
else:
......@@ -303,6 +306,13 @@ class ExerciceTAT(Exercice):
)
pass
def get_or_create_gap(self, gap_id: str) -> Gap:
    """
    Return the gap registered under `gap_id` in `self.gaps`.

    If no gap is registered under that ID yet, a new `Gap` is built from
    the ID, stored in `self.gaps`, and returned.
    """
    try:
        return self.gaps[gap_id]
    except KeyError:
        # First time this ID is seen: register a fresh gap for it
        new_gap = Gap(gap_id)
        self.gaps[gap_id] = new_gap
        return new_gap
@dataclass
class ExerciceGD(Exercice):
targets: list[str] = field(default_factory=list)
......@@ -335,9 +345,14 @@ class RegexParser(JSParser):
body = func_split[1]
activity, _ = self._parse_activity_constructor(body)
if isinstance(activity, ExerciceQC):
match activity:
case ExerciceQC():
# Parse correct answers
self._parse_qc_answers(body, activity)
case ExerciceTAT():
self._parse_tat_choices(body, activity)
case _:
pass
return activity
......@@ -448,6 +463,42 @@ class RegexParser(JSParser):
except ValueError as e:
raise exception from e
def _parse_tat_choices(self, code: str, exo: ExerciceTAT) -> None:
    """
    Parse the answer choices of a TAT exercise out of its JS code.

    Every `exo.ajouterReponse(...)` call found in `code` is turned into a
    `Choice` (ID, correctness flag, decoded text) and appended to the
    choices of the gap it refers to. Gaps are obtained through
    `exo.get_or_create_gap`, so gaps already registered on `exo` are reused.

    :param code: the JS source to scan
    :param exo: the exercise whose gaps receive the parsed choices
    """
    choices_regex = re.compile(
        r"""
        exo\.ajouterReponse\(
        '(?P<choice_id>\w+)'
        ,\s'(?P<gap_id>\d+)'
        ,\s'(?P<correct_code>\d+)'
        ,\s\"(?P<text>.+)\"
        \);""",
        re.VERBOSE,
    )
    # Scan the code only once: the same matches are used both to count the
    # gaps and to build the choices
    choices = list(choices_regex.finditer(code))
    # Correctness obfuscation
    # Each choice is correct if correct_code == 2*gap_num + (nb_gaps + score) % 2
    # (see the wiki for more info)
    # The number of gaps is taken to be the highest gap ID referenced
    # by a choice (0 when there are no choices at all)
    nb_gaps = max((int(match.group("gap_id")) for match in choices), default=0)
    score = self._parse_score(code)
    correction_offset = (nb_gaps + score) % 2
    # Process matches
    for match in choices:
        gap = exo.get_or_create_gap(match.group("gap_id"))
        choice = Choice(match.group("choice_id"))
        correct_code = int(match.group("correct_code"))
        choice.is_correct = (2 * int(gap.id) + correction_offset) == correct_code
        # Decode obfuscated text
        choice.html = decode_answer_text(match.group("text"))
        # Add choice
        gap.choices.append(choice)
def decode_answer_id(id: str):
"""
......@@ -476,8 +527,20 @@ def decode_answer_id(id: str):
return res
# Regex to separate non-digits and digits
regex_comment = re.compile(r"(\D*)(\d*)")
# 1-to-1 character substitution applied by `decode_answer_text`.
# Built once at import time instead of on every call.
_ANSWER_TEXT_TABLE = str.maketrans(
    "bHOi4ph5sWlr1c2nI7LBuzgaUNv0FDXtm8SodePVqRfwGKkJMxAQjTC",
    "ABCDFGHJKLNOPQTUVWXabcdfghjklnopqtuvwx0124578ierRImMsSz",
)


def decode_answer_text(text: str) -> str:
    """
    Decode an obfuscated answer text, just like the `decodeX()` function
    in `ClasseExerciceTAT.js`.

    Decoding is done in two steps:

    1. if the text is longer than two characters, its last two characters
       are moved to the front;
    2. every character present in the substitution table is replaced by
       its counterpart; any other character is left untouched.

    :param text: the obfuscated text
    :return: the decoded text
    """
    # The two chars at the end move to the beginning
    if len(text) > 2:
        text = text[-2:] + text[:-2]
    # Then it's a simple 1-to-1 character substitution
    return text.translate(_ANSWER_TEXT_TABLE)
def parse_page(graph: Graph, filepath: str, id: str):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment