Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Macao Legacy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MACAO
Macao Legacy
Commits
90495a98
Commit
90495a98
authored
Aug 2, 2024
by
Eliott Sammier
Browse files
Options
Downloads
Patches
Plain Diff
Parse & decode TAT choices for macao3
parent
229bc46a
No related branches found
No related tags found
1 merge request
!5
Resolve "Parseur par type d'activité"
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
tetras_extraction/script/src/extract_page.py
+70
-7
70 additions, 7 deletions
tetras_extraction/script/src/extract_page.py
with
70 additions
and
7 deletions
tetras_extraction/script/src/extract_page.py
+
70
−
7
View file @
90495a98
...
...
@@ -84,12 +84,13 @@ class Activity:
self
.
title
=
root
.
xpath
(
"
/html/head/title
"
)[
0
].
text
# => Comments
zi
=
root
.
get_element_by_id
(
"
zoneInvisible
"
)
# Regex to separate non-digits and digits
for
cmt_div
in
zi
:
comment
=
Comment
(
cmt_div
.
get
(
"
id
"
)
or
""
)
comment
.
text
=
cmt_div
.
text_content
()
comment
.
html
=
to_html
(
cmt_div
)
# Split id in two parts (non-digits and digits), then match on these parts
m
=
re
gex_comment
.
match
(
comment
.
id
)
m
=
re
.
match
(
r
"
(\D*)(\d*)
"
,
comment
.
id
)
if
m
is
not
None
:
match
m
.
groups
():
case
[
"
divCmt
"
,
num
]:
...
...
@@ -268,6 +269,8 @@ class ExerciceQM(Exercice):
class
ExerciceTAT
(
Exercice
):
segments
:
list
[
str
|
Gap
]
=
field
(
default_factory
=
list
)
"""
The segments (text or gap) that make up the exercise text, in order
"""
gaps
:
dict
[
str
,
Gap
]
=
field
(
default_factory
=
dict
)
"""
Only the gaps, keyed by ID, useful during parsing
"""
@override
def
parse_html
(
self
,
root
:
HtmlElement
):
...
...
@@ -287,7 +290,7 @@ class ExerciceTAT(Exercice):
self
.
segments
.
append
(
text_segment_buf
)
# Add the gap
gap_id
=
elem
.
attrib
[
"
id
"
].
replace
(
"
champTrou
"
,
""
)
self
.
segments
.
append
(
G
ap
(
gap_id
))
self
.
segments
.
append
(
self
.
get_or_create_g
ap
(
gap_id
))
# New text segment starts with the tail text of this element
text_segment_buf
=
elem
.
tail
or
""
else
:
...
...
@@ -303,6 +306,13 @@ class ExerciceTAT(Exercice):
)
pass
def get_or_create_gap(self, gap_id: str) -> Gap:
    """
    Return the gap registered under `gap_id` in `self.gaps`.

    If no gap is registered under that ID yet, a new `Gap(gap_id)` is
    created, stored in `self.gaps`, and returned.
    """
    try:
        return self.gaps[gap_id]
    except KeyError:
        # First time this ID is seen: register a fresh gap for it
        new_gap = Gap(gap_id)
        self.gaps[gap_id] = new_gap
        return new_gap
@dataclass
class
ExerciceGD
(
Exercice
):
targets
:
list
[
str
]
=
field
(
default_factory
=
list
)
...
...
@@ -335,9 +345,14 @@ class RegexParser(JSParser):
body
=
func_split
[
1
]
activity
,
_
=
self
.
_parse_activity_constructor
(
body
)
if
isinstance
(
activity
,
ExerciceQC
):
match
activity
:
case
ExerciceQC
():
# Parse correct answers
self
.
_parse_qc_answers
(
body
,
activity
)
case
ExerciceTAT
():
self
.
_parse_tat_choices
(
body
,
activity
)
case
_
:
pass
return
activity
...
...
@@ -448,6 +463,42 @@ class RegexParser(JSParser):
except
ValueError
as
e
:
raise
exception
from
e
def _parse_tat_choices(self, code: str, exo: ExerciceTAT) -> None:
    """
    Parse the `exo.ajouterReponse(...)` calls found in `code` and attach
    the resulting choices to the gaps of `exo`.

    For every call, a `Choice` is built from the choice ID, its
    correctness is computed from the obfuscated correctness code, its text
    is decoded with `decode_answer_text()`, and it is appended to the
    `choices` of the gap designated by the call (the gap is created on
    `exo` if it does not exist yet).

    :param code: JS source code to search for `exo.ajouterReponse(...)` calls
    :param exo: exercise whose gaps receive the parsed choices
    """
    choices_regex = re.compile(
        r"""
        exo\.ajouterReponse\(
        '(?P<choice_id>\w+)',\s
        '(?P<gap_id>\d+)',\s
        '(?P<correct_code>\d+)',\s
        \"(?P<text>.+)\"
        \);
        """,
        re.VERBOSE,
    )
    # Scan the code once; the matches are needed both for the gap count
    # and for building the choices.
    choices = list(choices_regex.finditer(code))

    # Correctness obfuscation
    # Each choice is correct if correct_code == 2*gap_num + (nb_gaps + score) % 2
    # (see the wiki for more info)
    # The number of gaps is taken as the highest gap ID referenced by a choice
    # (0 when there is no choice at all).
    nb_gaps = max((int(m.group("gap_id")) for m in choices), default=0)
    # NOTE(review): `_parse_score` is assumed to return an int -- confirm
    score = self._parse_score(code)
    correction_offset = (nb_gaps + score) % 2

    # Process matches
    for m in choices:
        gap = exo.get_or_create_gap(m.group("gap_id"))
        choice = Choice(m.group("choice_id"))
        correct_code = int(m.group("correct_code"))
        choice.is_correct = (2 * int(gap.id) + correction_offset) == correct_code
        # Decode obfuscated text
        choice.html = decode_answer_text(m.group("text"))
        # Add choice
        gap.choices.append(choice)
def
decode_answer_id
(
id
:
str
):
"""
...
...
@@ -476,8 +527,20 @@ def decode_answer_id(id: str):
return
res
# Regex to separate non-digits and digits
regex_comment
=
re
.
compile
(
r
"
(\D*)(\d*)
"
)
# 1-to-1 character substitution used by `decode_answer_text`.
# Built once at import time instead of on every call.
_ANSWER_TEXT_TABLE = str.maketrans(
    "bHOi4ph5sWlr1c2nI7LBuzgaUNv0FDXtm8SodePVqRfwGKkJMxAQjTC",
    "ABCDFGHJKLNOPQTUVWXabcdfghjklnopqtuvwx0124578ierRImMsSz",
)


def decode_answer_text(text: str) -> str:
    """
    Decode an obfuscated answer text, just like the `decodeX()` function
    in `ClasseExerciceTAT.js`.

    :param text: obfuscated text
    :return: decoded text; characters that are not part of the substitution
        table are left unchanged
    """
    # The two chars at the end move to the beginning
    # (texts of 2 chars or fewer are not rotated)
    if len(text) > 2:
        text = text[-2:] + text[:-2]
    # Then it's a simple 1-to-1 character substitution
    return text.translate(_ANSWER_TEXT_TABLE)
def
parse_page
(
graph
:
Graph
,
filepath
:
str
,
id
:
str
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment