Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Macao Legacy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MACAO
Macao Legacy
Commits
2d8e8fbc
Commit
2d8e8fbc
authored
Aug 1, 2024
by
Eliott Sammier
Browse files
Options
Downloads
Patches
Plain Diff
Parse TAT text and gaps
parent
1e270a00
No related branches found
No related tags found
1 merge request
!5
Resolve "Parseur par type d'activité"
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
tetras_extraction/script/src/extract_page.py
+43
-2
43 additions, 2 deletions
tetras_extraction/script/src/extract_page.py
with
43 additions
and
2 deletions
tetras_extraction/script/src/extract_page.py
+
43
−
2
View file @
2d8e8fbc
...
...
@@ -183,6 +183,14 @@ class ChoiceGroup:
self
.
items
:
list
[
Choice
]
class Gap:
    """One blank to be filled in within a gap-fill text exercise."""

    def __init__(self, id: str):
        # Each gap owns its own list of candidate choices; it starts empty.
        self.choices: list[Choice] = []
        # Identifier of the gap, stored exactly as given by the caller.
        self.id = id
class
ExerciceQC
(
Exercice
):
def
__init__
(
self
,
is_qcm
:
bool
=
False
)
->
None
:
super
().
__init__
()
...
...
@@ -275,8 +283,41 @@ class ExerciceQM(Exercice):
class ExerciceTAT(Exercice):
    """Gap-fill text exercise: running text interleaved with gaps.

    After :meth:`parse_html` has run, ``segments`` holds the exercise body in
    document order as an alternation of text/HTML strings and :class:`Gap`
    objects. It always starts and ends with a (possibly empty) string.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): `text` and `gaps` are only declared in the code visible
        # here, never assigned; reading them before something else sets them
        # raises AttributeError.
        self.text: str  # can be HTML
        self.gaps: list[ChoiceGroup]
        # Text/HTML fragments and gaps, in document order.
        self.segments: list[str | Gap] = []

    @override
    def parse_html(self, root: HtmlElement) -> None:
        """Split the exercise text into text segments and gaps.

        Looks up the first element with class ``STY_texteTAT`` under ``root``
        and walks its direct children. Every ``<select>`` child carrying the
        class ``STY_selectTAT`` becomes a :class:`Gap`; everything else is
        serialised with ``to_html`` and merged into the surrounding text
        segment. Results are appended to ``self.segments``.

        Only direct children of the container are inspected, so a gap nested
        deeper in the tree ends up inside a text segment; a warning is logged
        when the number of gaps found differs from the number of
        ``STY_selectTAT`` elements anywhere below the container.

        Raises:
            ParseError: if the text container is missing, or if a gap element
                has no ``id`` attribute.
        """
        super().parse_html(root)

        # Find the text container
        try:
            container = root.find_class("STY_texteTAT")[0]
        except IndexError as e:
            raise ParseError("ExerciceTAT: text container not found") from e

        # Parts of the text segment currently being accumulated; joined once
        # the segment is closed (by a gap, or by the end of the container).
        pending_parts = [container.text or ""]
        for elem in container:
            is_gap = elem.tag == "select" and "STY_selectTAT" in elem.classes
            if not is_gap:
                # NOTE(review): assumes to_html() also serialises the element's
                # tail text; otherwise text following a non-gap child would be
                # dropped -- confirm against the helper.
                pending_parts.append(to_html(elem))
                continue

            # It's a gap: close the current text segment and add it
            self.segments.append("".join(pending_parts))

            # Add the gap. A missing id is reported as malformed input rather
            # than leaking a bare KeyError to the caller.
            try:
                raw_id = elem.attrib["id"]
            except KeyError as e:
                raise ParseError(
                    "ExerciceTAT: gap element has no id attribute"
                ) from e
            self.segments.append(Gap(raw_id.replace("champTrou", "")))

            # New text segment starts with the tail text of this element
            pending_parts = [elem.tail or ""]

        # Close the last text segment (possibly empty)
        self.segments.append("".join(pending_parts))

        # Sanity check: compare against every gap element below the container,
        # including nested ones the loop above cannot see.
        nb_total_gaps = len(container.find_class("STY_selectTAT"))
        nb_found_gaps = sum(isinstance(seg, Gap) for seg in self.segments)
        if nb_found_gaps != nb_total_gaps:
            log.warning(
                f"{self.id}: Text has {nb_total_gaps} gaps in total, but found {nb_found_gaps} gap elements, some might be missing"
            )
class
ExerciceGD
(
Exercice
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment