Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Macao Legacy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MACAO
Macao Legacy
Commits
229bc46a
Commit
229bc46a
authored
11 months ago
by
Eliott Sammier
Browse files
Options
Downloads
Patches
Plain Diff
Use dataclasses for readability & auto-boilerplate
parent
692947d9
No related branches found
No related tags found
1 merge request
!5
Resolve "Parseur par type d'activité"
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
tetras_extraction/script/src/extract_page.py
+71
-90
71 additions, 90 deletions
tetras_extraction/script/src/extract_page.py
with
71 additions
and
90 deletions
tetras_extraction/script/src/extract_page.py
+
71
−
90
View file @
229bc46a
import
re
import
re
from
abc
import
abstractmethod
from
abc
import
abstractmethod
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
,
field
from
typing
import
Any
from
lxml
import
html
from
lxml
import
html
from
lxml.etree
import
_Element
from
lxml.html
import
HtmlElement
from
lxml.html
import
HtmlElement
from
rdflib
import
RDF
,
Graph
,
Literal
from
rdflib
import
RDF
,
Graph
,
Literal
from
typing_extensions
import
override
from
typing_extensions
import
override
...
@@ -15,62 +13,68 @@ from common import *
...
@@ -15,62 +13,68 @@ from common import *
log
=
get_logger
(
"
extract_page
"
)
log
=
get_logger
(
"
extract_page
"
)
class
Comment
:
class
Base
:
def
__init__
(
self
,
id
:
str
=
""
):
"""
A default base class to implement convenience methods
"""
self
.
id
=
id
self
.
num
:
int
self
.
text
:
str
self
.
html
:
Any
self
.
elem
:
_Element
@override
@override
def
__repr__
(
self
):
def
__repr__
(
self
)
->
str
:
return
str
(
self
.
__dict__
)
return
str
(
type
(
self
).
__name__
)
+
str
(
self
.
__dict__
)
@dataclass
class
Comment
:
id
:
str
"""
The comment
'
s identifier, unique in its parent activity
"""
num
:
int
=
-
1
"""
The comment
'
s index in the activity, its order
"""
html
:
str
=
""
"""
The comment as raw HTML
"""
text
:
str
=
""
"""
The comment as plain text, with formatting stripped
"""
@dataclass
class
Activity
:
class
Activity
:
def
__init__
(
self
):
id
:
str
=
""
self
.
id
:
str
=
""
"""
The ID of the page this activity is in (`pg###`)
"""
"""
The ID of the page this activity is in (`pg###`)
"""
self
.
title
:
str
=
""
title
:
str
=
""
"""
Human-readable title of the activity
"""
"""
Human-readable title of the activity
"""
self
.
description
:
str
|
None
=
None
description
:
str
|
None
=
None
"""
Description of the activity
'
s body (HTML),
"""
Description of the activity
'
s body (HTML),
e.g. the instructions for an exercise activity
"""
e.g. the instructions for an exercise activity
"""
self
.
comment_consigne
:
Comment
|
None
=
None
comment_consigne
:
Comment
|
None
=
None
"""
Another form of activity description but in a comment. May or may not
"""
Another form of activity description but in a comment. May or may not
coexist with a regular description
"""
coexist with a regular description
"""
self
.
comment_success
:
Comment
|
None
=
None
comment_success
:
Comment
|
None
=
None
"""
Comment displayed on success, if applicable
"""
"""
Comment displayed on success, if applicable
"""
self
.
comments_sugg
:
dict
[
str
,
Comment
]
=
{}
comments_sugg
:
dict
[
str
,
Comment
]
=
field
(
default_factory
=
dict
)
"""
Help comments displayed on failure, if applicable (keyed by ID)
"""
"""
Help comments displayed on failure, if applicable (keyed by ID)
"""
self
.
comments_misc
:
list
[
Comment
]
=
[]
comments_misc
:
list
[
Comment
]
=
field
(
default_factory
=
list
)
"""
Any other comments, if present
"""
"""
Any other comments, if present
"""
self
.
ref
:
URIRef
def
save
(
self
,
graph
:
Graph
):
def
save
(
self
,
graph
:
Graph
):
"""
Save activity data to the graph. Subclasses may override this method
"""
Save activity data to the graph. Subclasses may override this method
to save their specific data.
"""
to save their specific data.
"""
self
.
r
ef
=
NS
[
self
.
id
]
ref
:
URIR
ef
=
NS
[
self
.
id
]
# => Type
# => Type
graph
.
add
((
self
.
ref
,
RDF
.
type
,
NS
[
self
.
get_name
()]))
graph
.
add
((
ref
,
RDF
.
type
,
NS
[
self
.
get_name
()]))
# => Title
# => Title
set_title
(
graph
,
self
.
ref
,
self
.
title
)
set_title
(
graph
,
ref
,
self
.
title
)
# => Description
# => Description
description
=
self
.
description
or
""
description
=
self
.
description
or
""
if
self
.
comment_consigne
is
not
None
:
if
self
.
comment_consigne
is
not
None
:
description
+=
self
.
comment_consigne
.
html
description
+=
self
.
comment_consigne
.
html
if
description
!=
""
:
if
description
!=
""
:
graph
.
add
((
self
.
ref
,
NS
[
"
description
"
],
Literal
(
description
)))
graph
.
add
((
ref
,
NS
[
"
description
"
],
Literal
(
description
)))
# => Comments
# => Comments
if
self
.
comment_success
is
not
None
:
if
self
.
comment_success
is
not
None
:
graph
.
add
(
graph
.
add
(
(
self
.
ref
,
NS
[
"
commentaireSucces
"
],
Literal
(
self
.
comment_success
.
html
))
(
ref
,
NS
[
"
commentaireSucces
"
],
Literal
(
self
.
comment_success
.
html
))
)
)
for
comment
in
self
.
comments_sugg
.
values
():
for
comment
in
self
.
comments_sugg
.
values
():
graph
.
add
((
self
.
ref
,
NS
[
"
commentaireSugg
"
],
Literal
(
comment
.
html
)))
graph
.
add
((
ref
,
NS
[
"
commentaireSugg
"
],
Literal
(
comment
.
html
)))
for
comment
in
self
.
comments_misc
:
for
comment
in
self
.
comments_misc
:
graph
.
add
((
self
.
ref
,
NS
[
"
commentaireInfo
"
],
Literal
(
comment
.
html
)))
graph
.
add
((
ref
,
NS
[
"
commentaireInfo
"
],
Literal
(
comment
.
html
)))
def
parse_html
(
self
,
root
:
HtmlElement
):
def
parse_html
(
self
,
root
:
HtmlElement
):
"""
From a `lxml.html` parsing tree, extract all data relevant to this class.
"""
From a `lxml.html` parsing tree, extract all data relevant to this class.
...
@@ -81,11 +85,9 @@ class Activity:
...
@@ -81,11 +85,9 @@ class Activity:
# => Comments
# => Comments
zi
=
root
.
get_element_by_id
(
"
zoneInvisible
"
)
zi
=
root
.
get_element_by_id
(
"
zoneInvisible
"
)
for
cmt_div
in
zi
:
for
cmt_div
in
zi
:
comment
=
Comment
()
comment
=
Comment
(
cmt_div
.
get
(
"
id
"
)
or
""
)
comment
.
text
=
cmt_div
.
text_content
()
comment
.
text
=
cmt_div
.
text_content
()
comment
.
html
=
to_html
(
cmt_div
)
comment
.
html
=
to_html
(
cmt_div
)
comment
.
elem
=
cmt_div
comment
.
id
=
cmt_div
.
get
(
"
id
"
)
or
""
# Split id in two parts (non-digits and digits), then match on these parts
# Split id in two parts (non-digits and digits), then match on these parts
m
=
regex_comment
.
match
(
comment
.
id
)
m
=
regex_comment
.
match
(
comment
.
id
)
if
m
is
not
None
:
if
m
is
not
None
:
...
@@ -127,10 +129,6 @@ class Activity:
...
@@ -127,10 +129,6 @@ class Activity:
case
_
:
case
_
:
raise
NameError
(
name
=
name
)
raise
NameError
(
name
=
name
)
@override
def
__repr__
(
self
):
return
self
.
get_name
()
+
str
(
self
.
__dict__
)
class
Cours
(
Activity
):
class
Cours
(
Activity
):
@override
@override
...
@@ -154,48 +152,35 @@ class Exercice(Activity):
...
@@ -154,48 +152,35 @@ class Exercice(Activity):
class
Choice
:
class
Choice
:
"""
A possible answer for a question, correct or not
"""
"""
A possible answer for a question, correct or not
"""
def
__init__
(
id
:
str
=
""
self
,
id
:
str
=
""
,
index
:
int
=
-
1
,
is_correct
:
bool
=
False
,
html
:
str
=
""
,
comment
:
Comment
|
None
=
None
,
):
self
.
id
=
id
"""
A string identifier for the choice
"""
"""
A string identifier for the choice
"""
self
.
index
=
index
index
:
int
=
-
1
"""
The order the choice appears in
"""
"""
The order the choice appears in
"""
self
.
is_correct
=
is_correct
is_correct
:
bool
=
False
self
.
html
=
html
html
:
str
=
""
self
.
comment
=
c
omment
comment
:
C
omment
|
None
=
None
"""
A `Comment` associated with this choice, displayed when the exercise
"""
A `Comment` associated with this choice, displayed when the exercise
is incorrect and this choice is selected
"""
is incorrect and this choice is selected
"""
@override
def
__str__
(
self
)
->
str
:
return
f
"
Choice(id=
'
{
self
.
id
}
'
, index=
{
self
.
index
}
, is_correct=
{
self
.
is_correct
}
, html=
'
{
self
.
html
[
0
::
10
]
}
'
)
"
@dataclass
class
ChoiceGroup
:
class
ChoiceGroup
:
def
__init__
(
self
):
label
:
str
self
.
label
:
str
items
:
list
[
Choice
]
=
field
(
default_factory
=
list
)
self
.
items
:
list
[
Choice
]
@dataclass
class
Gap
:
class
Gap
:
"""
A gap in a gap-fill text exercise
"""
"""
A gap in a gap-fill text exercise
"""
def
__init__
(
self
,
id
:
str
):
id
:
str
self
.
id
=
id
choices
:
list
[
Choice
]
=
field
(
default_factory
=
list
)
self
.
choices
:
list
[
Choice
]
=
[]
@dataclass
class
ExerciceQC
(
Exercice
):
class
ExerciceQC
(
Exercice
):
def
__init__
(
self
,
is_qcm
:
bool
=
False
)
->
None
:
is_qcm
:
bool
=
False
super
().
__init__
()
choices
:
dict
[
str
,
Choice
]
=
field
(
default_factory
=
dict
)
self
.
is_qcm
=
is_qcm
self
.
choices
:
dict
[
str
,
Choice
]
=
{}
@override
@override
def
get_name
(
self
)
->
str
:
def
get_name
(
self
)
->
str
:
...
@@ -274,16 +259,15 @@ class ExerciceQC(Exercice):
...
@@ -274,16 +259,15 @@ class ExerciceQC(Exercice):
return
self
.
choices
[
id
]
return
self
.
choices
[
id
]
@dataclass
class
ExerciceQM
(
Exercice
):
class
ExerciceQM
(
Exercice
):
def
__init__
(
self
):
questions
:
list
[
ChoiceGroup
]
=
field
(
default_factory
=
list
)
super
().
__init__
()
self
.
questions
:
list
[
ChoiceGroup
]
@dataclass
class
ExerciceTAT
(
Exercice
):
class
ExerciceTAT
(
Exercice
):
def
__init__
(
self
):
segments
:
list
[
str
|
Gap
]
=
field
(
default_factory
=
list
)
super
().
__init__
()
"""
The segments (text or gap) that make up the exercise text, in order
"""
self
.
segments
:
list
[
str
|
Gap
]
=
[]
@override
@override
def
parse_html
(
self
,
root
:
HtmlElement
):
def
parse_html
(
self
,
root
:
HtmlElement
):
...
@@ -319,15 +303,13 @@ class ExerciceTAT(Exercice):
...
@@ -319,15 +303,13 @@ class ExerciceTAT(Exercice):
)
)
pass
pass
@dataclass
class
ExerciceGD
(
Exercice
):
class
ExerciceGD
(
Exercice
):
def
__init__
(
self
):
targets
:
list
[
str
]
=
field
(
default_factory
=
list
)
super
().
__init__
()
draggables
:
list
[
list
[
Choice
]]
=
field
(
default_factory
=
list
)
self
.
targets
:
list
[
str
]
self
.
draggables
:
list
[
list
[
Choice
]]
class
JSParser
:
class
JSParser
(
Base
)
:
@abstractmethod
@abstractmethod
def
parse
(
self
,
js
:
str
)
->
Activity
:
def
parse
(
self
,
js
:
str
)
->
Activity
:
"""
Parse a string of JavaScript code and returns an instance of the
"""
Parse a string of JavaScript code and returns an instance of the
...
@@ -341,8 +323,7 @@ class JSParser:
...
@@ -341,8 +323,7 @@ class JSParser:
class
RegexParser
(
JSParser
):
class
RegexParser
(
JSParser
):
def
__init__
(
self
,
graph
:
Graph
,
act_id
:
str
)
->
None
:
def
__init__
(
self
,
act_id
:
str
)
->
None
:
self
.
graph
=
graph
self
.
act_id
=
act_id
self
.
act_id
=
act_id
@override
@override
...
@@ -517,7 +498,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
...
@@ -517,7 +498,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
js
=
"
\n
"
.
join
((
s
.
text_content
()
for
s
in
scripts
))
js
=
"
\n
"
.
join
((
s
.
text_content
()
for
s
in
scripts
))
activity
=
Activity
()
activity
=
Activity
()
parser
=
RegexParser
(
graph
,
id
)
parser
=
RegexParser
(
id
)
try
:
try
:
activity
:
Activity
=
parser
.
parse
(
js
)
activity
:
Activity
=
parser
.
parse
(
js
)
except
ParseError
as
e
:
except
ParseError
as
e
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment