Skip to content
Snippets Groups Projects
Commit 229bc46a authored by Eliott Sammier
Browse files

Use dataclasses for readability & auto-boilerplate

parent 692947d9
No related branches found
No related tags found
1 merge request: !5 Resolve "Parseur par type d'activité"
import re import re
from abc import abstractmethod from abc import abstractmethod
from dataclasses import dataclass from dataclasses import dataclass, field
from typing import Any
from lxml import html from lxml import html
from lxml.etree import _Element
from lxml.html import HtmlElement from lxml.html import HtmlElement
from rdflib import RDF, Graph, Literal from rdflib import RDF, Graph, Literal
from typing_extensions import override from typing_extensions import override
...@@ -15,62 +13,68 @@ from common import * ...@@ -15,62 +13,68 @@ from common import *
log = get_logger("extract_page") log = get_logger("extract_page")
class Comment: class Base:
def __init__(self, id: str = ""): """A default base class to implement convenience methods"""
self.id = id
self.num: int
self.text: str
self.html: Any
self.elem: _Element
@override @override
def __repr__(self): def __repr__(self) -> str:
return str(self.__dict__) return str(type(self).__name__) + str(self.__dict__)
@dataclass
class Comment:
    """A comment attached to an activity.

    Only `id` is required; the remaining fields have placeholder defaults
    and can be filled in after construction.
    """

    id: str
    """The comment's identifier, unique in its parent activity"""
    num: int = -1
    """The comment's index in the activity, its order"""
    html: str = ""
    """The comment as raw HTML"""
    text: str = ""
    """The comment as plain text, with formatting stripped"""
@dataclass
class Activity: class Activity:
def __init__(self): id: str = ""
self.id: str = ""
"""The ID of the page this activity is in (`pg###`)""" """The ID of the page this activity is in (`pg###`)"""
self.title: str = "" title: str = ""
"""Human-readable title of the activity""" """Human-readable title of the activity"""
self.description: str | None = None description: str | None = None
"""Description of the activity's body (HTML), """Description of the activity's body (HTML),
e.g. the instructions for an exercise activity""" e.g. the instructions for an exercise activity"""
self.comment_consigne: Comment | None = None comment_consigne: Comment | None = None
"""Another form of activity description but in a comment. May or may not """Another form of activity description but in a comment. May or may not
coexist with a regular description""" coexist with a regular description"""
self.comment_success: Comment | None = None comment_success: Comment | None = None
"""Comment displayed on success, if applicable""" """Comment displayed on success, if applicable"""
self.comments_sugg: dict[str, Comment] = {} comments_sugg: dict[str, Comment] = field(default_factory=dict)
"""Help comments displayed on failure, if applicable (keyed by ID)""" """Help comments displayed on failure, if applicable (keyed by ID)"""
self.comments_misc: list[Comment] = [] comments_misc: list[Comment] = field(default_factory=list)
"""Any other comments, if present""" """Any other comments, if present"""
self.ref: URIRef
def save(self, graph: Graph): def save(self, graph: Graph):
"""Save activity data to the graph. Subclasses may override this method """Save activity data to the graph. Subclasses may override this method
to save their specific data.""" to save their specific data."""
self.ref = NS[self.id] ref: URIRef = NS[self.id]
# => Type # => Type
graph.add((self.ref, RDF.type, NS[self.get_name()])) graph.add((ref, RDF.type, NS[self.get_name()]))
# => Title # => Title
set_title(graph, self.ref, self.title) set_title(graph, ref, self.title)
# => Description # => Description
description = self.description or "" description = self.description or ""
if self.comment_consigne is not None: if self.comment_consigne is not None:
description += self.comment_consigne.html description += self.comment_consigne.html
if description != "": if description != "":
graph.add((self.ref, NS["description"], Literal(description))) graph.add((ref, NS["description"], Literal(description)))
# => Comments # => Comments
if self.comment_success is not None: if self.comment_success is not None:
graph.add( graph.add(
(self.ref, NS["commentaireSucces"], Literal(self.comment_success.html)) (ref, NS["commentaireSucces"], Literal(self.comment_success.html))
) )
for comment in self.comments_sugg.values(): for comment in self.comments_sugg.values():
graph.add((self.ref, NS["commentaireSugg"], Literal(comment.html))) graph.add((ref, NS["commentaireSugg"], Literal(comment.html)))
for comment in self.comments_misc: for comment in self.comments_misc:
graph.add((self.ref, NS["commentaireInfo"], Literal(comment.html))) graph.add((ref, NS["commentaireInfo"], Literal(comment.html)))
def parse_html(self, root: HtmlElement): def parse_html(self, root: HtmlElement):
"""From a `lxml.html` parsing tree, extract all data relevant to this class. """From a `lxml.html` parsing tree, extract all data relevant to this class.
...@@ -81,11 +85,9 @@ class Activity: ...@@ -81,11 +85,9 @@ class Activity:
# => Comments # => Comments
zi = root.get_element_by_id("zoneInvisible") zi = root.get_element_by_id("zoneInvisible")
for cmt_div in zi: for cmt_div in zi:
comment = Comment() comment = Comment(cmt_div.get("id") or "")
comment.text = cmt_div.text_content() comment.text = cmt_div.text_content()
comment.html = to_html(cmt_div) comment.html = to_html(cmt_div)
comment.elem = cmt_div
comment.id = cmt_div.get("id") or ""
# Split id in two parts (non-digits and digits), then match on these parts # Split id in two parts (non-digits and digits), then match on these parts
m = regex_comment.match(comment.id) m = regex_comment.match(comment.id)
if m is not None: if m is not None:
...@@ -127,10 +129,6 @@ class Activity: ...@@ -127,10 +129,6 @@ class Activity:
case _: case _:
raise NameError(name=name) raise NameError(name=name)
@override
def __repr__(self):
return self.get_name() + str(self.__dict__)
class Cours(Activity): class Cours(Activity):
@override @override
...@@ -154,48 +152,35 @@ class Exercice(Activity): ...@@ -154,48 +152,35 @@ class Exercice(Activity):
class Choice: class Choice:
"""A possible answer for a question, correct or not""" """A possible answer for a question, correct or not"""
def __init__( id: str = ""
self,
id: str = "",
index: int = -1,
is_correct: bool = False,
html: str = "",
comment: Comment | None = None,
):
self.id = id
"""A string identifier for the choice""" """A string identifier for the choice"""
self.index = index index: int = -1
"""The order the choice appears in""" """The order the choice appears in"""
self.is_correct = is_correct is_correct: bool = False
self.html = html html: str = ""
self.comment = comment comment: Comment | None = None
"""A `Comment` associated with this choice, displayed when the exercise """A `Comment` associated with this choice, displayed when the exercise
is incorrect and this choice is selected""" is incorrect and this choice is selected"""
@override
def __str__(self) -> str:
return f"Choice(id='{self.id}', index={self.index}, is_correct={self.is_correct}, html='{self.html[0::10]}')"
@dataclass
class ChoiceGroup: class ChoiceGroup:
def __init__(self): label: str
self.label: str items: list[Choice] = field(default_factory=list)
self.items: list[Choice]
@dataclass
class Gap: class Gap:
"""A gap in a gap-fill text exercise""" """A gap in a gap-fill text exercise"""
def __init__(self, id: str): id: str
self.id = id choices: list[Choice] = field(default_factory=list)
self.choices: list[Choice] = []
@dataclass
class ExerciceQC(Exercice): class ExerciceQC(Exercice):
def __init__(self, is_qcm: bool = False) -> None: is_qcm: bool = False
super().__init__() choices: dict[str, Choice] = field(default_factory=dict)
self.is_qcm = is_qcm
self.choices: dict[str, Choice] = {}
@override @override
def get_name(self) -> str: def get_name(self) -> str:
...@@ -274,16 +259,15 @@ class ExerciceQC(Exercice): ...@@ -274,16 +259,15 @@ class ExerciceQC(Exercice):
return self.choices[id] return self.choices[id]
@dataclass
class ExerciceQM(Exercice): class ExerciceQM(Exercice):
def __init__(self): questions: list[ChoiceGroup] = field(default_factory=list)
super().__init__()
self.questions: list[ChoiceGroup]
@dataclass
class ExerciceTAT(Exercice): class ExerciceTAT(Exercice):
def __init__(self): segments: list[str | Gap] = field(default_factory=list)
super().__init__() """The segments (text or gap) that make up the exercise text, in order"""
self.segments: list[str | Gap] = []
@override @override
def parse_html(self, root: HtmlElement): def parse_html(self, root: HtmlElement):
...@@ -319,15 +303,13 @@ class ExerciceTAT(Exercice): ...@@ -319,15 +303,13 @@ class ExerciceTAT(Exercice):
) )
pass pass
@dataclass
class ExerciceGD(Exercice): class ExerciceGD(Exercice):
def __init__(self): targets: list[str] = field(default_factory=list)
super().__init__() draggables: list[list[Choice]] = field(default_factory=list)
self.targets: list[str]
self.draggables: list[list[Choice]]
class JSParser: class JSParser(Base):
@abstractmethod @abstractmethod
def parse(self, js: str) -> Activity: def parse(self, js: str) -> Activity:
"""Parse a string of JavaScript code and returns an instance of the """Parse a string of JavaScript code and returns an instance of the
...@@ -341,8 +323,7 @@ class JSParser: ...@@ -341,8 +323,7 @@ class JSParser:
class RegexParser(JSParser): class RegexParser(JSParser):
def __init__(self, graph: Graph, act_id: str) -> None: def __init__(self, act_id: str) -> None:
self.graph = graph
self.act_id = act_id self.act_id = act_id
@override @override
...@@ -517,7 +498,7 @@ def parse_page(graph: Graph, filepath: str, id: str): ...@@ -517,7 +498,7 @@ def parse_page(graph: Graph, filepath: str, id: str):
js = "\n".join((s.text_content() for s in scripts)) js = "\n".join((s.text_content() for s in scripts))
activity = Activity() activity = Activity()
parser = RegexParser(graph, id) parser = RegexParser(id)
try: try:
activity: Activity = parser.parse(js) activity: Activity = parser.parse(js)
except ParseError as e: except ParseError as e:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment