Skip to content
Snippets Groups Projects
Commit 56ae8f77 authored by Eliott Sammier
Browse files

Minor signature change for parse_html()

parent 45c689ab
No related branches found
No related tags found
No related merge requests found
...@@ -69,11 +69,10 @@ class Activity: ...@@ -69,11 +69,10 @@ class Activity:
for comment in self.comments_misc: for comment in self.comments_misc:
graph.add((self.ref, NS["commentaireInfo"], Literal(comment.html))) graph.add((self.ref, NS["commentaireInfo"], Literal(comment.html)))
def parse_html(self, tree): def parse_html(self, root: HtmlElement):
"""From a `lxml.html` parsing tree, extract all data relevant to this class. """From a `lxml.html` parsing tree, extract all data relevant to this class.
Subclasses may override this method to extract more specific data. Subclasses may override this method to extract more specific data.
""" """
root = tree.getroot()
# => Title # => Title
self.title = root.xpath("/html/head/title")[0].text self.title = root.xpath("/html/head/title")[0].text
# => Comments # => Comments
...@@ -126,17 +125,16 @@ class Activity: ...@@ -126,17 +125,16 @@ class Activity:
class Cours(Activity): class Cours(Activity):
def parse_html(self, tree): def parse_html(self, root: HtmlElement):
super().parse_html(tree) super().parse_html(root)
# => Description # => Description
cours = tree.getroot().get_element_by_id("STY_texteCours") cours = root.get_element_by_id("STY_texteCours")
self.description = to_html(cours).strip() self.description = to_html(cours).strip()
class Exercice(Activity): class Exercice(Activity):
def parse_html(self, tree): def parse_html(self, root: HtmlElement):
super().parse_html(tree) super().parse_html(root)
root = tree.getroot()
# => Description # => Description
question = root.get_element_by_id("STY_question") question = root.get_element_by_id("STY_question")
self.description = to_html(question).strip() self.description = to_html(question).strip()
...@@ -167,9 +165,8 @@ class ExerciceQC(Exercice): ...@@ -167,9 +165,8 @@ class ExerciceQC(Exercice):
def get_name(self) -> str: def get_name(self) -> str:
return "ExerciceQC_QCM" if self.is_qcm else "ExerciceQC_QCU" return "ExerciceQC_QCM" if self.is_qcm else "ExerciceQC_QCU"
def parse_html(self, tree): def parse_html(self, root: HtmlElement):
super().parse_html(tree) super().parse_html(root)
root = tree.getroot()
# Find question choices # Find question choices
for choice in root.find_class("STY_reponseQC"): for choice in root.find_class("STY_reponseQC"):
# Choices have an 'id' attribute in the form 'lienRepX' # Choices have an 'id' attribute in the form 'lienRepX'
...@@ -537,6 +534,6 @@ def parse_page(graph: Graph, filepath: str, id: str): ...@@ -537,6 +534,6 @@ def parse_page(graph: Graph, filepath: str, id: str):
activity.id = id activity.id = id
# Parse the HTML portion # Parse the HTML portion
activity.parse_html(tree) activity.parse_html(root)
# Save everything to the graph # Save everything to the graph
activity.save(graph) activity.save(graph)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment