Minor signature change for parse_html(): it now takes the root HtmlElement instead of the parsed tree

56ae8f77 · Eliott Sammier · 45c689ab · 56ae8f77
Commit 56ae8f77 authored 1 year ago by Eliott Sammier
--- a/tetras_extraction/macao_12/script/extract_page.py
+++ b/tetras_extraction/macao_12/script/extract_page.py
@@ -69,11 +69,10 @@ class Activity:
        for comment in self.comments_misc:
            graph.add((self.ref, NS["commentaireInfo"], Literal(comment.html)))
-    def parse_html(self, tree):
+    def parse_html(self, root: HtmlElement):
        """From a `lxml.html` parsing tree, extract all data relevant to this class.
        Subclasses may override this method to extract more specific data.
        """
-        root = tree.getroot()
        # => Title
        self.title = root.xpath("/html/head/title")[0].text
        # => Comments
@@ -126,17 +125,16 @@ class Activity:
 class Cours(Activity):
-    def parse_html(self, tree):
+    def parse_html(self, root: HtmlElement):
-        super().parse_html(tree)
+        super().parse_html(root)
        # => Description
-        cours = tree.getroot().get_element_by_id("STY_texteCours")
+        cours = root.get_element_by_id("STY_texteCours")
        self.description = to_html(cours).strip()
 class Exercice(Activity):
-    def parse_html(self, tree):
+    def parse_html(self, root: HtmlElement):
-        super().parse_html(tree)
+        super().parse_html(root)
-        root = tree.getroot()
        # => Description
        question = root.get_element_by_id("STY_question")
        self.description = to_html(question).strip()
@@ -167,9 +165,8 @@ class ExerciceQC(Exercice):
    def get_name(self) -> str:
        return "ExerciceQC_QCM" if self.is_qcm else "ExerciceQC_QCU"
-    def parse_html(self, tree):
+    def parse_html(self, root: HtmlElement):
-        super().parse_html(tree)
+        super().parse_html(root)
-        root = tree.getroot()
        # Find question choices
        for choice in root.find_class("STY_reponseQC"):
            # Choices have an 'id' attribute in the form 'lienRepX'
@@ -537,6 +534,6 @@ def parse_page(graph: Graph, filepath: str, id: str):
    activity.id = id
    # Parse the HTML portion
-    activity.parse_html(tree)
+    activity.parse_html(root)
    # Save everything to the graph
    activity.save(graph)