diff --git a/tetras_extraction/macao_12/script/extract.py b/tetras_extraction/macao_12/script/extract.py index 69c3ff092cd7b812bc3107d6fffe33e04252d695..dacf474e2facb4871c2028b7fccaf0b9f0bf0fc8 100644 --- a/tetras_extraction/macao_12/script/extract.py +++ b/tetras_extraction/macao_12/script/extract.py @@ -40,21 +40,21 @@ def export_graph(g: Graph): print(f"Exported {len(g)} triples to {RESULT_FILE}.") -def ns_find(elem: etree.ElementBase, query: str): +def ns_find(elem: etree._Element, query: str): """Wrapper for lxml's `find()` function that automatically uses the default namespace for all unprefixed tag names. """ return elem.find(query, namespaces={"": elem.nsmap[None]}) -def ns_findall(elem: etree.ElementBase, query: str): +def ns_findall(elem: etree._Element, query: str): """Wrapper for lxml's `findall()` function that automatically uses the default namespace for all unprefixed tag names. """ return elem.findall(query, namespaces={"": elem.nsmap[None]}) -def ns_localname(elem: etree.ElementBase) -> str: +def ns_localname(elem: etree._Element) -> str: """Get an element's local name, stripping the namespace.""" return etree.QName(elem).localname @@ -119,10 +119,18 @@ def parse_manifest_rec( extract_mosetp.parse_mosetp(graph, f"{SOURCE_DIR}/sco/{id}.html", id) +import extract_page + + def main(): g = create_graph() parse_manifest(g) export_graph(g) + # extract_page.parse_page( + # g, + # f"{SOURCE_DIR}/contenu/pages/pg60.html", + # "pg60", + # ) if __name__ == "__main__":