diff --git a/tetras_extraction/script/src/extract_page.py b/tetras_extraction/script/src/extract_page.py index f119ea1de4283ded9425ea0fa1338613a8a302b1..23f85f1db03ac37d1adba2c7c4b02146b31c553f 100644 --- a/tetras_extraction/script/src/extract_page.py +++ b/tetras_extraction/script/src/extract_page.py @@ -335,7 +335,7 @@ class ExerciceTAT(Exercice): graph.add((segment_uri, RDF.type, NS["Segment"])) graph.add((segment_uri, NS["index"], Literal(index))) if isinstance(segment, str): - graph.add((segment_uri, NS["text"], Literal(segment))) + graph.add((segment_uri, NS["html"], Literal(segment))) else: graph.add((segment_uri, RDF.type, NS["Champ"])) segment.save(graph, rdf_name) diff --git a/tetras_extraction/script/src/transform.py b/tetras_extraction/script/src/transform.py index b3285bb9412d132c6bd2dc0d01c0fb8dfe270fb0..10ae8ffb6802d3d7cf36da49cf4d5a0ec45ff391 100644 --- a/tetras_extraction/script/src/transform.py +++ b/tetras_extraction/script/src/transform.py @@ -118,11 +118,15 @@ def transform_html(graph: Graph): for prop in html_properties: for t in graph.triples((None, NS[prop], None)) : desc_str = prepareHTMLforMD(t[2]) - tmp = tempfile.NamedTemporaryFile(suffix=".html") - with open(tmp.name, 'w') as f: - f.write(desc_str) - mid = MarkItDown() - desc_md = postEditMD(mid.convert(tmp.name).text_content) + if desc_str == '': + desc_md = '' + else: + tmp = tempfile.NamedTemporaryFile(suffix=".html") + with open(tmp.name, 'w') as f: + f.write(desc_str) + mid = MarkItDown() + tmp_md = mid.convert(tmp.name).text_content + desc_md = postEditMD(tmp_md) l = list(t) l[2] = Literal(desc_md) l[1] = NS[prop+'_md']