From 5f723d6c3375d7d5525653282d59e9c44daf5dcc Mon Sep 17 00:00:00 2001
From: daxid <david.rouquet@tetras-libre.fr>
Date: Mon, 13 Jan 2025 22:45:34 +0100
Subject: [PATCH] allow TAT choice text html->md

---
 tetras_extraction/script/src/extract_page.py |  2 +-
 tetras_extraction/script/src/transform.py    | 14 +++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tetras_extraction/script/src/extract_page.py b/tetras_extraction/script/src/extract_page.py
index f119ea1..23f85f1 100644
--- a/tetras_extraction/script/src/extract_page.py
+++ b/tetras_extraction/script/src/extract_page.py
@@ -335,7 +335,7 @@ class ExerciceTAT(Exercice):
             graph.add((segment_uri, RDF.type, NS["Segment"]))
             graph.add((segment_uri, NS["index"], Literal(index)))
             if isinstance(segment, str):
-                graph.add((segment_uri, NS["text"], Literal(segment)))
+                graph.add((segment_uri, NS["html"], Literal(segment)))
             else:
                 graph.add((segment_uri, RDF.type, NS["Champ"]))
                 segment.save(graph, rdf_name)
diff --git a/tetras_extraction/script/src/transform.py b/tetras_extraction/script/src/transform.py
index b3285bb..10ae8ff 100644
--- a/tetras_extraction/script/src/transform.py
+++ b/tetras_extraction/script/src/transform.py
@@ -118,11 +118,15 @@ def transform_html(graph: Graph):
     for prop in html_properties:
         for t in graph.triples((None, NS[prop], None))  :
             desc_str = prepareHTMLforMD(t[2])
-            tmp = tempfile.NamedTemporaryFile(suffix=".html")
-            with open(tmp.name, 'w') as f:
-                f.write(desc_str)
-            mid = MarkItDown()
-            desc_md = postEditMD(mid.convert(tmp.name).text_content)
+            if desc_str == '':
+                desc_md = ''
+            else:
+                tmp = tempfile.NamedTemporaryFile(suffix=".html")
+                with open(tmp.name, 'w') as f:
+                    f.write(desc_str)
+                mid = MarkItDown()
+                tmp_md = mid.convert(tmp.name).text_content
+                desc_md = postEditMD(tmp_md)
             l = list(t)
             l[2] = Literal(desc_md)
             l[1] = NS[prop+'_md']
-- 
GitLab