From 73b9c55a72c586e0a6d22bd5bf9912aedce02b4e Mon Sep 17 00:00:00 2001
From: daxid <david.rouquet@tetras-libre.fr>
Date: Mon, 14 Apr 2025 15:16:42 +0200
Subject: [PATCH] Fix infoboxes by embedding rendered HTML instead of Markdown

---
 tetras_extraction/script/src/transform.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tetras_extraction/script/src/transform.py b/tetras_extraction/script/src/transform.py
index 9c87403..92a774e 100644
--- a/tetras_extraction/script/src/transform.py
+++ b/tetras_extraction/script/src/transform.py
@@ -2,6 +2,7 @@ from rdflib import OWL, RDF, Graph, Literal
 from lxml import html
 #import pandoc
 from markitdown import MarkItDown
+from markdown import markdown
 import tempfile
 import re
 from glob import glob
@@ -48,6 +49,7 @@ def markFileDown(filepath):
 
 
 def prepareHTMLforMD(str):
+    str = str.replace("\r"," ")
     # Identify audio content and add markups to identify them in the MD
     regexAV = re.compile(r".*?PF_clipAV\('.*?', '(.*?).swf',.*")
     str = regexAV.sub(r"@AUDIOSTART\1@AUDIOEND", str)
@@ -106,8 +108,8 @@ def postEditMD(str1):
         try:
             file = open(filepath, 'r', encoding='utf-8')
             file.close()
-            md = markFileDown(filepath)
-            commentaireInfo_md = '<div class="commentaireInfo" commentaireId="'+str(i)+'">'+md+'</div>'
+            simpleHtml = markdown(markFileDown(filepath))
+            commentaireInfo_md = '<div class="commentaireInfo" commentaireId="'+str(i)+'">'+simpleHtml+'</div>'
             str1 = str1.replace(match,str(i))
         except FileNotFoundError:
             pass
-- 
GitLab