From 73b9c55a72c586e0a6d22bd5bf9912aedce02b4e Mon Sep 17 00:00:00 2001 From: daxid <david.rouquet@tetras-libre.fr> Date: Mon, 14 Apr 2025 15:16:42 +0200 Subject: [PATCH] fix infoboxes by putting html instead of md --- tetras_extraction/script/src/transform.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tetras_extraction/script/src/transform.py b/tetras_extraction/script/src/transform.py index 9c87403..92a774e 100644 --- a/tetras_extraction/script/src/transform.py +++ b/tetras_extraction/script/src/transform.py @@ -2,6 +2,7 @@ from rdflib import OWL, RDF, Graph, Literal from lxml import html #import pandoc from markitdown import MarkItDown +from markdown import markdown import tempfile import re from glob import glob @@ -48,6 +49,7 @@ def markFileDown(filepath): def prepareHTMLforMD(str): + str = str.replace("\r"," ") # Identify audio content and add markups to identify them in the MD regexAV = re.compile(r".*?PF_clipAV\('.*?', '(.*?).swf',.*") str = regexAV.sub(r"@AUDIOSTART\1@AUDIOEND", str) @@ -106,8 +108,8 @@ def postEditMD(str1): try: file = open(filepath, 'r', encoding='utf-8') file.close() - md = markFileDown(filepath) - commentaireInfo_md = '<div class="commentaireInfo" commentaireId="'+str(i)+'">'+md+'</div>' + simpleHtml = markdown(markFileDown(filepath)) + commentaireInfo_md = '<div class="commentaireInfo" commentaireId="'+str(i)+'">'+simpleHtml+'</div>' str1 = str1.replace(match,str(i)) except FileNotFoundError: pass -- GitLab