# Plain-text markers that carry an audio clip id through the HTML -> Markdown
# conversion: prepareHTMLforMD() writes them and postEditMD() consumes them,
# so both functions must agree on the exact spelling.
_AUDIO_MARKER_START = "AUDIOSTART"
_AUDIO_MARKER_END = "@AUDIOEND"

# Matches a ``PF_clipAV('<arg 1>', '<arg 2>', ...`` call together with
# everything before and after it on the same line, capturing <arg 2>.
# No DOTALL flag is set, so ``.`` never crosses a newline and one match
# covers at most one line of input.
_CLIP_AV_LINE_RE = re.compile(r".*?PF_clipAV\('.*?', '(.*?)',.*")

# Matches one marker written by prepareHTMLforMD() and captures the text
# between the start and end markers.
_AUDIO_MARKER_RE = re.compile(
    re.escape(_AUDIO_MARKER_START) + r"(.*?)" + re.escape(_AUDIO_MARKER_END)
)

# re.sub() replacement templates; ``\1`` is the captured clip id.
_AUDIO_MARKER_TEMPLATE = _AUDIO_MARKER_START + r"\1" + _AUDIO_MARKER_END
_AUDIO_SHORTCODE_TEMPLATE = (
    r'{{< audio id="\1" src="audio/\1.0.mp3" class="something" >}}'
)


def prepareHTMLforMD(str: str) -> str:
    """Replace each ``PF_clipAV(...)`` line of an HTML string with a marker.

    Every line that contains a ``PF_clipAV('<arg 1>', '<arg 2>', ...`` call
    is replaced, in full, by ``AUDIOSTART<arg 2>@AUDIOEND``. Lines without
    such a call are returned untouched. When a line holds several calls,
    only the first one is kept, because the pattern consumes the rest of
    the line.

    Args:
        str: HTML source text. The name shadows the builtin; it is kept so
            that existing keyword callers keep working.

    Returns:
        The text with the matching lines replaced by audio markers.
    """
    # Disabled alternative kept from the original implementation:
    # regexJS = re.compile(r'<script type="text/javascript">(.*)</script>')
    return _CLIP_AV_LINE_RE.sub(_AUDIO_MARKER_TEMPLATE, str)


def postEditMD(str: str) -> str:
    r"""Turn audio markers in Markdown text into ``{{< audio ... >}}`` shortcodes.

    Each ``AUDIOSTART<id>@AUDIOEND`` marker produced by prepareHTMLforMD()
    becomes
    ``{{< audio id="<id>" src="audio/<id>.0.mp3" class="something" >}}``.
    Afterwards every ``\_`` in the whole text (not only inside the
    shortcodes) is replaced by a plain ``_``.

    transform_html() applies this function to the ``text_content`` returned
    by ``MarkItDown().convert(...)``.

    Args:
        str: Markdown text, possibly containing audio markers. The name
            shadows the builtin; it is kept for backward compatibility.

    Returns:
        The Markdown text with shortcodes substituted and underscores
        un-escaped.
    """
    with_shortcodes = _AUDIO_MARKER_RE.sub(_AUDIO_SHORTCODE_TEMPLATE, str)
    # NOTE(review): presumably the Markdown converter escapes underscores in
    # the clip ids; the un-escape is document-wide, as in the original code.
    return with_shortcodes.replace(r'\_', r'_')