Skip to content
Snippets Groups Projects
Commit f3ba41f7 authored by David Rouquet's avatar David Rouquet
Browse files

gère lorsque plusieurs audios sont extraits d'un unique swf

parent b6e114f5
No related branches found
No related tags found
1 merge request: !4 Main
......@@ -25,6 +25,8 @@ MACAO_ROOT = env_path_or_rel_default("MACAO_ROOT", "../../..")
SOURCE_DIR = env_path_or_rel_default("SOURCES_DIR", "../../../Basilisk/MACAO")
"""Path to the Macao source directory (i.e. the one with the manifest)"""
RESULT_DIR = env_path_or_rel_default("RESULTS_DIR", f"../../result/{MACAO_VERSION}")
"""Path to the directory containing various results (RDF, content, media...)"""
# NOTE(review): the fallback below is an absolute path on one developer's
# machine; presumably it is meant to be overridden through the
# HUGO_MEDIA_DIR environment variable everywhere else -- confirm.
HUGO_MEDIA_DIR = env_path_or_rel_default("HUGO_MEDIA_DIR", "/home/daxid/DEV/MACAO/macao-hugo/macao/static/media/")
"""Path to the Hugo static media directory"""
NEW_CONTENT_ROOT = env_path_or_rel_default(
"NEW_CONTENT_ROOT", RESULT_DIR + "/activities"
......
......@@ -4,6 +4,7 @@ from lxml import html
from markitdown import MarkItDown
import tempfile
import re
from glob import glob
from common import *
......@@ -36,12 +37,19 @@ def construct_while(g: Graph, query: str):
def prepareHTMLforMD(str):
    """Replace Flash audio-player calls in HTML with plain-text markers.

    Every line containing a ``PF_clipAV('...', '<name>.swf', ...)`` call is
    replaced, from the start of the line to its end, by the marker
    ``@AUDIOSTART<name>@AUDIOEND``.

    Args:
        str: HTML source text. (The name shadows the builtin; it is kept so
            that existing callers are unaffected.)

    Returns:
        The text with each matching line replaced by its marker; text
        without any ``PF_clipAV`` call is returned unchanged.
    """
    # The dot before "swf" is escaped so only a literal ".swf" extension
    # matches, not an arbitrary character followed by "swf".
    regexAV = re.compile(r".*?PF_clipAV\('.*?', '(.*?)\.swf',.*")
    # A single substitution, with the leading "@": postEditMD searches for
    # "@AUDIOSTART...@AUDIOEND", so the marker must carry that prefix.
    return regexAV.sub(r"@AUDIOSTART\1@AUDIOEND", str)
def postEditMD(str):
    """Expand ``@AUDIOSTART<folder>@AUDIOEND`` markers into Hugo audio shortcodes.

    For each marker, every ``*.mp3`` file found in
    ``HUGO_MEDIA_DIR/<folder>`` produces one ``{{< audio ... >}}`` shortcode.
    The shortcodes of one marker are concatenated in sorted file-name order,
    which handles the case where several audio files come from a single swf.
    A marker whose folder contains no mp3 file is replaced by an empty string.

    Args:
        str: Markdown text that may contain audio markers. (The name shadows
            the builtin; it is kept so that existing callers are unaffected.)

    Returns:
        The text with every marker replaced by its shortcodes.
    """
    import os  # local import: keeps this block independent of the file's import list

    def _shortcodes(match):
        # Underscores arrive escaped as "\_" inside the marker; undo that to
        # recover the real folder name.
        audioFolder = match.group(1).replace(r"\_", "_")
        # glob() returns entries in arbitrary order; sort for a stable,
        # reproducible sequence (_01, _02, ...).
        audioPaths = sorted(glob(os.path.join(HUGO_MEDIA_DIR, audioFolder, "*.mp3")))
        return "".join(
            '{{< audio id="%s" src="media/%s/%s" >}}' % (audioFile, audioFolder, audioFile)
            for audioFile in map(os.path.basename, audioPaths)
        )

    # With a callable replacement, the returned text is inserted literally.
    return re.sub(r"@AUDIOSTART(.*?)@AUDIOEND", _shortcodes, str)
def transform_html(graph: Graph):
......@@ -80,20 +88,17 @@ def transform_html(graph: Graph):
# Process all html content through Markitdown
for prop in html_properties:
for t in graph.triples((None, NS[prop], None)) :
desc_str = prepareHTMLforMD(t[2])
tmp = tempfile.NamedTemporaryFile(suffix=".html")
with open(tmp.name, 'w') as f:
f.write(desc_str)
mid = MarkItDown()
#desc_md = postEditMD(mid.convert(tmp.name).text_content)
desc_md = mid.convert(tmp.name).text_content
desc_md = postEditMD(mid.convert(tmp.name).text_content)
l = list(t)
l[2] = Literal(desc_md)
l[1] = NS[prop+'_md']
manual_edition_list = list(graph.triples((l[0], NS[prop+"_md_manual_edition"], None)))
if len(manual_edition_list)>=1:
print(manual_edition_list)
t_manual_edition = manual_edition_list[0]
l[2] = t_manual_edition[2]
graph.add(tuple(l))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment