Skip to content
Snippets Groups Projects
Commit f3ba41f7 authored by David Rouquet
Browse files

gère lorsque plusieurs audios sont extraits d'un unique swf

parent b6e114f5
Branches
No related tags found
1 merge request: !4 Main
...@@ -25,6 +25,8 @@ MACAO_ROOT = env_path_or_rel_default("MACAO_ROOT", "../../..") ...@@ -25,6 +25,8 @@ MACAO_ROOT = env_path_or_rel_default("MACAO_ROOT", "../../..")
SOURCE_DIR = env_path_or_rel_default("SOURCES_DIR", "../../../Basilisk/MACAO")
"""Path to the Macao source directory (i.e. the one with the manifest)"""
RESULT_DIR = env_path_or_rel_default("RESULTS_DIR", f"../../result/{MACAO_VERSION}")
"""Path to the directory containing various results (RDF, content, media...)"""
# NOTE(review): unlike the other defaults, this one is an absolute path on one
# developer's machine; set the HUGO_MEDIA_DIR environment variable elsewhere.
HUGO_MEDIA_DIR = env_path_or_rel_default("HUGO_MEDIA_DIR", "/home/daxid/DEV/MACAO/macao-hugo/macao/static/media/")
"""Path to the Hugo static media directory"""
NEW_CONTENT_ROOT = env_path_or_rel_default( NEW_CONTENT_ROOT = env_path_or_rel_default(
"NEW_CONTENT_ROOT", RESULT_DIR + "/activities" "NEW_CONTENT_ROOT", RESULT_DIR + "/activities"
......
...@@ -4,6 +4,7 @@ from lxml import html ...@@ -4,6 +4,7 @@ from lxml import html
from markitdown import MarkItDown from markitdown import MarkItDown
import tempfile import tempfile
import re import re
from glob import glob
from common import * from common import *
...@@ -36,12 +37,19 @@ def construct_while(g: Graph, query: str): ...@@ -36,12 +37,19 @@ def construct_while(g: Graph, query: str):
def prepareHTMLforMD(str):
    """Replace Flash audio-clip calls in HTML with plain-text audio markers.

    Every line containing a ``PF_clipAV('<anything>', '<name>.swf', ...)``
    call is replaced, in full, by ``@AUDIOSTART<name>@AUDIOEND``. The marker
    is plain text so that it can be located again in the Markdown produced
    from this HTML.

    Args:
        str: HTML source text. The parameter name is kept for backward
            compatibility even though it shadows the builtin.

    Returns:
        The HTML text with each matching line replaced by its marker; text
        without any ``PF_clipAV`` call is returned unchanged.
    """
    html_text = str  # work on a local name instead of rebinding the builtin's name
    # `.` does not match newlines, so a match never extends beyond the line
    # holding the call. The dot before "swf" is escaped so that only a real
    # ".swf" extension is accepted.
    clip_call = re.compile(r".*?PF_clipAV\('.*?', '(.*?)\.swf',.*")
    return clip_call.sub(r"@AUDIOSTART\1@AUDIOEND", html_text)
def postEditMD(str):
    """Replace audio markers in Markdown with Hugo ``audio`` shortcodes.

    Each ``@AUDIOSTART<folder>@AUDIOEND`` marker is replaced by one
    ``{{< audio ... >}}`` shortcode per ``*.mp3`` file found in
    ``HUGO_MEDIA_DIR/<folder>``, which handles the case where several audio
    files were extracted from a single swf. Shortcodes are emitted in sorted
    file-name order so the output is reproducible.

    Args:
        str: Markdown text. The parameter name is kept for backward
            compatibility even though it shadows the builtin.

    Returns:
        The Markdown text with every marker replaced. A marker whose folder
        contains no ``.mp3`` file is replaced by an empty string.
    """
    import os.path  # local import: only this function needs it

    markdown = str  # work on a local name instead of rebinding the builtin's name
    marker = re.compile(r"@AUDIOSTART(.*?)@AUDIOEND")

    def _shortcodes(match):
        # Undo the "\_" escaping of underscores found in the Markdown text so
        # that the folder name matches the directory on disk.
        folder = match.group(1).replace(r"\_", "_")
        # glob() returns paths in arbitrary order; sort for a stable result.
        mp3_paths = sorted(glob(HUGO_MEDIA_DIR + "/" + folder + "/*.mp3"))
        return "".join(
            '{{< audio id="' + name + '" src="media/' + folder + "/" + name + '" >}}'
            for name in map(os.path.basename, mp3_paths)
        )

    # A callable replacement is inserted literally (no backslash processing),
    # and every marker is handled in a single pass over the text.
    return marker.sub(_shortcodes, markdown)
def transform_html(graph: Graph): def transform_html(graph: Graph):
...@@ -80,20 +88,17 @@ def transform_html(graph: Graph): ...@@ -80,20 +88,17 @@ def transform_html(graph: Graph):
# Process all html content through Markitdown # Process all html content through Markitdown
for prop in html_properties: for prop in html_properties:
for t in graph.triples((None, NS[prop], None)) : for t in graph.triples((None, NS[prop], None)) :
desc_str = prepareHTMLforMD(t[2]) desc_str = prepareHTMLforMD(t[2])
tmp = tempfile.NamedTemporaryFile(suffix=".html") tmp = tempfile.NamedTemporaryFile(suffix=".html")
with open(tmp.name, 'w') as f: with open(tmp.name, 'w') as f:
f.write(desc_str) f.write(desc_str)
mid = MarkItDown() mid = MarkItDown()
#desc_md = postEditMD(mid.convert(tmp.name).text_content) desc_md = postEditMD(mid.convert(tmp.name).text_content)
desc_md = mid.convert(tmp.name).text_content
l = list(t) l = list(t)
l[2] = Literal(desc_md) l[2] = Literal(desc_md)
l[1] = NS[prop+'_md'] l[1] = NS[prop+'_md']
manual_edition_list = list(graph.triples((l[0], NS[prop+"_md_manual_edition"], None))) manual_edition_list = list(graph.triples((l[0], NS[prop+"_md_manual_edition"], None)))
if len(manual_edition_list)>=1: if len(manual_edition_list)>=1:
print(manual_edition_list)
t_manual_edition = manual_edition_list[0] t_manual_edition = manual_edition_list[0]
l[2] = t_manual_edition[2] l[2] = t_manual_edition[2]
graph.add(tuple(l)) graph.add(tuple(l))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment