def prepareHTMLforMD(str):
    """Replace audio and image references in HTML with plain-text markers.

    Two rewrites are applied, in this order:

    * Every line containing a ``PF_clipAV('<id>', '<name>.swf', ...`` call is
      replaced, as a whole, by ``@AUDIOSTART<name>@AUDIOEND``.
    * Every ``<img>`` tag whose ``src`` starts with ``../media/`` is replaced
      by ``@IMAGESTART<file>@IMAGEEND``, where ``<file>`` is the rest of the
      ``src`` value.

    The markers are plain text, so they can be located again with a simple
    pattern once the HTML has been converted to Markdown.

    Args:
        str: HTML source to rewrite. The name shadows the builtin; it is kept
            only so existing callers remain compatible.

    Returns:
        The HTML with audio calls and media images replaced by markers. Input
        that contains neither is returned unchanged.
    """
    text = str  # work on a local name instead of rebinding the parameter

    # Identify audio content and mark it up so it can be found in the MD.
    # The dot before "swf" is escaped so that only a real ".swf" extension
    # matches; the leading ".*?" and trailing ".*" consume the rest of the
    # line holding the call.
    audio_call = re.compile(r".*?PF_clipAV\('.*?', '(.*?)\.swf',.*")
    text = audio_call.sub(r"@AUDIOSTART\1@AUDIOEND", text)

    # Identify image content and mark it up so it can be found in the MD.
    # "[^>]" keeps the match inside a single <img ...> tag, so an <img> that
    # does not point at ../media/ can no longer swallow a following tag that
    # does; unlike ".", it also lets the tag span several lines.
    media_img = re.compile(r'<img\b[^>]*?src="\.\./media/([^"]*)"[^>]*>')
    text = media_img.sub(r"@IMAGESTART\1@IMAGEEND", text)

    return text