Skip to content
Snippets Groups Projects
Commit 4ef7294e authored by David Rouquet's avatar David Rouquet
Browse files

working with amrBatch

parent 55a431f7
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:0d91640d-23ea-4079-b765-2eea030926c5 tags: %% Cell type:code id:0d91640d-23ea-4079-b765-2eea030926c5 tags:
``` python ``` python
import importlib.util import importlib.util
import re import re
import amrlib import amrlib
from amrlib.graph_processing.amr_plot import AMRPlot from amrlib.graph_processing.amr_plot import AMRPlot
import uuid import uuid
from IPython.display import SVG, display from IPython.display import SVG, display
import os import os
import shutil import shutil
import subprocess import subprocess
from subprocess import Popen, PIPE, STDOUT from subprocess import Popen, PIPE, STDOUT
from glob import glob from glob import glob
import sys import sys
import os import os
TENET_PATH = "/opt/dashboards/TetrasMARS/tenet/"
sys.path.insert(0, os.path.abspath(TENET_PATH))
import tenet
from IPython.display import HTML,IFrame from IPython.display import HTML,IFrame
import ipywidgets import ipywidgets
import dash_bootstrap_components as dbc import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output from dash import dcc, html, Input, Output
from jupyter_dash import JupyterDash as Dash from jupyter_dash import JupyterDash as Dash
from dash.dependencies import Input, Output, State from dash.dependencies import Input, Output, State
import base64 import base64
AMR_BATCH_PATH = "/opt/dashboards/TetrasMARS/amrbatch/"
sys.path.insert(0, os.path.abspath(AMR_BATCH_PATH))
import amrbatch
TENET_PATH = "/opt/dashboards/TetrasMARS/tenet/"
sys.path.insert(0, os.path.abspath(TENET_PATH))
import tenet
AMR_MODEL_PATH="/opt/dashboards/TetrasMARS/corpus/cm-tool/amrModel/model_parse_xfm_bart_large-v0_1_0"
MEDIA_PATH = "/opt/dashboards/media/17/" MEDIA_PATH = "/opt/dashboards/media/17/"
MEDIA_URL = "https://unsel.tetras-lab.io/dashboard/17/media/" MEDIA_URL = "https://unsel.tetras-lab.io/dashboard/17/media/"
ROOT_PATH = "/opt/dashboards/TetrasMARS/tetras-mars-demo/" ROOT_PATH = "/opt/dashboards/TetrasMARS/tetras-mars-demo/"
AMRLD_PATH = "/opt/dashboards/TetrasMARS/tetras-mars-demo/lib/amrld/" AMRLD_PATH = "/opt/dashboards/TetrasMARS/tetras-mars-demo/lib/amrld/"
owl2vowlPath = '/opt/dashboards/tools/owl2vowl_0.3.7/owl2vowl.jar' owl2vowlPath = '/opt/dashboards/tools/owl2vowl_0.3.7/owl2vowl.jar'
WEBVOWL_PATH = '/opt/webvowl/' WEBVOWL_PATH = '/opt/webvowl/'
onto_prefix="ontologyTarget" onto_prefix="ontologyTarget"
# The following is basically `import tenet` # The following is basically `import tenet`
#spec=importlib.util.spec_from_file_location("tenet",TENET_PATH+'tenet/__init__.py') #spec=importlib.util.spec_from_file_location("tenet",TENET_PATH+'tenet/__init__.py')
#tenet = importlib.util.module_from_spec(spec) #tenet = importlib.util.module_from_spec(spec)
#spec.loader.exec_module(tenet) #spec.loader.exec_module(tenet)
``` ```
%% Cell type:code id:295e4aef-bbd8-40f0-8d84-0b8032b7b039 tags: %% Cell type:code id:295e4aef-bbd8-40f0-8d84-0b8032b7b039 tags:
``` python ``` python
stog = amrlib.load_stog_model(model_dir="/opt/dashboards/TetrasMARS/corpus/cm-tool/amrModel/model_parse_xfm_bart_large-v0_1_0") #stog = amrlib.load_stog_model(model_dir="/opt/dashboards/TetrasMARS/corpus/cm-tool/amrModel/model_parse_xfm_bart_large-v0_1_0")
uuidStr = str(uuid.uuid4()) #prefixPath = uuidDirPath+"file"
uuidDirPath = "/opt/data/tmp/demo-tetras-mars/"+uuidStr+'/' #penmanPath = prefixPath+".amr.penman"
os.mkdir(uuidDirPath) #svgPath = prefixPath+".amr.svg"
prefixPath = uuidDirPath+"file" #ttlFilePath = uuidDirPath+onto_prefix+"-0/"+onto_prefix+"_factoid.ttl"
penmanPath = prefixPath+".amr.penman"
svgPath = prefixPath+".amr.svg"
ttlFilePath = uuidDirPath+onto_prefix+"-0/"+onto_prefix+"_factoid.ttl"
webvowlFileName = ttlFilePath.split('/')[-1].replace('ttl','json')
webvowlFilepath = WEBVOWL_PATH+uuidStr+'_'+webvowlFileName
uuidZipPath = MEDIA_PATH+uuidStr # without the .zip extention
uuidZipUrl = MEDIA_URL+uuidStr+".zip"
``` ```
%% Cell type:code id:a410a6b3-865d-441f-9b83-90a1badae291 tags: %% Cell type:code id:a410a6b3-865d-441f-9b83-90a1badae291 tags:
``` python ``` python
def clean_sting(string):
    """Remove line breaks, and any dots directly preceding them, from *string*.

    Every newline is deleted together with the run of ``.`` characters
    immediately before it.  Nothing is inserted in its place, so text that
    was on consecutive lines ends up joined without a space.  Dots that are
    not followed by a newline are left untouched.
    """
    # Raw string: the former non-raw literal relied on the invalid escape
    # "\." (a warning on recent Python versions).  The pattern is unchanged.
    return re.sub(r"\.*\n", "", string)
def string2amr(string,stog):
    """Parse a single text with the sentence-to-graph model *stog*.

    The text is cleaned with ``clean_sting``, submitted to
    ``stog.parse_sents`` as a one-element batch with ``add_metadata=True``,
    and the first (only) entry of the result is returned.
    """
    batch = [clean_sting(string)]
    parsed = stog.parse_sents(batch, add_metadata=True)
    return parsed[0]
def show_svg(path):
    """Display the SVG file located at *path* in the notebook output."""
    svg_image = SVG(filename=path)
    display(svg_image)
def add_id_in_penman_if_needed(penmanStr,uuidStr):
    """Return *penmanStr* with a leading ``# ::id`` metadata line.

    A string that already starts with ``# ::id`` is returned unchanged;
    otherwise ``# ::id <uuidStr>`` followed by a newline is prepended.
    """
    if penmanStr.startswith('# ::id'):
        return penmanStr
    id_line = '# ::id '+uuidStr+'\n'
    return id_line+penmanStr
def owl2vowl(ttlFilePath, webvowlFileName, webvowlFilePath, uuid='', importList=None):
    """Run the owl2vowl Java converter on an ontology and move its output.

    Args:
        ttlFilePath: ontology file passed to the jar with ``-file``.
        webvowlFileName: path of the file the converter produces; it is
            renamed as-is, so a bare file name is resolved against the
            current working directory.
            NOTE(review): presumably owl2vowl writes ``<input name>.json``
            into the working directory -- confirm against the tool.
        webvowlFilePath: destination the produced file is moved to.
        uuid: unused; kept so existing callers keep working.
        importList: optional list of extra files passed after
            ``-dependencies``.  ``None`` or an empty list means none.

    A non-zero exit status is reported by printing the converter output;
    the rename is attempted regardless, as before.
    """
    cmd = ['java', '-jar', owl2vowlPath, '-file', ttlFilePath]
    if importList:
        cmd += ['-dependencies'] + list(importList)

    # run() drains the pipe while it waits.  Calling wait() before reading a
    # PIPE can block forever once the child fills the pipe buffer.
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if result.returncode != 0:
        print("Error in owl2vowl: \n\n"+result.stdout.decode())

    os.rename(webvowlFileName, webvowlFilePath)
def localImage2htmlImg(imgPath):
    """Embed a local SVG file in a Dash ``html.Img`` component.

    The file is read as bytes, base64-encoded and placed in a
    ``data:image/svg+xml`` URI used as the image source.  The component
    always gets the id ``tag_id`` and the class ``img_class``.
    """
    with open(imgPath, "rb") as svg_file:
        raw_bytes = svg_file.read()
    encoded = base64.b64encode(raw_bytes).decode()
    data_uri = "data:image/svg+xml;base64,{}".format(encoded)
    return html.Img(
        id="tag_id",
        src=data_uri,
        width="100%",
        height="100%",
        className="img_class",
    )
``` ```
%% Cell type:code id:5fd9cf0c-990a-4776-b206-8cc94f87c7be tags: %% Cell type:code id:5fd9cf0c-990a-4776-b206-8cc94f87c7be tags:
``` python ``` python
def processStr(input):
    """Build AMR graphs and an ontology from a text and package the results.

    A fresh UUID names a working directory under
    ``/opt/data/tmp/demo-tetras-mars/``; everything generated for this text
    goes there.  The steps are: ``amrbatch`` parsing, ``tenet`` ontology
    construction, conversion of the full ontology with ``owl2vowl``, and
    zipping of the working directory into ``MEDIA_PATH``.

    Args:
        input: the text to process (name kept for existing callers even
            though it shadows the builtin).

    Returns:
        A tuple ``(uuidDirPath, uuidZipUrl, webvowlFilePath)``: the working
        directory, the download URL of the zip archive, and the path the
        WebVOWL JSON file was moved to.
    """
    # Define useful variables and paths based on a uuid
    uuidStr = str(uuid.uuid4())
    uuidDirPath = "/opt/data/tmp/demo-tetras-mars/"+uuidStr+'/'
    # makedirs also creates the parent working directory when it is missing;
    # it still fails if this UUID directory already exists.
    os.makedirs(uuidDirPath)
    fullOntoPath = uuidDirPath+'full-ontology.ttl'
    ontoBySentencePath = uuidDirPath+'onto-by-sentence/'
    webvowlFileName = os.path.splitext(os.path.basename(fullOntoPath))[0]+'.json'
    webvowlFilePath = WEBVOWL_PATH+uuidStr+'_'+webvowlFileName
    uuidZipPath = MEDIA_PATH+uuidStr  # without the .zip extension
    uuidZipUrl = MEDIA_URL+uuidStr+".zip"

    # Generate an AMR graph by sentence in a subfolder (with companion files
    # such as images of the graphs).  The return value is not used here.
    amrbatch.parse_document_string_to_produce_amr_graph(
        input, None, amr_model_path=AMR_MODEL_PATH, output_dirpath=uuidDirPath,
        amrld_serialization=True)

    # Construct ontologies from each AMR graph plus a "full" one that is the
    # union.  The return value is not used here.
    tenet.create_ontology_from_amrld_dir(
        uuidDirPath,
        onto_prefix="http://ontologies",
        out_file_path=fullOntoPath,
        technical_dir_path=ontoBySentencePath)

    # Convert the full ontology for WebVOWL and move the result to its
    # per-request location.
    owl2vowl(fullOntoPath, webvowlFileName, webvowlFilePath, uuid=uuidStr)

    # Create a zip file so the user can download all generated files
    shutil.make_archive(uuidZipPath, 'zip', uuidDirPath)
    return uuidDirPath, uuidZipUrl, webvowlFilePath
``` ```
%% Cell type:code id:0cd10e8b-cf7a-4fd4-b8ac-540fcb943325 tags: %% Cell type:code id:0cd10e8b-cf7a-4fd4-b8ac-540fcb943325 tags:
``` python ``` python
################################################################################################## ##################################################################################################
# THE FOLLOWING PART IS SPECIFIC TO TÉTRAS LAB # THE FOLLOWING PART IS SPECIFIC TO TÉTRAS LAB
# #
# The _get_tl_config function gets configuration parameters for your # The _get_tl_config function gets configuration parameters for your
# Tétras Lab instance. # Tétras Lab instance.
# Those parameters are passed when initialising the Dash app. # Those parameters are passed when initialising the Dash app.
################################################################################################## ##################################################################################################
def _get_tl_config():
    """Find a free TCP port and build the settings used to start the Dash app.

    Ports are probed upwards from 8050 to 9999 by binding a socket on
    ``0.0.0.0``.  The probe socket is closed again before returning, so the
    port is only known to have been free at probe time.

    When the ``HOST`` environment variable is set, a proxied URL is built
    from ``PROTO``, ``HOST``, ``PORT`` (default 80) and ``JUPYTER_HOST``
    (falling back to ``VOILA_HOST``, then to an empty string).  Otherwise a
    plain localhost URL is returned.

    Returns:
        A tuple ``(server_url, host, port, proxy, base_path)``.  Without
        ``HOST``, ``proxy`` is ``None`` and ``base_path`` is ``"/"``.

    Raises:
        RuntimeError: every port in the range is already in use.
        OSError: binding failed for a reason other than "address in use".
    """
    import errno
    import os
    import socket

    host = "0.0.0.0"
    port = 8050
    end = 9999

    # Find a free port
    while True:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind((host, port))
                break
            except OSError as e:
                if e.errno != errno.EADDRINUSE:
                    raise
        port += 1
        if port > end:
            # Raising a plain string is itself a TypeError; use a real
            # exception so the actual problem is reported.
            raise RuntimeError("No available APP port")

    if os.getenv("HOST") is None:
        return (f"http://localhost:{port}", host, port, None, "/")

    proto = os.getenv("PROTO")
    actualhost = os.getenv("JUPYTER_HOST", os.getenv("VOILA_HOST", ""))
    localport = os.getenv("PORT", 80)
    intermediatehost = os.getenv("HOST", "localhost")
    base_path = f"/{actualhost}/app_proxy/{port}/"
    proxified = f"{proto}://{intermediatehost}:{localport}{base_path}"
    localurl = f"http://{host}:{port}"
    proxy = f"{localurl}::{proxified}"
    return (proxified, host, port, proxy, base_path)
# Resolve the connection settings once; host, port and proxy are reused when
# the server is started further down.
server_url, host, port, proxy, base_path = _get_tl_config()
app = Dash(server_url=server_url, requests_pathname_prefix=base_path)
################################################################################################## ##################################################################################################
################################################################################################## ##################################################################################################
# THE FOLLOWING PART IS GENERIC (JUPYTER)-DASH CODE FROM https://dash.plotly.com/basic-callbacks # THE FOLLOWING PART IS GENERIC (JUPYTER)-DASH CODE FROM https://dash.plotly.com/basic-callbacks
# #
# The layout below declares the page (text area, button, download link and
# output area); the callback after it fills the output area and the download
# link when the button is clicked.
################################################################################################## ##################################################################################################
# Page layout: instructions, the input text area, the trigger button, the
# (initially empty) download link and the output area wrapped in a spinner.
app.layout = html.Div(children=[
    # Instructions
    "Enter an english text and click on the button bellow to construct an ontology.",
    html.Br(),
    " You can then browse the results online or download them as a zip file.",
    html.Br(),
    html.Br(),
    "Please reload the page before entering another text.",
    # Input text, read as State by the callback
    dcc.Textarea(
        id='textarea-state',
        value='Jupiter is a gas giant. Earth is a rock planet.',
        style={'width': '80%', 'height': 100},
    ),
    html.Br(),
    # Button whose n_clicks triggers the callback
    html.Button(
        'Construct AMR graphs and ontology',
        id='textarea-state-button',
        n_clicks=0,
    ),
    html.Br(),
    # Download link; its text and href are set by the callback
    html.A(children="", href='', target="_blank", id="download-link"),
    html.Br(),
    html.Br(),
    # Output area; its children are set by the callback
    dcc.Loading(
        html.Div(id='my-output'),
        color='#A85431',
        style={"verticalAlign": "top"},
    ),
])
@app.callback(
    Output(component_id='my-output', component_property='children'),
    Output(component_id='download-link', component_property='children'),
    Output(component_id='download-link', component_property='href'),
    Input('textarea-state-button', 'n_clicks'),
    State('textarea-state', 'value'),
    prevent_initial_call=True,
)
def update_output(n_clicks, value):
    """Process the entered text and fill the output area and download link.

    Args:
        n_clicks: click count of the "Construct" button (the trigger).
        value: current content of the text area.

    Returns:
        A three-item list matching the declared outputs: the content of the
        output area (an accordion with the WebVOWL viewer in an iframe), the
        text of the download link, and its URL.

    Raises:
        PreventUpdate: the callback fired without any click, so the outputs
            are left as they are.
    """
    # Local import keeps this block self-contained; dash is already a
    # dependency of this notebook.
    from dash.exceptions import PreventUpdate

    # Returning None for three outputs is rejected by Dash, so skip the
    # update explicitly instead of falling off the end of the function.
    if not n_clicks:
        raise PreventUpdate

    uuidDirPath, uuidZipUrl, webvowlFilePath = processStr(value)

    # The viewer is addressed by the JSON file name without directory or
    # extension.
    ontology_id = os.path.splitext(os.path.basename(webvowlFilePath))[0]
    viewer_url = '''https://unsel.tetras-lab.io/webvowl/#{}'''.format(ontology_id)

    ontology_item = dbc.AccordionItem(
        [
            "You can click a class to see its instances in the right panel, they are not shown on the graph.",
            html.Br(),
            html.Iframe(src=viewer_url, style={"height": "800px", "width": "100%"}),
        ],
        title="Browse ontology",
    )

    # NOTE: per-sentence "AMR Graph" accordion items are currently disabled.
    # They were built as
    #   dbc.AccordionItem([localImage2htmlImg(svgPath)], title="AMR Graph")
    #   for svgPath in glob(uuidDirPath+"document-*/*.svg")
    # which is the only use of uuidDirPath here.
    accordion = dbc.Accordion([ontology_item])

    return [html.Div(accordion), "Download Zip File", uuidZipUrl]
#@app.callback( #@app.callback(
# Output("download-zip", "data"), # Output("download-zip", "data"),
# Input("download-zip-button", "n_clicks"), # Input("download-zip-button", "n_clicks"),
# prevent_initial_call=True, # prevent_initial_call=True,
#) #)
#def func(n_clicks): #def func(n_clicks):
# if n_clicks > 0: # if n_clicks > 0:
# return dcc.send_file('https://unsel.tetras-lab.io/dashboard/17/media/9f7287d0-e7b2-4328-9137-7a7c44225b68.zip') # return dcc.send_file('https://unsel.tetras-lab.io/dashboard/17/media/9f7287d0-e7b2-4328-9137-7a7c44225b68.zip')
# Start the server with the settings resolved above.  mode="external" is the
# alternative the author left commented out.
app.run_server(
    mode="inline",
    host=host,
    port=port,
    proxy=proxy,
    height=2000,
)
################################################################################################## ##################################################################################################
``` ```
%% Cell type:code id:4cbaf48e-3467-45b0-b931-20a758b79895 tags: %% Cell type:code id:b1817beb-04b7-4e6f-9b82-675de76cf8af tags:
``` python ``` python
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment