From 7d1e5dace398e65cc75d61cf689067c41ec4ca08 Mon Sep 17 00:00:00 2001 From: daxid <david.rouquet@tetras-libre.fr> Date: Mon, 27 Feb 2023 14:55:15 +0000 Subject: [PATCH] First selfContainedDemo working --- selfContainedDemo.ipynb | 302 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 selfContainedDemo.ipynb diff --git a/selfContainedDemo.ipynb b/selfContainedDemo.ipynb new file mode 100644 index 0000000..67cfb43 --- /dev/null +++ b/selfContainedDemo.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0d91640d-23ea-4079-b765-2eea030926c5", + "metadata": {}, + "outputs": [], + "source": [ + "import importlib.util\n", + "import re\n", + "import amrlib\n", + "from amrlib.graph_processing.amr_plot import AMRPlot\n", + "import uuid\n", + "from IPython.display import SVG, display\n", + "import os\n", + "import shutil\n", + "import subprocess\n", + "from subprocess import Popen, PIPE, STDOUT\n", + "from glob import glob\n", + "import sys\n", + "import os\n", + "TENET_PATH = \"/opt/dashboards/TetrasMARS/tenet/\"\n", + "sys.path.insert(0, os.path.abspath(TENET_PATH))\n", + "import tenet\n", + "from IPython.display import HTML,IFrame\n", + "import ipywidgets\n", + "import dash_bootstrap_components as dbc\n", + "from dash import dcc, html, Input, Output\n", + "from jupyter_dash import JupyterDash as Dash\n", + "from dash.dependencies import Input, Output, State\n", + "import base64\n", + "\n", + "MEDIA_PATH = \"/opt/dashboards/media/17/\"\n", + "MEDIA_URL = \"https://unsel.tetras-lab.io/dashboard/17/media/\"\n", + "ROOT_PATH = \"/opt/dashboards/TetrasMARS/tetras-mars-demo/\"\n", + "AMRLD_PATH = \"/opt/dashboards/TetrasMARS/tetras-mars-demo/lib/amrld/\"\n", + "owl2vowlPath = '/opt/dashboards/tools/owl2vowl_0.3.7/owl2vowl.jar'\n", + "WEBVOWL_PATH = '/opt/webvowl/'\n", + "onto_prefix=\"ontologyTarget\"\n", + "\n", + "# The following is basically `import tenet`\n", + 
"#spec=importlib.util.spec_from_file_location(\"tenet\",TENET_PATH+'tenet/__init__.py')\n", + "#tenet = importlib.util.module_from_spec(spec)\n", + "#spec.loader.exec_module(tenet)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "295e4aef-bbd8-40f0-8d84-0b8032b7b039", + "metadata": {}, + "outputs": [], + "source": [ + "stog = amrlib.load_stog_model(model_dir=\"/opt/dashboards/TetrasMARS/corpus/cm-tool/amrModel/model_parse_xfm_bart_large-v0_1_0\") \n", + " \n", + "uuidStr = str(uuid.uuid4())\n", + "uuidDirPath = \"/opt/data/tmp/demo-tetras-mars/\"+uuidStr+'/'\n", + "os.mkdir(uuidDirPath)\n", + "prefixPath = uuidDirPath+\"file\"\n", + "penmanPath = prefixPath+\".amr.penman\"\n", + "svgPath = prefixPath+\".amr.svg\"\n", + "ttlFilePath = uuidDirPath+onto_prefix+\"-0/\"+onto_prefix+\"_factoid.ttl\"\n", + "webvowlFileName = ttlFilePath.split('/')[-1].replace('ttl','json')\n", + "webvowlFilepath = WEBVOWL_PATH+uuidStr+'_'+webvowlFileName\n", + "uuidZipPath = MEDIA_PATH+uuidStr # without the .zip extention\n", + "uuidZipUrl = MEDIA_URL+uuidStr+\".zip\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a410a6b3-865d-441f-9b83-90a1badae291", + "metadata": {}, + "outputs": [], + "source": [ + "def clean_sting(string):\n", + " \"\"\" Sentence cleanup as needed \"\"\"\n", + " return re.sub(\"(\\.)*\\\\n\", \"\", string)\n", + "\n", + "def string2amr(string,stog):\n", + " stog_result = stog.parse_sents([clean_sting(string)], add_metadata=True) \n", + " return stog_result[0] \n", + "\n", + "def show_svg(path):\n", + " display(SVG(filename=path))\n", + " \n", + "def add_id_in_penman_if_needed(penmanStr,uuidStr):\n", + " if not(penmanStr.startswith('# ::id')):\n", + " penmanStr = '# ::id '+uuidStr+'\\n'+penmanStr\n", + " return penmanStr\n", + "\n", + "def owl2vowl(ttlFilePath, uuid='', importList=[]):\n", + " # Run java parser\n", + " if importList == []:\n", + " cmd = ['java', '-jar', owl2vowlPath,\n", + " '-file', ttlFilePath] \n", 
+ " else:\n", + " cmd = ['java', '-jar', owl2vowlPath,\n", + " '-file', ttlFilePath,\n", + " '-dependencies'] + importList \n", + " with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:\n", + " p.wait()\n", + " p.stdout.flush()\n", + " if p.returncode != 0:\n", + " print(\"Error in owl2vowl: \\n\\n\"+p.stdout.read().decode())\n", + " os.rename(webvowlFileName, webvowlFilepath)\n", + " \n", + "def localImage2htmlImg(imgPath):\n", + " with open(imgPath, \"rb\") as image_file:\n", + " img_data = base64.b64encode(image_file.read())\n", + " img_data = img_data.decode()\n", + " img_data = \"data:image/svg+xml;base64,{}\".format(img_data)\n", + " # ...\n", + " return html.Img(id=\"tag_id\", src=img_data, width=\"100%\", height=\"100%\", className=\"img_class\")#, alt=\"my image\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fd9cf0c-990a-4776-b206-8cc94f87c7be", + "metadata": {}, + "outputs": [], + "source": [ + "def processStr(input):\n", + " penmanStr = string2amr(input,stog)\n", + " format = 'svg'\n", + " penmanStr = add_id_in_penman_if_needed(penmanStr,uuidStr)\n", + " penmanFile = open(penmanPath,\"w\")\n", + " penmanFile.write(penmanStr)\n", + " penmanFile.close()\n", + " plot = AMRPlot(uuidDirPath+\"/file.amr\", format) \n", + " plot.build_from_graph(penmanStr)\n", + " plot.graph.render()\n", + " amrldWorkPenmanFilepath = AMRLD_PATH+\"/wk/\"+uuidStr+\".amr.penman\"\n", + " amrldWorkNtFilepath = AMRLD_PATH+\"/wk/\"+uuidStr+\".amr.nt\"\n", + "\n", + " amrNtPath = prefixPath+\".amr.nt\" \n", + " amrTtlPath = prefixPath+\".amr.ttl\" \n", + " os.chdir(AMRLD_PATH)\n", + "\n", + " amrld_process = [\"python3\", \"amr_to_rdf.py\", \n", + " \"-i\", penmanPath, \n", + " \"-o\", amrTtlPath,\n", + " \"-f\", \"ttl\" ]\n", + " subprocess.run(amrld_process) \n", + "\n", + " # We need to be in the tenet directory until this ticket is resolved: https://gitlab.tetras-libre.fr/tetras-mars/tenet/-/issues/133\n", + " os.chdir(TENET_PATH+'tenet/')\n", + " 
factoids = tenet.create_ontology_from_amrld_file(amrTtlPath,\n", + " onto_prefix=onto_prefix, # \"https://tenet.tetras-libre.fr/demo/\",\n", + " out_file_path=uuidDirPath+\"factoid.ttl\",\n", + " technical_dir_path=uuidDirPath)\n", + " webvowlFilepath = owl2vowl(ttlFilePath,uuid=uuidStr)\n", + " shutil.make_archive(uuidZipPath, 'zip', uuidDirPath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cd10e8b-cf7a-4fd4-b8ac-540fcb943325", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "##################################################################################################\n", + "# THE FOLLOWING PART IS SPECIFIC TO TÉTRAS LAB\n", + "#\n", + "# The _get_tl_config function gets configuration parameters for your\n", + "# Tétras Lab instance.\n", + "# Those parameters are passed when initialising the Dash app.\n", + "##################################################################################################\n", + "def _get_tl_config():\n", + " import socket, errno, os\n", + " # Find a free port\n", + " host = \"0.0.0.0\"\n", + " port = 8050\n", + " end = 9999\n", + " found = False\n", + " while not found:\n", + " with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:\n", + " try:\n", + " s.bind((host, port))\n", + " found = True\n", + " except socket.error as e:\n", + " if e.errno == errno.EADDRINUSE:\n", + " port = port + 1\n", + " if (port > end):\n", + " raise \"No available APP port\"\n", + " else:\n", + " raise e\n", + " if (os.getenv(\"HOST\", None) is not None):\n", + " proto = os.getenv(\"PROTO\")\n", + " actualhost = os.getenv(\"JUPYTER_HOST\", os.getenv(\"VOILA_HOST\", \"\"))\n", + " localport = os.getenv(\"PORT\", 80)\n", + " intermediatehost = os.getenv(\"HOST\", \"localhost\")\n", + " base_path = f\"/{actualhost}/app_proxy/{port}/\"\n", + " proxified= f\"{proto}://{intermediatehost}:{localport}{base_path}\"\n", + " localurl = f\"http://{host}:{port}\"\n", + " proxy = 
f\"{localurl}::{proxified}\"\n", + " return ((proxified, host, port, proxy, base_path))\n", + " return ((f\"http://localhost:{port}\", host, port, None, \"/\"))\n", + "\n", + "server_url, host, port, proxy, base_path = _get_tl_config()\n", + "\n", + "app = Dash(\n", + " server_url=server_url, \n", + " requests_pathname_prefix=base_path,\n", + ")\n", + "##################################################################################################\n", + "\n", + "\n", + "##################################################################################################\n", + "# THE FOLLOWING PART IS GENERIC (JUPYTER)-DASH CODE FROM https://dash.plotly.com/basic-callbacks\n", + "#\n", + "# It defines the app layout (text area, button, download link and\n", + "# loading output area) and the callback that is triggered when the\n", + "# button is clicked.\n", + "##################################################################################################\n", + "\n", + "app.layout = html.Div([\n", + " dcc.Textarea(\n", + " id='textarea-state',\n", + " value='Jupyter is a gas giant.',\n", + " style={'width': '100%', 'height': 200},\n", + " ),\n", + " html.Button('Construct AMR graphs and extract ontology', id='textarea-state-button', n_clicks=0),\n", + " #html.Button('Download result as zip', id='download-zip-button', n_clicks=0),\n", + " html.A(children=\"\", href='', target=\"_blank\",id=\"download-link\"),\n", + " dcc.Loading(html.Div(id='my-output'), color='#5A8264')\n", + "])\n", + "\n", + "@app.callback(\n", + " #Output('textarea-state-output', 'children'),\n", + " Output(component_id='my-output', component_property='children'),\n", + " Output(component_id='download-link', component_property='children'),\n", + " Output(component_id='download-link', component_property='href'), \n", + " Input('textarea-state-button', 'n_clicks'),\n", + " State('textarea-state', 'value'),\n", + " prevent_initial_call=True,\n", + ")\n", + "def update_output(n_clicks, value):\n", +
" if n_clicks > 0:\n", + " processStr(value)\n", + " #show_svg(svgPath) \n", + " #display(IFrame('''https://unsel.tetras-lab.io/webvowl/#{}\">'''.format(webvowlFilepath.replace(\"/opt/webvowl/\",\"\").replace(\".json\",\"\")),800,1200))\n", + " return [[\n", + " html.Iframe(src='''https://unsel.tetras-lab.io/webvowl/#{}'''.format(webvowlFilepath.replace(\"/opt/webvowl/\",\"\").replace(\".json\",\"\")),\n", + " style={\"height\": \"800px\", \"width\": \"100%\"}),\n", + " localImage2htmlImg(svgPath)\n", + " ],\n", + " \"Download Zip File\", uuidZipUrl\n", + " ]\n", + "\n", + "#@app.callback(\n", + "# Output(\"download-zip\", \"data\"),\n", + "# Input(\"download-zip-button\", \"n_clicks\"),\n", + "# prevent_initial_call=True,\n", + "#)\n", + "#def func(n_clicks):\n", + "# if n_clicks > 0:\n", + "# return dcc.send_file('https://unsel.tetras-lab.io/dashboard/17/media/9f7287d0-e7b2-4328-9137-7a7c44225b68.zip')\n", + " \n", + "\n", + " \n", + "app.run_server(mode=\"inline\", \n", + " host=host, port=port, proxy=proxy, height=2000)\n", + "##################################################################################################\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cbaf48e-3467-45b0-b931-20a758b79895", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- GitLab