{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "0d91640d-23ea-4079-b765-2eea030926c5", "metadata": {}, "outputs": [], "source": [ "%%capture\n", "\n", "try:\n", " from tetraslab.dash import Dash\n", "except ModuleNotFoundError:\n", " print(\"ModuleNotFoundError: No module named 'tetraslab', defaulting to regular Dash.\")\n", " from dash import Dash\n", "import importlib.util\n", "import re\n", "import amrlib\n", "from amrlib.graph_processing.amr_plot import AMRPlot\n", "import uuid\n", "from IPython.display import SVG, display\n", "import os\n", "import shutil\n", "import subprocess\n", "from subprocess import Popen, PIPE, STDOUT\n", "from glob import glob\n", "import sys\n", "import os\n", "from IPython.display import HTML,IFrame\n", "import ipywidgets\n", "import dash_bootstrap_components as dbc\n", "from dash import dcc, html, Input, Output\n", "from dash.dependencies import Input, Output, State\n", "import base64\n", "import xml.etree.ElementTree as ET\n", "import configparser\n", "\n", "config = configparser.ConfigParser()\n", "config.read('config.ini')\n", "#config['LIB']['']\n", "\n", "AMR_BATCH_PATH = config['LIB']['AMR_BATCH_PATH']\n", "sys.path.insert(0, os.path.abspath(AMR_BATCH_PATH))\n", "import amrbatch\n", "\n", "TENET_PATH = config['LIB']['TENET_PATH']\n", "sys.path.insert(0, os.path.abspath(TENET_PATH))\n", "import tenet\n", "\n", "BASE_URL = 'https://mars.tetras-lab.io'\n", "DASHBOARD_NUM = '1'\n", "onto_prefix=\"ontologyTarget\"\n", "\n", "AMR_MODEL_PATH = config['LIB']['AMR_MODEL_PATH']\n", "AMRLD_PATH = config['LIB']['AMRLD_PATH']\n", "owl2vowlPath = config['LIB']['owl2vowlPath']\n", "WEBVOWL_PATH = config['LIB']['WEBVOWL_PATH']\n", "\n", "MEDIA_PATH = \"/opt/dashboards/media/\"+DASHBOARD_NUM+\"/\"\n", "MEDIA_URL = BASE_URL+\"/dashboard/\"+DASHBOARD_NUM+\"/media/\"\n", "\n", "\n", "# The following is basically `import tenet`\n", "#spec=importlib.util.spec_from_file_location(\"tenet\",TENET_PATH+'tenet/__init__.py')\n", "#tenet = importlib.util.module_from_spec(spec)\n", "#spec.loader.exec_module(tenet)" ] }, { "cell_type": "code", "execution_count": null, "id": "295e4aef-bbd8-40f0-8d84-0b8032b7b039", "metadata": {}, "outputs": [], "source": [ "#stog = amrlib.load_stog_model(model_dir=\"/opt/dashboards/TetrasMARS/corpus/cm-tool/amrModel/model_parse_xfm_bart_large-v0_1_0\") \n", " \n", "\n", "\n", "#prefixPath = uuidDirPath+\"file\"\n", "#penmanPath = prefixPath+\".amr.penman\"\n", "#svgPath = prefixPath+\".amr.svg\"\n", "#ttlFilePath = uuidDirPath+onto_prefix+\"-0/\"+onto_prefix+\"_factoid.ttl\"\n" ] }, { "cell_type": "code", "execution_count": null, "id": "a410a6b3-865d-441f-9b83-90a1badae291", "metadata": {}, "outputs": [], "source": [ "def clean_sting(string):\n", " \"\"\" Sentence cleanup as needed \"\"\"\n", " return re.sub(\"(\\.)*\\\\n\", \"\", string)\n", "\n", "def string2amr(string,stog):\n", " stog_result = stog.parse_sents([clean_sting(string)], add_metadata=True) \n", " return stog_result[0] \n", "\n", "\n", "\n", "def show_svg(path):\n", " display(SVG(filename=path))\n", " \n", "def add_id_in_penman_if_needed(penmanStr,uuidStr):\n", " if not(penmanStr.startswith('# ::id')):\n", " penmanStr = '# ::id '+uuidStr+'\\n'+penmanStr\n", " return penmanStr\n", "\n", "def owl2vowl(ttlFilePath, webvowlFileName, webvowlFilePath, uuid='', importList=[]):\n", " # Run java parser\n", " if importList == []:\n", " cmd = ['java', '-jar', owl2vowlPath,\n", " '-file', ttlFilePath] \n", " else:\n", " cmd = ['java', '-jar', owl2vowlPath,\n", " '-file', ttlFilePath,\n", " '-dependencies'] + importList \n", " with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:\n", " p.wait()\n", " p.stdout.flush()\n", " if p.returncode != 0:\n", " print(\"Error in owl2vowl: \\n\\n\"+p.stdout.read().decode())\n", " os.rename(webvowlFileName, webvowlFilePath)\n", " \n", "def localImage2htmlImg(imgPath):\n", " with open(imgPath, \"rb\") as image_file:\n", " imageStr = image_file.read()\n", " root = ET.fromstring(imageStr)\n", " widthInt = int(root.attrib['width'].replace(\"pt\",\"\"))\n", " if widthInt > 1700 :\n", " widthStr = \"100%\"\n", " else:\n", " widthStr = str(widthInt)+\"pt\"\n", " img_data = base64.b64encode(imageStr)\n", " img_data = img_data.decode()\n", " img_data = \"data:image/svg+xml;base64,{}\".format(img_data)\n", " # ...\n", " return html.Img(id=\"tag_id\", src=img_data, width=widthStr, height=\"100%\", className=\"img_class\")#, alt=\"my image\"" ] }, { "cell_type": "code", "execution_count": null, "id": "5fd9cf0c-990a-4776-b206-8cc94f87c7be", "metadata": {}, "outputs": [], "source": [ "def processStr(input):\n", " # Define usefull variable and paths based on a uuid\n", " uuidStr = str(uuid.uuid4())\n", " uuidDirPath = \"/opt/data/tmp/demo-tetras-mars/\"+uuidStr+'/'\n", " os.mkdir(uuidDirPath)\n", " fullOntoPath = uuidDirPath+'full-ontology.ttl'\n", " ontoBySentencePath = uuidDirPath+'onto-by-sentence/'\n", " webvowlFileName = fullOntoPath.split('/')[-1].replace('ttl','json')\n", " webvowlFilePath = WEBVOWL_PATH+uuidStr+'_'+webvowlFileName\n", " uuidZipPath = MEDIA_PATH+uuidStr # without the .zip extention\n", " uuidZipUrl = MEDIA_URL+uuidStr+\".zip\"\n", " \n", " # Generate an AMR graph by sentence in a subfolder (with companion files such as images of the graphs)\n", " try: \n", " amr_graph_list = amrbatch.parse_document_string_to_produce_amr_graph(\n", " input, None, amr_model_path=AMR_MODEL_PATH, output_dirpath=uuidDirPath, \n", " amrld_serialization=True)\n", " except:\n", " errorStr = \"Error generating AMR graphs. You can try a simpler input. Sorry :(\"\n", " \n", " # Construct ontologies from each AMR graph plus a \"full\" one that is the union\n", " try:\n", " factoids = tenet.create_ontology_from_amrld_dir(\n", " uuidDirPath,\n", " onto_prefix=\"http://ontologies\",\n", " out_file_path=fullOntoPath,\n", " technical_dir_path=ontoBySentencePath)\n", " owl2vowl(fullOntoPath, webvowlFileName, webvowlFilePath, uuid=uuidStr)\n", " errorStr = \"\"\n", " except:\n", " errorStr = \"Error generating ontology. You can still clic this link to download AMR graphs or see them below. Try a simpler input, sorry :(\"\n", " \n", " # Create a zip file so the user can download all generated files\n", " shutil.make_archive(uuidZipPath, 'zip', uuidDirPath)\n", " return uuidDirPath, uuidZipUrl, webvowlFilePath, errorStr" ] }, { "cell_type": "code", "execution_count": null, "id": "0cd10e8b-cf7a-4fd4-b8ac-540fcb943325", "metadata": { "tags": [] }, "outputs": [], "source": [ "##################################################################################################\n", "# THE FOLLOWING PART IS SPECIFIC TO TÉTRAS LAB\n", "#\n", "# The _get_tl_config function gets configuration parameters for your\n", "# Tétras Lab instance.\n", "# Those parameters are passed when initialising the Dash app.\n", "##################################################################################################\n", "def _get_tl_config():\n", " import socket, errno, os\n", " # Find a free port\n", " host = \"0.0.0.0\"\n", " port = 8050\n", " end = 9999\n", " found = False\n", " while not found:\n", " with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:\n", " try:\n", " s.bind((host, port))\n", " found = True\n", " except socket.error as e:\n", " if e.errno == errno.EADDRINUSE:\n", " port = port + 1\n", " if (port > end):\n", " raise \"No available APP port\"\n", " else:\n", " raise e\n", " if (os.getenv(\"HOST\", None) is not None):\n", " proto = os.getenv(\"PROTO\")\n", " actualhost = os.getenv(\"JUPYTER_HOST\", os.getenv(\"VOILA_HOST\", \"\"))\n", " localport = os.getenv(\"PORT\", 80)\n", " intermediatehost = os.getenv(\"HOST\", \"localhost\")\n", " base_path = f\"/{actualhost}/app_proxy/{port}/\"\n", " proxified= f\"{proto}://{intermediatehost}:{localport}{base_path}\"\n", " localurl = f\"http://{host}:{port}\"\n", " proxy = f\"{localurl}::{proxified}\"\n", " return ((proxified, host, port, proxy, base_path))\n", " return ((f\"http://localhost:{port}\", host, port, None, \"/\"))\n", "\n", "server_url, host, port, proxy, base_path = _get_tl_config()\n", "\n", "app = Dash(\n", " requests_pathname_prefix=base_path,\n", " external_stylesheets=[dbc.themes.BOOTSTRAP]\n", ")\n", "##################################################################################################\n", "\n", "\n", "##################################################################################################\n", "# THE FOLLOWING PART IS GENERIC (JUPYTER)-DASH CODE FROM https://dash.plotly.com/basic-callbacks\n", "#\n", "# The _get_tl_config function gets configuration parameters for your\n", "# Tétras Lab instance.\n", "# Those parameters are passed when initialising the Dash app.\n", "##################################################################################################\n", "\n", "app.layout = html.Div([\n", " html.H4(\"Enter an english text and click on the button bellow to construct an ontology.\", style={'text-align': 'center'}),\n", " html.Br(),\n", " html.P(\"It should take about 10 to 30 sec/sentence. You can then browse the results online or download them as a zip file.\", style={'text-align': 'center'}),\n", " html.P(\"The text must be a simple succession of sentences. Fancy formating is not supported at the moment. \", style={'text-align': 'center'}), \n", " html.Br(),\n", " html.Br(),\n", " html.Div(dbc.Textarea(\n", " id='textarea-state',\n", " value='The Solar System is the gravitationally bound system of the Sun. The inner system planets are terrestrial planets composed of rock and metal. The outer system planets are giant planets. The two largest planets, Jupiter and Saturn, are gas giants, being composed of hydrogen and helium.',style={'width': '100%', 'height': 130}),\n", " style={'width': '60%', 'height': 130, 'margin-left': 'auto', 'margin-right': 'auto'},\n", " ),\n", " html.Br(), html.Br(),\n", " html.Div(\n", " dbc.Button('Construct AMR graphs and ontology', id='textarea-state-button', n_clicks=0, outline=True, color=\"primary\"),\n", " className=\"text-center\"),\n", " html.Br(),\n", " dcc.Loading(html.Div(html.A(children=\"\", href='', target=\"_blank\",id=\"download-link\"), className=\"text-center\")),\n", " html.Br(),\n", " html.Br(),\n", " dcc.Loading(html.Div(id='my-output'), type='circle'),\n", "])\n", "\n", "@app.callback(\n", " #Output('textarea-state-output', 'children'),\n", " Output(component_id='my-output', component_property='children'),\n", " Output(component_id='download-link', component_property='children'),\n", " Output(component_id='download-link', component_property='href'), \n", " Input('textarea-state-button', 'n_clicks'),\n", " State('textarea-state', 'value'),\n", " prevent_initial_call=True,\n", ")\n", "def update_output(n_clicks, value):\n", " if n_clicks > 0:\n", " uuidDirPath, uuidZipUrl, webvowlFilePath, errorStr = processStr(value)\n", " if \"Error\" in errorStr:\n", " feedbackStr = errorStr\n", " else:\n", " feedbackStr = \"Download Zip File\"\n", " #show_svg(svgPath) \n", " #display(IFrame(BASE_URL+'''/webvowl/#{}\">'''.format(webvowlFilePath.replace(\"/opt/webvowl/\",\"\").replace(\".json\",\"\")),800,1200))\n", " return [dbc.Row([dbc.Col(),\n", " dbc.Col(dbc.Accordion( \n", " [dbc.AccordionItem(\n", " [\n", " localImage2htmlImg(svgPath)\n", " ],\n", " title=\"AMR Graph for sentence \"+re.sub(r'.document.*', '', svgPath.replace(uuidDirPath+\"document-\",\"\")),\n", " )\n", " for svgPath in sorted(glob(uuidDirPath+\"document-*/*.svg\"))\n", " ]+\n", " [dbc.AccordionItem(\n", " [\n", " \"You can click a class to see its instances in the right panel, they are not shown in the graph.\",\n", " html.Br(),\n", " html.A(children=\"Open ontology browser in separate tab.\", href=BASE_URL+'''/webvowl/#{}'''.format(webvowlFilePath.replace(\"/opt/webvowl/\",\"\").replace(\".json\",\"\")), target=\"_blank\",id=\"webvowl-link\"),\n", " html.Br(),\n", " html.Iframe(src=BASE_URL+'''/webvowl/#{}'''.format(webvowlFilePath.replace(\"/opt/webvowl/\",\"\").replace(\".json\",\"\")),style={\"height\": \"800px\", \"width\": \"100%\"}),\n", " ],\n", " title=\"Browse ontology\", item_id='onto'\n", " )\n", " ] , active_item='onto'), width=10), dbc.Col()\n", " ]),\n", " feedbackStr, uuidZipUrl\n", " ]\n", "\n", "#@app.callback(\n", "# Output(\"download-zip\", \"data\"),\n", "# Input(\"download-zip-button\", \"n_clicks\"),\n", "# prevent_initial_call=True,\n", "#)\n", "#def func(n_clicks):\n", "# if n_clicks > 0:\n", "# return dcc.send_file(BASE_URL+'/dashboard/17/media/9f7287d0-e7b2-4328-9137-7a7c44225b68.zip')\n", " \n", "\n", " \n", "app.run_server(mode=\"inline\",\n", " #mode=\"external\",\n", " host=host, port=port, proxy=proxy, height=2000)\n", "##################################################################################################\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.18" } }, "nbformat": 4, "nbformat_minor": 5 }