diff --git a/scripts/unlizeToNotebook.py b/scripts/unlizeToNotebook.py index 16b0f2fd64a74a73377471b49dd27fb6e6d9c868..a14eb98e30848def4106eec945ba48a4f5e011b1 100755 --- a/scripts/unlizeToNotebook.py +++ b/scripts/unlizeToNotebook.py @@ -2,11 +2,11 @@ import nbformat as nbf from nbformat.v4 import new_code_cell +import nbconvert as nbc import click from lxml import etree, objectify from unlizeXml import remove_namespace, unlize, nestedBody2Str - @click.command() @click.argument('input', nargs=1, type=click.Path(dir_okay=False, exists=True)) @@ -20,7 +20,6 @@ from unlizeXml import remove_namespace, unlize, nestedBody2Str help='if true do not send request to unl.ru') def unlizeXmlNb(input, output, template, lang, dry_run): nb = nbf.read(template, 4) - parser = etree.XMLParser(remove_comments=True) doc = objectify.parse(input, parser=parser) remove_namespace(doc) @@ -33,27 +32,21 @@ def unlizeXmlNb(input, output, template, lang, dry_run): addCell(nb, unl) else: addCell(nb, getText(node, 'unl')) - with open(output, 'w') as f: nbf.write(nb, f) - def getText(node, tag): try: return node.xpath(tag)[0].text except IndexError: return '' - def addCell(nb, unl): - code = """ -unldata = \"\"\" + code = """unldata = \"\"\" {unl} \"\"\" -displayUnl(unldata) -""".format(xml=xml, unl=unl) +displayUnl(unldata)""".format(unl=unl.strip()) nb['cells'].append(new_code_cell(code)) - if __name__ == '__main__': unlizeXmlNb() diff --git a/scripts/unlizeXmlNbSample.ipynb b/scripts/unlizeXmlNbSample.ipynb index 6f23f2cb2bda9b79a904c156344270fb9ab31e4a..7eb293f008be3f57c196020bd3cfbe630b822efb 100644 --- a/scripts/unlizeXmlNbSample.ipynb +++ b/scripts/unlizeXmlNbSample.ipynb @@ -9,6 +9,7 @@ "import tempfile\n", "import os\n", "import re\n", + "import requests\n", "from subprocess import Popen, PIPE, STDOUT\n", "from IPython.core.display import SVG" ] @@ -23,18 +24,15 @@ " with tempfile.NamedTemporaryFile() as temp:\n", " out_name = os.path.basename(temp.name)\n", " out_dir = os.path.dirname(temp.name)\n", - "\n", " with tempfile.NamedTemporaryFile(mode=\"w\") as in_file:\n", " # Remove CRLF and flush output to avoid java errors\n", " in_file.write(text.replace(\"\\r\\n\", \"\\n\"))\n", " in_file.flush()\n", - "\n", " # Run java parser\n", " cmd = ['java', '-jar', path,\n", " '--input-file', in_file.name,\n", " '--output-Dir', out_dir, '--output-file', out_name,\n", " '--output-type', 'dot']\n", - "\n", " with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:\n", " p.wait()\n", " p.stdout.flush()\n", @@ -42,7 +40,6 @@ " print(\"Error in unl2rdf: \\n\\n\"+p.stdout.read().decode())\n", " print('UNL;')\n", " print(text)\n", - "\n", " # generate dot output\n", " fname = '{}/{}.dot'.format(out_dir, out_name)\n", " cmd = ['dot', '-Tsvg', fname]\n", @@ -64,19 +61,36 @@ " return svg\n", " return \"\"\n", "\n", - "\n", + "def unl2dotWeb(unldata) :\n", + " data={'unl': unldata, 'outputs':['dot', 'svg']}\n", + " try:\n", + " r = requests.post('https://unl.demo.tetras-libre.fr/unl2rdf', data=data)\n", + " except Exception as e:\n", + " return 'Error calling https://unl.demo.tetras-libre.fr/unl2rdf : \"{error}\"'.format(error=e)\n", + " html=r.text\n", + " # On utilise une regex au lieu de parser le html car ce dernier est mal formé\n", + " regex = re.compile('<svg.*svg>',re.MULTILINE|re.DOTALL)\n", + " svg = regex.search(html).group()\n", + " return(svg)\n", + " \n", "def displayUnl(unldata) :\n", "# We generate protoSVG because whent there are several sentences, \n", "# a string composed of several concatenated SVG is produced (not a valid SVG).\n", "# We must then split the string to obtain several valid SVG to display.\n", - " protoSvg = unl2dot(unldata, \"unl2rdf-app-1.0-SNAPSHOT-jar-with-dependencies.jar\")\n", - " sep = \"</svg>\\n\"\n", - " svgArray = [x+sep for x in protoSvg.split(sep)]\n", - " svgArray.pop()\n", - " for svg in svgArray :\n", - " text = re.search('\\{org.*\\n(.*)\\n.*org\\}',unldata).group(1)\n", - " print(text)\n", - " display(SVG(svg))" + " sep = \"[/S]\\n\"\n", + " unldataArray = [x+sep for x in unldata.split(sep)]\n", + " unldataArray.pop()\n", + " for unl in unldataArray :\n", + " regex = re.compile('\\{org:..\\}\\n(.*)\\n{\\/org\\}',re.MULTILINE|re.DOTALL)\n", + " text = regex.search(unl).group(1)\n", + " print(\"\\n\"+text+\"\\n\")\n", + " # Keep one of the two lines below depending if you want to use a local jar or a webservice for unltools\n", + " try:\n", + " #svg = unl2dotWeb(unl)\n", + " svg = unl2dot(unl, \"unl2rdf-app-1.0-SNAPSHOT-jar-with-dependencies.jar\")\n", + " display(SVG(svg))\n", + " except Exception as e :\n", + " print(e)" ] } ],