Skip to content
Snippets Groups Projects
Commit 0679d080 authored by David Rouquet's avatar David Rouquet
Browse files

Add the possibility to use the unl2rdf webservice instead of the unltools jar

parent 9ea8564d
Branches
No related tags found
No related merge requests found
Pipeline #206 passed
......@@ -2,11 +2,11 @@
import nbformat as nbf
from nbformat.v4 import new_code_cell
import nbconvert as nbc
import click
from lxml import etree, objectify
from unlizeXml import remove_namespace, unlize, nestedBody2Str
@click.command()
@click.argument('input', nargs=1,
type=click.Path(dir_okay=False, exists=True))
......@@ -20,7 +20,6 @@ from unlizeXml import remove_namespace, unlize, nestedBody2Str
help='if true do not send request to unl.ru')
def unlizeXmlNb(input, output, template, lang, dry_run):
nb = nbf.read(template, 4)
parser = etree.XMLParser(remove_comments=True)
doc = objectify.parse(input, parser=parser)
remove_namespace(doc)
......@@ -33,27 +32,21 @@ def unlizeXmlNb(input, output, template, lang, dry_run):
addCell(nb, unl)
else:
addCell(nb, getText(node, 'unl'))
with open(output, 'w') as f:
nbf.write(nb, f)
def getText(node, tag):
    """Return the text of the first element matched by ``tag`` under ``node``.

    Args:
        node: Object exposing an ``xpath(expr)`` method that returns a
            sequence of elements, each with a ``text`` attribute.
        tag: XPath expression (here a child tag name) to look up.

    Returns:
        The text of the first match, or ``''`` when nothing matches or when
        the matched element carries no text.
    """
    try:
        text = node.xpath(tag)[0].text
    except IndexError:
        return ''
    # An empty element has ``text`` set to None; normalise it to '' so that
    # callers can always treat the result as a string.
    return '' if text is None else text
def addCell(nb, unl):
    """Append to ``nb`` a code cell that displays the given UNL text.

    The generated cell assigns the UNL text to ``unldata`` inside a
    triple-quoted string and then calls ``displayUnl(unldata)``.

    Args:
        nb: Notebook object whose ``'cells'`` list receives the new cell.
        unl: UNL text to embed; surrounding whitespace is stripped. ``None``
            is treated as an empty string.
    """
    # NOTE(review): the text is embedded verbatim in a triple-quoted literal,
    # so UNL containing '"""' or backslashes would alter the generated code.
    body = (unl or '').strip()
    code = '\n'.join([
        'unldata = """',
        body,
        '"""',
        'displayUnl(unldata)',
    ])
    nb['cells'].append(new_code_cell(code))
# Script entry point: invoke the click command only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    unlizeXmlNb()
%% Cell type:code id: tags:
``` python
import tempfile
import os
import re
import requests
from subprocess import Popen, PIPE, STDOUT
from IPython.core.display import SVG
```
%% Cell type:code id: tags:
``` python
def unl2dot(text, path):
    """Render UNL text as SVG using the local unl2rdf jar and Graphviz.

    The UNL text is written to a temporary file, converted to a ``.dot``
    file by running ``java -jar <path>``, and that file is then rendered
    with ``dot -Tsvg``.

    Args:
        text: UNL source to convert.
        path: Path to the unl2rdf jar passed to ``java -jar``.

    Returns:
        The SVG produced by ``dot`` as a string, or ``""`` when rendering
        fails (diagnostics are printed in that case).
    """
    # The outer temporary file only serves to obtain a unique base name and
    # directory for the .dot file the jar is asked to write.
    with tempfile.NamedTemporaryFile() as temp:
        out_name = os.path.basename(temp.name)
        out_dir = os.path.dirname(temp.name)
        with tempfile.NamedTemporaryFile(mode="w") as in_file:
            # Remove CRLF and flush output to avoid java errors
            in_file.write(text.replace("\r\n", "\n"))
            in_file.flush()

            # Run java parser
            cmd = ['java', '-jar', path,
                   '--input-file', in_file.name,
                   '--output-Dir', out_dir, '--output-file', out_name,
                   '--output-type', 'dot']
            with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:
                # communicate() drains the pipe while waiting; wait() alone
                # can deadlock once the child fills the pipe buffer.
                java_output, _ = p.communicate()
            if p.returncode != 0:
                print("Error in unl2rdf: \n\n" + java_output.decode())
                print('UNL:')
                print(text)

    # Generate the SVG from the dot output
    fname = os.path.join(out_dir, out_name + '.dot')
    try:
        with Popen(['dot', '-Tsvg', fname], stdout=PIPE, stderr=PIPE) as p:
            svg_bytes, err_bytes = p.communicate()
        if p.returncode == 0:
            return svg_bytes.decode()

        print("Error creating svg: \n\n" + err_bytes.decode())
        print('UNL:')
        print(text)
        try:
            with open(fname) as f:
                print('DOT:')
                print(f.read())
        except FileNotFoundError:
            # The jar produced no .dot file; nothing more to show.
            pass
        return ""
    finally:
        # Always clean up the intermediate .dot file, including on failure.
        try:
            os.remove(fname)
        except FileNotFoundError:
            pass
def unl2dotWeb(unldata, url='https://unl.demo.tetras-libre.fr/unl2rdf',
               timeout=60):
    """Render UNL text as SVG through the unl2rdf web service.

    Args:
        unldata: UNL source to send to the service.
        url: Endpoint of the unl2rdf service.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The SVG markup extracted from the response. If the request itself
        fails (connection error, timeout, HTTP error status), an error
        message string is returned instead, as before.

    Raises:
        ValueError: If the response contains no ``<svg>`` element.
    """
    data = {'unl': unldata, 'outputs': ['dot', 'svg']}
    try:
        r = requests.post(url, data=data, timeout=timeout)
        # Treat HTTP error statuses like any other failed call.
        r.raise_for_status()
    except requests.RequestException as e:
        return 'Error calling {url} : "{error}"'.format(url=url, error=e)

    # A regex is used instead of parsing the HTML because the returned
    # HTML is malformed.
    match = re.search(r'<svg.*svg>', r.text, re.MULTILINE | re.DOTALL)
    if match is None:
        raise ValueError('No SVG found in the response from {}'.format(url))
    return match.group()
def displayUnl(unldata, use_webservice=False,
               jar_path="unl2rdf-app-1.0-SNAPSHOT-jar-with-dependencies.jar"):
    """Print and render each sentence of a UNL document.

    The document is split on its ``[/S]`` sentence terminators. For every
    sentence, the original text found between ``{org:xx}`` and ``{/org}``
    is printed and the sentence graph is displayed as an SVG.

    Args:
        unldata: UNL document, possibly containing several sentences.
        use_webservice: When true, render with ``unl2dotWeb``; otherwise
            use the local jar through ``unl2dot``.
        jar_path: Path to the unl2rdf jar used when ``use_webservice`` is
            false.
    """
    # Sentences are rendered one by one: several sentences in one request
    # would yield several concatenated SVGs, which is not a valid SVG.
    org_regex = re.compile(r'\{org:..\}\n(.*)\n\{/org\}',
                           re.MULTILINE | re.DOTALL)
    normalized = unldata.replace("\r\n", "\n")
    # Whatever follows the last terminator is not a complete sentence, hence
    # the [:-1]. The optional newline keeps a final "[/S]" without a trailing
    # newline from being dropped.
    pieces = re.split(r'\[/S\]\n?', normalized)[:-1]
    for piece in pieces:
        unl = piece + "[/S]\n"
        found = org_regex.search(unl)
        if found is not None:
            print("\n" + found.group(1) + "\n")
        try:
            if use_webservice:
                svg = unl2dotWeb(unl)
            else:
                svg = unl2dot(unl, jar_path)
            # An empty result means rendering failed and was already
            # reported; there is nothing to display.
            if svg:
                display(SVG(svg))
        except Exception as e:
            # Best effort: report the problem and go on with the next sentence.
            print(e)
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment