Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
nlreqdataset-unl-enco
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
UNL
nlreqdataset-unl-enco
Commits
0679d080
Commit
0679d080
authored
5 years ago
by
David Rouquet
Browse files
Options
Downloads
Patches
Plain Diff
Add possibility to use the unl2rdf webservice instead of the unltools jar
parent
9ea8564d
Branches
Branches containing commit
No related tags found
No related merge requests found
Pipeline
#206
passed
5 years ago
Stage: deploy
Changes
2
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
scripts/unlizeToNotebook.py
+3
-10
3 additions, 10 deletions
scripts/unlizeToNotebook.py
scripts/unlizeXmlNbSample.ipynb
+27
-13
27 additions, 13 deletions
scripts/unlizeXmlNbSample.ipynb
with
30 additions
and
23 deletions
scripts/unlizeToNotebook.py
+
3
−
10
View file @
0679d080
...
...
@@ -2,11 +2,11 @@
import
nbformat
as
nbf
from
nbformat.v4
import
new_code_cell
import
nbconvert
as
nbc
import
click
from
lxml
import
etree
,
objectify
from
unlizeXml
import
remove_namespace
,
unlize
,
nestedBody2Str
@click.command
()
@click.argument
(
'
input
'
,
nargs
=
1
,
type
=
click
.
Path
(
dir_okay
=
False
,
exists
=
True
))
...
...
@@ -20,7 +20,6 @@ from unlizeXml import remove_namespace, unlize, nestedBody2Str
help
=
'
if true do not send request to unl.ru
'
)
def
unlizeXmlNb
(
input
,
output
,
template
,
lang
,
dry_run
):
nb
=
nbf
.
read
(
template
,
4
)
parser
=
etree
.
XMLParser
(
remove_comments
=
True
)
doc
=
objectify
.
parse
(
input
,
parser
=
parser
)
remove_namespace
(
doc
)
...
...
@@ -33,27 +32,21 @@ def unlizeXmlNb(input, output, template, lang, dry_run):
addCell
(
nb
,
unl
)
else
:
addCell
(
nb
,
getText
(
node
,
'
unl
'
))
with
open
(
output
,
'
w
'
)
as
f
:
nbf
.
write
(
nb
,
f
)
def getText(node, tag):
    """Return the text of the first element matching *tag* under *node*.

    Args:
        node: Object exposing an ``xpath`` method (an lxml element in this
            script).
        tag: XPath expression evaluated relative to *node*.

    Returns:
        The text of the first match, or ``''`` when nothing matches or the
        matched element carries no text.
    """
    try:
        text = node.xpath(tag)[0].text
    except IndexError:
        # No element matched the expression.
        return ''
    # An empty element reports ``text`` as None; callers expect a string
    # (addCell calls ``.strip()`` on the result).
    return text if text is not None else ''
def
addCell
(
nb
,
unl
):
code
=
"""
unldata =
\"\"\"
code
=
"""
unldata =
\"\"\"
{unl}
\"\"\"
displayUnl(unldata)
"""
.
format
(
xml
=
xml
,
unl
=
unl
)
displayUnl(unldata)
"""
.
format
(
unl
=
unl
.
strip
())
nb
[
'
cells
'
].
append
(
new_code_cell
(
code
))
if
__name__
==
'
__main__
'
:
unlizeXmlNb
()
This diff is collapsed.
Click to expand it.
scripts/unlizeXmlNbSample.ipynb
+
27
−
13
View file @
0679d080
...
...
@@ -9,6 +9,7 @@
"import tempfile\n",
"import os\n",
"import re\n",
"import requests\n",
"from subprocess import Popen, PIPE, STDOUT\n",
"from IPython.core.display import SVG"
]
...
...
@@ -23,18 +24,15 @@
" with tempfile.NamedTemporaryFile() as temp:\n",
" out_name = os.path.basename(temp.name)\n",
" out_dir = os.path.dirname(temp.name)\n",
"\n",
" with tempfile.NamedTemporaryFile(mode=\"w\") as in_file:\n",
" # Remove CRLF and flush output to avoid java errors\n",
" in_file.write(text.replace(\"\\r\\n\", \"\\n\"))\n",
" in_file.flush()\n",
"\n",
" # Run java parser\n",
" cmd = ['java', '-jar', path,\n",
" '--input-file', in_file.name,\n",
" '--output-Dir', out_dir, '--output-file', out_name,\n",
" '--output-type', 'dot']\n",
"\n",
" with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:\n",
" p.wait()\n",
" p.stdout.flush()\n",
...
...
@@ -42,7 +40,6 @@
" print(\"Error in unl2rdf: \\n\\n\"+p.stdout.read().decode())\n",
" print('UNL;')\n",
" print(text)\n",
"\n",
" # generate dot output\n",
" fname = '{}/{}.dot'.format(out_dir, out_name)\n",
" cmd = ['dot', '-Tsvg', fname]\n",
...
...
@@ -64,19 +61,36 @@
" return svg\n",
" return \"\"\n",
"\n",
"def unl2dotWeb(unldata) :\n",
" data={'unl': unldata, 'outputs':['dot', 'svg']}\n",
" try:\n",
" r = requests.post('https://unl.demo.tetras-libre.fr/unl2rdf', data=data)\n",
" except Exception as e:\n",
" return 'Error calling https://unl.demo.tetras-libre.fr/unl2rdf : \"{error}\"'.format(error=e)\n",
" html=r.text\n",
" # On utilise une regex au lieu de parser le html car ce dernier est mal formé\n",
" regex = re.compile('<svg.*svg>',re.MULTILINE|re.DOTALL)\n",
" svg = regex.search(html).group()\n",
" return(svg)\n",
" \n",
"def displayUnl(unldata) :\n",
"# We generate protoSVG because when there are several sentences, \n",
"# a string composed of several concatenated SVG is produced (not a valid SVG).\n",
"# We must then split the string to obtain several valid SVG to display.\n",
" protoSvg = unl2dot(unldata, \"unl2rdf-app-1.0-SNAPSHOT-jar-with-dependencies.jar\")\n",
" sep = \"</svg>\\n\"\n",
" svgArray = [x+sep for x in protoSvg.split(sep)]\n",
" svgArray.pop()\n",
" for svg in svgArray :\n",
" text = re.search('\\{org.*\\n(.*)\\n.*org\\}',unldata).group(1)\n",
" print(text)\n",
" display(SVG(svg))"
" sep = \"[/S]\\n\"\n",
" unldataArray = [x+sep for x in unldata.split(sep)]\n",
" unldataArray.pop()\n",
" for unl in unldataArray :\n",
" regex = re.compile('\\{org:..\\}\\n(.*)\\n{\\/org\\}',re.MULTILINE|re.DOTALL)\n",
" text = regex.search(unl).group(1)\n",
" print(\"\\n\"+text+\"\\n\")\n",
"        # Keep one of the two lines below depending on whether you want to use a local jar or a webservice for unltools\n",
" try:\n",
" #svg = unl2dotWeb(unl)\n",
" svg = unl2dot(unl, \"unl2rdf-app-1.0-SNAPSHOT-jar-with-dependencies.jar\")\n",
" display(SVG(svg))\n",
" except Exception as e :\n",
" print(e)"
]
}
],
...
...
%% Cell type:code id: tags:
```
python
import
tempfile
import
os
import
re
import
requests
from
subprocess
import
Popen
,
PIPE
,
STDOUT
from
IPython.core.display
import
SVG
```
%% Cell type:code id: tags:
```
python
def unl2dot(text, path):
    """Convert UNL text to SVG with the local unl2rdf jar and Graphviz.

    The jar at *path* is run with ``--output-type dot`` to write a ``.dot``
    file, which is then rendered with ``dot -Tsvg``.

    Args:
        text: UNL source; CRLF line endings are normalised to LF before the
            text is handed to the jar.
        path: Path to the unl2rdf jar passed to ``java -jar``.

    Returns:
        The SVG produced by ``dot`` as a string, or ``""`` when rendering
        fails (diagnostics are printed in that case).
    """
    with tempfile.NamedTemporaryFile() as temp:
        # The temporary file only serves to obtain a unique base name and
        # directory for the .dot file written by the jar.
        out_dir, out_name = os.path.split(temp.name)
        fname = os.path.join(out_dir, out_name + '.dot')

        try:
            with tempfile.NamedTemporaryFile(mode="w") as in_file:
                # Remove CRLF and flush output to avoid java errors
                in_file.write(text.replace("\r\n", "\n"))
                in_file.flush()

                # Run java parser
                cmd = ['java', '-jar', path,
                       '--input-file', in_file.name,
                       '--output-Dir', out_dir, '--output-file', out_name,
                       '--output-type', 'dot']

                # communicate() drains the pipe while waiting; wait() alone
                # can deadlock once the child fills the pipe buffer.
                with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:
                    java_output, _ = p.communicate()
                if p.returncode != 0:
                    print("Error in unl2rdf: \n\n" + java_output.decode())
                    print('UNL:')
                    print(text)

            # generate dot output
            cmd = ['dot', '-Tsvg', fname]
            with Popen(cmd, stdout=PIPE, stderr=PIPE) as p:
                dot_output, dot_errors = p.communicate()
            if p.returncode == 0:
                return dot_output.decode()

            print("Error creating svg: \n\n" + dot_errors.decode())
            print('UNL:')
            print(text)
            try:
                with open(fname) as f:
                    print('DOT:')
                    print(f.read())
            except FileNotFoundError:
                # The jar produced no .dot file; nothing more to show.
                pass
        finally:
            # Remove the intermediate .dot file on every path, not only on
            # success, so failed runs do not litter the temp directory.
            try:
                os.remove(fname)
            except FileNotFoundError:
                pass
    return ""
def unl2dotWeb(unldata, url='https://unl.demo.tetras-libre.fr/unl2rdf',
               timeout=30):
    """Render UNL text to SVG through the unl2rdf web service.

    Args:
        unldata: UNL source text, posted as the ``unl`` form field.
        url: Endpoint of the unl2rdf service.
        timeout: Seconds to wait for the service before giving up, so an
            unresponsive server cannot hang the notebook.

    Returns:
        The SVG markup extracted from the response, or an error message
        string when the HTTP request itself fails.

    Raises:
        ValueError: If the response contains no ``<svg ...>...svg>`` span.
    """
    data = {'unl': unldata, 'outputs': ['dot', 'svg']}
    try:
        r = requests.post(url, data=data, timeout=timeout)
    except requests.RequestException as e:
        return 'Error calling {url} : "{error}"'.format(url=url, error=e)

    # A regex is used instead of an HTML parser because the returned HTML is
    # malformed.
    match = re.search(r'<svg.*svg>', r.text, re.MULTILINE | re.DOTALL)
    if match is None:
        raise ValueError(
            'No SVG found in response from {url} (HTTP status {status})'.format(
                url=url, status=r.status_code))
    return match.group()
def
displayUnl
(
unldata
)
:
# We generate protoSVG because when there are several sentences,
# a string composed of several concatenated SVG is produced (not a valid SVG).
# We must then split the string to obtain several valid SVG to display.
protoSvg
=
unl2dot
(
unldata
,
"
unl2rdf-app-1.0-SNAPSHOT-jar-with-dependencies.jar
"
)
sep
=
"
</svg>
\n
"
svgArray
=
[
x
+
sep
for
x
in
protoSvg
.
split
(
sep
)]
svgArray
.
pop
()
for
svg
in
svgArray
:
text
=
re
.
search
(
'
\{org.*
\n
(.*)
\n
.*org\}
'
,
unldata
).
group
(
1
)
print
(
text
)
display
(
SVG
(
svg
))
sep
=
"
[/S]
\n
"
unldataArray
=
[
x
+
sep
for
x
in
unldata
.
split
(
sep
)]
unldataArray
.
pop
()
for
unl
in
unldataArray
:
regex
=
re
.
compile
(
'
\{org:..\}
\n
(.*)
\n
{\/org\}
'
,
re
.
MULTILINE
|
re
.
DOTALL
)
text
=
regex
.
search
(
unl
).
group
(
1
)
print
(
"
\n
"
+
text
+
"
\n
"
)
# Keep one of the two lines below depending on whether you want to use a local jar or a webservice for unltools
try
:
#svg = unl2dotWeb(unl)
svg
=
unl2dot
(
unl
,
"
unl2rdf-app-1.0-SNAPSHOT-jar-with-dependencies.jar
"
)
display
(
SVG
(
svg
))
except
Exception
as
e
:
print
(
e
)
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment