Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
amrBatch
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Tetras MARS
amrBatch
Commits
108ca865
Commit
108ca865
authored
2 years ago
by
David Rouquet
Browse files
Options
Downloads
Patches
Plain Diff
Update AMR conversion with multiprocessing
parent
5bd63f5b
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
amrbatch/main.py
+72
-44
72 additions, 44 deletions
amrbatch/main.py
with
72 additions
and
44 deletions
amrbatch/main.py
+
72
−
44
View file @
108ca865
...
...
@@ -21,6 +21,8 @@ from amrlib.graph_processing.amr_plot import AMRPlot
from
filepath_manager
import
FilepathManager
from
work_data
import
WorkData
#global AMR_MODEL_PATH
# -- Config File Path
LIB_PATH
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
+
'
/
'
LOGGING_CONF_FILE_PATH
=
f
'
{
LIB_PATH
}
logging.conf
'
...
...
@@ -61,7 +63,7 @@ def __prepare_workdata(filepath_manager, line_set):
sentence_number
+=
1
new_data
=
WorkData
(
sentence
,
sentence_number
,
filepath_manager
)
workdata_list
.
append
(
new_data
)
logger
.
debug
(
f
'
*** sentence
{
sentence_number
}
***
\n
{
new_data
}
'
)
logger
.
debug
(
f
'
*** sentence
{
sentence_number
}
***
\n
{
new_data
.
sentence
}
'
)
logger
.
info
(
f
'
----- number of sentences:
{
len
(
workdata_list
)
}
'
)
return
workdata_list
...
...
@@ -88,14 +90,59 @@ def __generate_sentence_file(filepath_manager, workdata_list):
#==============================================================================
# Conversion Steps
# Sentence Conversion to AMR
#==============================================================================
# Per-process cache of loaded AMR models, keyed by model directory.
# Each pool worker handles many sentences; loading the model once per
# worker instead of once per sentence avoids repeating the costly load.
_STOG_MODEL_CACHE = {}


def __run_conversion(arg_dict):
    """Convert one sentence to an AMR graph (pool worker entry point).

    Args:
        arg_dict: dict with two keys:
            'data' -- work item exposing a ``sentence`` attribute; its
                ``graph`` attribute is set by this function.
            'amr_model_path' -- directory passed to
                ``amrlib.load_stog_model`` as ``model_dir``.

    Returns:
        The same ``data`` object with ``data.graph`` set to the first
        (and only) result of parsing ``data.sentence``.
    """
    data = arg_dict['data']
    amr_model_path = arg_dict['amr_model_path']

    stog = _STOG_MODEL_CACHE.get(amr_model_path)
    if stog is None:
        logger.info("-- Loading AMR model")
        stog = amrlib.load_stog_model(model_dir=amr_model_path)
        _STOG_MODEL_CACHE[amr_model_path] = stog

    logger.info("-- Converting sentences to AMR graphs")
    stog_result = stog.parse_sents([data.sentence])
    logger.info(f'----- Sentence successfully processed')
    logger.info(stog_result)

    # parse_sents was given a single sentence, so keep its single result.
    data.graph = stog_result[0]
    return data
def __convert_sentences_to_graphs(amr_model_path, input_data_list):
    """Convert text sentences to AMR graphs using a process pool.

    Args:
        amr_model_path: model directory handed to every conversion task.
        input_data_list: sized collection of work items, one per sentence.

    Returns:
        List of the work items returned by ``__run_conversion``, in input
        order; an empty list when there is nothing to convert.
    """
    # multiprocessing.Pool(0) raises ValueError, so bail out early.
    if not input_data_list:
        return []

    map_iterable = [
        {'data': data, 'amr_model_path': amr_model_path}
        for data in input_data_list
    ]

    # A quarter of the spare cores, never more workers than sentences and
    # never fewer than one (the rounded quotient is 0 on small machines).
    # NOTE(review): the /4 divisor presumably bounds how many model copies
    # are held in memory at once -- confirm.
    number_of_processes = max(
        1,
        min(round((multiprocessing.cpu_count() - 1) / 4), len(map_iterable)),
    )

    with multiprocessing.Pool(number_of_processes) as p:
        result_data_list = p.map(__run_conversion, map_iterable)

    logger.info(f'----- Total processed graph number: {len(result_data_list)}')
    return result_data_list
#==============================================================================
# AMR Graph File Generation
#==============================================================================
def
__generate_penman_amr_graph
(
filepath_manager
,
data
):
"""
AMR graph generation in penman format
"""
output_filepath
=
data
.
get_penman_amr_graph_output_filepath
()
logger
.
debug
(
f
"
----- AMR Graph file (penman):
{
os
.
path
.
basename
(
output_filepath
)
}
"
)
logger
.
info
(
f
"
----- AMR Graph file (penman):
{
os
.
path
.
basename
(
output_filepath
)
}
"
)
with
open
(
output_filepath
,
"
w
"
)
as
writing_file
:
# w = write
writing_file
.
write
(
data
.
id_line_str
)
writing_file
.
write
(
data
.
graph
)
...
...
@@ -107,14 +154,14 @@ def __generate_dot_amr_graph(filepath_manager, data):
try
:
# -- generating dot/png/svg files using AMRLib and GraphViz
dot_filename
=
data
.
get_dot_amr_graph_output_filepath
()
logger
.
debug
(
f
'
----- AMR Graph file (dot):
{
os
.
path
.
basename
(
dot_filename
)
}
'
)
logger
.
info
(
f
'
----- AMR Graph file (dot):
{
os
.
path
.
basename
(
dot_filename
)
}
'
)
format
=
'
png
'
plot
=
AMRPlot
(
dot_filename
,
format
)
plot
.
build_from_graph
(
data
.
graph
)
plot
.
graph
.
render
()
render_fn
=
dot_filename
+
'
.
'
+
format
# -- renaming PNG file
good_png_fn
=
data
.
get_png_amr_graph_output_filepath
()
logger
.
debug
(
f
'
----- AMR Graph file (png):
{
{os.path.basename(good_png_fn)}
}
'
)
logger
.
info
(
f
'
----- AMR Graph file (png):
{
os
.
path
.
basename
(
good_png_fn
)
}
'
)
os
.
rename
(
render_fn
,
good_png_fn
)
returnValue
=
dot_filename
format
=
'
svg
'
...
...
@@ -123,7 +170,7 @@ def __generate_dot_amr_graph(filepath_manager, data):
plot
.
graph
.
render
()
render_fn
=
dot_filename
+
'
.
'
+
format
# -- renaming PNG file
good_svg_fn
=
good_png_fn
.
replace
(
'
.png
'
,
'
.svg
'
)
logger
.
debug
(
f
'
----- AMR Graph file (svg):
{
{os.path.basename(good_svg_fn)}
}
'
)
logger
.
info
(
f
'
----- AMR Graph file (svg):
{
os
.
path
.
basename
(
good_svg_fn
)
}
'
)
os
.
rename
(
render_fn
,
good_svg_fn
)
except
Exception
as
ex
:
logger
.
warning
(
'
Exception when trying to plot:
'
+
ex
)
...
...
@@ -131,41 +178,12 @@ def __generate_dot_amr_graph(filepath_manager, data):
returnValue
=
'
Exception when trying to plot
'
return
(
returnValue
)
# Function executed when a worker is created in the pool
def init_pool_worker(
        amr_model_path="/home/daxid/hdd_data/jupyterlab_root/lib/amrModel/model_parse_xfm_bart_large-v0_1_0"):
    """Load the AMR model once when a pool worker process starts.

    The loaded model is stored in the module-level global ``stog`` of the
    worker process so that later tasks in that process can reuse it.

    Args:
        amr_model_path: model directory passed to
            ``amrlib.load_stog_model``. Defaults to the path that was
            previously hard-coded, so the function still works as a
            no-argument pool initializer.
    """
    # declare scope of a new global variable
    global stog
    # store the loaded model in the global variable for this process
    logger.info("-- Loading AMR model")
    # The original passed the undefined name `amr_model` (NameError).
    stog = amrlib.load_stog_model(model_dir=amr_model_path)
def __convert_sentence_to_graph_multiprocess_run(data):
    """Parse one work item's sentence with the worker-global ``stog`` model.

    Sets ``data.graph`` to the first parse result and returns the full
    result list produced by ``stog.parse_sents``.
    """
    print("in worker\n")
    sentence_label = 1  # fixed label used only in the log message below
    parsed = stog.parse_sents([data.sentence])
    logger.info(f'----- Sentence {sentence_label} successfully processed')
    logger.debug(parsed)
    data.graph = parsed[0]
    return parsed
def __convert_sentences_to_graphs(amr_model_path, workdata_list):
    """Convert text sentences to AMR graphs with a two-worker process pool.

    Args:
        amr_model_path: not used here; the pool initializer
            ``init_pool_worker`` is called without arguments.
        workdata_list: work items, one per sentence; each gets its
            ``graph`` attribute set from the worker result.

    Returns:
        ``workdata_list`` with ``graph`` filled in on every item.
    """
    # ----- (Multi-processing) Extraction Run
    with multiprocessing.Pool(2, initializer=init_pool_worker) as p:
        logger.info("-- Converting sentences to AMR graphs")
        print("pool created\n")
        stog_result_list = p.map(
            __convert_sentence_to_graph_multiprocess_run, workdata_list)

    # Workers receive pickled copies of the work items, so the graph they
    # assign never reaches the objects held here; copy it over from the
    # returned results (each result is a one-element list).
    for data, stog_result in zip(workdata_list, stog_result_list):
        data.graph = stog_result[0]

    logger.info(f'----- Total processed graph number: {len(stog_result_list)}')
    return workdata_list
def
__generate_amr_graph_files
(
filepath_manager
,
workdata_list
):
logger
.
info
(
"
-- Generating AMR graph files
"
)
# ----- Prepare multiprocessing data
starmapIterable
=
[(
data
,
filepath_manager
)
for
data
in
workdata_list
]
starmapIterable
=
[(
filepath_manager
,
data
)
for
data
in
workdata_list
]
# ----- (Multi-processing) Extraction Run
with
multiprocessing
.
Pool
(
multiprocessing
.
cpu_count
()
-
1
)
as
p
:
penmanFilePathList
=
p
.
starmap
(
__generate_penman_amr_graph
,
starmapIterable
)
...
...
@@ -174,7 +192,7 @@ def __generate_amr_graph_files(filepath_manager, workdata_list):
#==============================================================================
# Serialization
Steps
#
AMR
Serialization
#==============================================================================
def
__serialize_amr_graph_to_amr_rdf
(
filepath_manager
,
data
):
...
...
@@ -184,9 +202,9 @@ def __serialize_amr_graph_to_amr_rdf(filepath_manager, data):
amr_penman_filepath
=
data
.
get_penman_amr_graph_output_filepath
()
amr_rdf_triple_filepath
=
data
.
get_amr_rdf_triple_output_filepath
()
amr_rdf_turtle_filepath
=
data
.
get_amr_rdf_turtle_output_filepath
()
logger
.
debug
(
f
'
----- AMR filepath (penman):
{
amr_penman_filepath
}
'
)
logger
.
debug
(
f
'
----- AMR-RDF filepath (triple):
{
amr_rdf_triple_filepath
}
'
)
logger
.
debug
(
f
'
----- AMR-RDF filepath (turtle):
{
amr_rdf_turtle_filepath
}
'
)
logger
.
info
(
f
'
----- AMR filepath (penman):
{
os
.
path
.
basename
(
amr_penman_filepath
)
}
'
)
logger
.
info
(
f
'
----- AMR-RDF filepath (triple):
{
os
.
path
.
basename
(
amr_rdf_triple_filepath
)
}
'
)
logger
.
info
(
f
'
----- AMR-RDF filepath (turtle):
{
os
.
path
.
basename
(
amr_rdf_turtle_filepath
)
}
'
)
# -- AMR-LD processing
amrld_process
=
[
"
python3
"
,
"
amr_to_rdf.py
"
,
"
-i
"
,
amr_penman_filepath
,
"
-o
"
,
amr_rdf_triple_filepath
]
...
...
@@ -195,14 +213,21 @@ def __serialize_amr_graph_to_amr_rdf(filepath_manager, data):
os
.
chdir
(
AMRLD_DIR
)
subprocess
.
run
(
amrld_process
)
os
.
chdir
(
current_dirpath
)
if
(
os
.
path
.
isfile
(
amr_rdf_triple_filepath
)):
logger
.
info
(
f
'
----- AMR-RDF triple successfully processed (
{
os
.
path
.
basename
(
amr_rdf_triple_filepath
)
}
)
'
)
else
:
logger
.
info
(
f
'
----- *** Process of AMR-RDF triple generation failed ***
'
)
# -- Turtle Conversion
if
(
os
.
path
.
isfile
(
amr_rdf_triple_filepath
)):
g
=
Graph
()
g
.
parse
(
amr_rdf_triple_filepath
)
g
.
serialize
(
destination
=
amr_rdf_turtle_filepath
,
format
=
'
turtle
'
)
logger
.
info
(
f
'
----- AMR-RDF triple successfully processed (
{
os
.
path
.
basename
(
amr_rdf_turtle_filepath
)
}
)
'
)
if
(
os
.
path
.
isfile
(
amr_rdf_turtle_filepath
)):
logger
.
info
(
f
'
----- AMR-RDF turtle successfully processed (
{
os
.
path
.
basename
(
amr_rdf_turtle_filepath
)
}
)
'
)
else
:
logger
.
info
(
f
'
----- *** Process of AMR-RDF turtle generation failed ***
'
)
...
...
@@ -234,6 +259,9 @@ def __analyze_line_set_to_produce_amr_graphs(line_set, data_reference, amr_model
"""
#global AMR_MODEL_PATH
#AMR_MODEL_PATH = amr_model_path
logger
.
info
(
'
\n
=== Preparation ===
'
)
# -- Initialize a filepath manager
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment