Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Corpus Making Tool
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Tetras MARS
corpus
Corpus Making Tool
Commits
ab3309bb
Commit
ab3309bb
authored
2 years ago
by
Aurélien Lamercerie
Browse files
Options
Downloads
Patches
Plain Diff
Update propbank_analyzer to adjust searching operations
parent
60086fc5
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
lib/propbank_analyzer.py
+48
-35
48 additions, 35 deletions
lib/propbank_analyzer.py
requirements.txt
+2
-0
2 additions, 0 deletions
requirements.txt
with
50 additions
and
35 deletions
lib/propbank_analyzer.py
+
48
−
35
View file @
ab3309bb
...
...
@@ -13,6 +13,7 @@
import
sys
import
glob
import
re
from
bs4
import
BeautifulSoup
...
...
@@ -28,7 +29,7 @@ OUTPUT_DIR = "../outputData/"
# Data
PROPBANK_FRAMES_DIR
=
"
../propbankFrames/
"
PBF_DIGITS
=
2
AMR_CORE_ROLE_FORM
=
[
'
:ARG\d$
'
,
'
ARG\d$
'
,
'
\d$
'
]
#==============================================================================
...
...
@@ -64,7 +65,11 @@ def get_roleset_id_from_amr_predicate(amr_predicate):
def
get_number_from_amr_role
(
amr_role
):
return
1
role_number
=
-
1
for
role_format
in
AMR_CORE_ROLE_FORM
:
if
re
.
match
(
role_format
,
amr_role
):
role_number
=
int
(
amr_role
[
-
1
])
return
role_number
#==============================================================================
...
...
@@ -79,16 +84,16 @@ def find_frame_of_lemma(lemma):
frame_filepath
=
glob
.
glob
(
target_file
,
recursive
=
True
)
if
len
(
frame_filepath
)
>=
1
:
is_found
=
True
frame_filepath
=
frame_filepath
[
0
]
with
open
(
frame_filepath
,
'
r
'
)
as
f
:
xml_data
=
f
.
read
()
frame_data
=
BeautifulSoup
(
xml_data
,
'
xml
'
)
else
:
is_found
=
False
frame_filepath
=
''
frame_data
=
None
is_found
=
frame_data
is
not
None
return
is_found
,
frame_filepath
,
frame_data
...
...
@@ -96,35 +101,36 @@ def find_frame_of_lemma(lemma):
# Functions to analyze a frame data
#==============================================================================
def
get
_roleset_in_frame
(
frame_data
,
lemma
,
roleset_id
):
"""
Get a
roleset
in a given
frame data
def
find
_roleset_in_frame
(
frame_data
,
lemma
,
roleset_id
):
"""
Find the
roleset
corresponding to a lemma and an id in a
frame data
"""
try
:
lemma_data
=
frame_data
.
find
(
'
predicate
'
,
{
'
lemma
'
:
lemma
})
roleset_data
=
lemma_data
.
find
(
'
roleset
'
,
{
'
id
'
:
roleset_id
})
is_found
=
True
except
:
lemma_data
=
None
roleset_data
=
None
is_found
=
False
is_found
=
(
lemma_data
is
not
None
)
&
(
roleset_data
is
not
None
)
return
is_found
,
roleset_data
def
get_role_in_roleset
(
roleset_data
,
role_number
):
def
find_role_in_roleset
(
roleset_data
,
role_number
):
"""
Find the role corresponding to a given number in a roleset data
"""
try
:
role_data
=
roleset_data
.
find
(
'
role
'
,
{
'
n
'
:
role_number
})
is_found
=
True
except
:
roleset_data
=
None
is_found
=
False
return
is_found
,
role_data
role_data
=
None
is_found
=
(
role_data
is
not
None
)
return
is_found
,
role_data
...
...
@@ -152,30 +158,37 @@ def main(amr_predicate, amr_role):
if
frame_found
:
print
(
"
----- frame xml file found:
"
+
frame_filepath
)
else
:
print
(
"
----- frame xml file not found
"
)
print
(
"
----- frame xml file not found for lemma
"
+
lemma
)
if
frame_found
:
# -- Analyze frame data to get informations
print
(
"
-- Analyzing frame data
"
)
rs_found
,
rs_data
=
find_roleset_in_frame
(
frame_data
,
lemma
,
roleset_id
)
nb_roles
=
-
1
# -- Analyze frame data to get informations
print
(
"
-- Analyzing frame data
"
)
rs_found
,
rs_data
=
get_roleset_in_frame
(
frame_data
,
lemma
,
roleset_id
)
nb_roles
=
-
1
if
rs_found
:
print
(
"
----- roleset id:
"
+
rs_data
.
get
(
'
id
'
))
print
(
"
----- roleset name:
"
+
rs_data
.
get
(
'
name
'
))
nb_roles
=
len
(
rs_data
.
find_all
(
'
role
'
))
print
(
"
----- number of roles:
"
+
str
(
nb_roles
))
for
n
in
range
(
nb_roles
):
_
,
role_data
=
get_role_in_roleset
(
rs_data
,
n
)
print
(
"
----- role
"
+
str
(
n
)
+
'
:
'
+
role_data
.
get
(
'
f
'
)
+
if
rs_found
:
print
(
"
----- roleset id:
"
+
rs_data
.
get
(
'
id
'
))
print
(
"
----- roleset name:
"
+
rs_data
.
get
(
'
name
'
))
nb_roles
=
len
(
rs_data
.
find_all
(
'
role
'
))
print
(
"
----- number of roles:
"
+
str
(
nb_roles
))
for
n
in
range
(
nb_roles
):
_
,
role_data
=
find_role_in_roleset
(
rs_data
,
n
)
print
(
"
----- role
"
+
str
(
n
)
+
'
:
'
+
role_data
.
get
(
'
f
'
)
+
'
,
'
+
role_data
.
get
(
'
descr
'
))
else
:
print
(
"
----- roleset not found
"
)
else
:
print
(
"
----- roleset
"
+
roleset_id
+
"
not found
"
)
# -- Analyze frame data to get informations
if
rs_found
&
role_number
in
range
(
nb_roles
):
print
(
"
-- Finding role
"
)
_
,
role_data
=
get_role_in_roleset
(
rs_data
,
role_number
)
print
(
"
----- role found:
"
+
role_data
.
get
(
'
f
'
)
+
'
,
'
+
role_data
.
get
(
'
descr
'
))
# -- Analyze frame data to get informations
if
rs_found
&
role_number
in
range
(
nb_roles
):
print
(
"
-- Finding role
"
)
print
(
"
----- role number:
"
+
str
(
role_number
))
r_found
,
role_data
=
find_role_in_roleset
(
rs_data
,
role_number
)
if
r_found
:
print
(
"
----- role
"
+
str
(
role_number
)
+
"
found:
"
+
role_data
.
get
(
'
f
'
)
+
'
,
'
+
role_data
.
get
(
'
descr
'
))
else
:
print
(
"
----- role
"
+
str
(
role_number
)
+
"
not found
"
)
# -- Ending print
print
(
"
\n
"
+
"
[SSC] Done
"
)
...
...
This diff is collapsed.
Click to expand it.
requirements.txt
+
2
−
0
View file @
ab3309bb
...
...
@@ -4,3 +4,5 @@ argparse
numpy
rdflib
graphviz
bs4
lxml
This diff is collapsed.
Click to expand it.
Aurélien Lamercerie
@alam
mentioned in issue
#2
·
2 years ago
mentioned in issue
#2
mentioned in issue #2
Toggle commit list
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment