Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Corpus Making Tool
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Tetras MARS
corpus
Corpus Making Tool
Commits
60086fc5
Commit
60086fc5
authored
2 years ago
by
Aurélien Lamercerie
Browse files
Options
Downloads
Patches
Plain Diff
Update propbank_analyzer with some functions to find and analyze a specific frame
parent
431c7279
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
lib/propbank_analyzer.py
+130
-12
130 additions, 12 deletions
lib/propbank_analyzer.py
with
130 additions
and
12 deletions
lib/propbank_analyzer.py
+
130
−
12
View file @
60086fc5
...
...
@@ -14,6 +14,8 @@
import
sys
import
glob
from
bs4
import
BeautifulSoup
#==============================================================================
# Parameters
...
...
@@ -25,23 +27,103 @@ OUTPUT_DIR = "../outputData/"
# Data
# Directory prefix of the PropBank frame files; frame paths in this module are
# built as PROPBANK_FRAMES_DIR + lemma + '.xml'.
PROPBANK_FRAMES_DIR = "../propbankFrames/"
# Width of the roleset reference: roleset numbers are right-justified to this
# many characters and padded with "0".
PBF_DIGITS = 2
#==============================================================================
# Functions to analyze and adapt the target description
#==============================================================================
def itemize_amr_predicate(amr_predicate):
    """Split an AMR predicate into its lemma and roleset number.

    The predicate is split on its LAST hyphen only, so a lemma that itself
    contains hyphens (e.g. "look-up-05") is kept whole instead of making the
    numeric conversion fail on the second fragment.

    Args:
        amr_predicate: predicate string such as "go-01" or "go".

    Returns:
        A (lemma, roleset_number) tuple. When the predicate has no numeric
        suffix after a hyphen, the whole string is the lemma and the roleset
        number defaults to 1.
    """
    head, hyphen, tail = amr_predicate.rpartition('-')
    # isdecimal() accepts exactly the characters int() can convert, so the
    # conversion below cannot raise.
    if hyphen and tail.isdecimal():
        return head, int(tail)
    return amr_predicate, 1
def get_lemma_from_amr_predicate(amr_predicate):
    """Return only the lemma component of an AMR predicate.

    Delegates the parsing to itemize_amr_predicate and keeps the first
    element of the (lemma, roleset_number) pair it returns.
    """
    return itemize_amr_predicate(amr_predicate)[0]
def get_role_ref_from_amr_predicate(amr_predicate):
    """Return the roleset number of an AMR predicate as a padded string.

    The number comes from itemize_amr_predicate; it is rendered as text and
    right-justified with "0" to a width of PBF_DIGITS characters.
    """
    number = itemize_amr_predicate(amr_predicate)[1]
    number_text = str(number)
    return number_text.rjust(PBF_DIGITS, "0")
def get_roleset_id_from_amr_predicate(amr_predicate):
    """Build the roleset identifier "<lemma>.<roleset ref>" of a predicate.

    The lemma and the padded roleset reference are obtained from the two
    dedicated helpers and joined with a single dot.
    """
    id_parts = (
        get_lemma_from_amr_predicate(amr_predicate),
        get_role_ref_from_amr_predicate(amr_predicate),
    )
    return '.'.join(id_parts)
def get_number_from_amr_role(amr_role):
    """Extract the argument index from an AMR role label.

    The previous implementation ignored its argument and always returned 1.
    The index is now read from the digits following "ARG" (case-insensitive),
    e.g. ":ARG0" -> 0, "ARG2" -> 2, ":ARG1-of" -> 1.

    Args:
        amr_role: role label; converted with str() before matching.

    Returns:
        The integer index found after "ARG", or 1 (the former constant
        result) when the label contains no such index.
    """
    # Local import: this block cannot assume the module already imports re.
    import re

    match = re.search(r'ARG(\d+)', str(amr_role), re.IGNORECASE)
    if match is None:
        return 1
    return int(match.group(1))
#==============================================================================
# Functions to find the XML description corresponding to a roleset
#==============================================================================
def find_frame_of_lemma(lemma):
    """Find and parse the Frame XML file corresponding to a given lemma.

    The candidate path is PROPBANK_FRAMES_DIR + lemma + '.xml', resolved
    through glob; the first match is used.

    Args:
        lemma: lemma whose frame file is looked up.

    Returns:
        A (is_found, frame_filepath, frame_data) tuple. When a file matches,
        frame_data is the BeautifulSoup document parsed with the 'xml'
        parser. Otherwise the tuple is (False, '', None).
    """
    target_file = PROPBANK_FRAMES_DIR + lemma + '.xml'
    matches = glob.glob(target_file, recursive=True)
    if not matches:
        return False, '', None

    frame_filepath = matches[0]
    # Read raw bytes so the parser can detect the document encoding itself
    # instead of depending on the platform's default text encoding.
    with open(frame_filepath, 'rb') as f:
        frame_data = BeautifulSoup(f.read(), 'xml')
    return True, frame_filepath, frame_data


def find_frame_filepath(lemma):
    """Return the list of Frame XML file paths matching a given lemma.

    Earlier entry point, kept for callers that only need the paths; the
    result is the raw glob match list (empty when nothing matches).
    """
    target_file = PROPBANK_FRAMES_DIR + lemma + '.xml'
    return glob.glob(target_file, recursive=True)
#==============================================================================
# Functions to analyze a frame data
#==============================================================================
def get_roleset_in_frame(frame_data, lemma, roleset_id):
    """Get a roleset in a given frame data.

    Looks up the 'predicate' element whose 'lemma' attribute equals `lemma`,
    then the 'roleset' element inside it whose 'id' attribute equals
    `roleset_id`.

    Args:
        frame_data: parsed frame document exposing find(name, attrs), or
            None when no frame was loaded.
        lemma: value expected in the predicate's 'lemma' attribute.
        roleset_id: value expected in the roleset's 'id' attribute.

    Returns:
        A (is_found, roleset_data) tuple. is_found is True only when a
        roleset element was actually located; otherwise (False, None).
    """
    if frame_data is None:
        return False, None

    lemma_data = frame_data.find('predicate', {'lemma': lemma})
    if lemma_data is None:
        return False, None

    roleset_data = lemma_data.find('roleset', {'id': roleset_id})
    # find() yields None when nothing matches, which must not be reported as
    # a successful lookup.
    return roleset_data is not None, roleset_data
def get_role_in_roleset(roleset_data, role_number):
    """Get a role in a given roleset data.

    Looks up the 'role' element whose 'n' attribute equals the textual form
    of `role_number`.

    Args:
        roleset_data: roleset element exposing find(name, attrs), or None.
        role_number: role index; compared as a string because attribute
            values are text.

    Returns:
        A (is_found, role_data) tuple. is_found is True only when a role
        element was actually located; otherwise (False, None).
    """
    if roleset_data is None:
        return False, None

    role_data = roleset_data.find('role', {'n': str(role_number)})
    # A missing role comes back as None and is reported as not found.
    return role_data is not None, role_data
...
...
@@ -50,21 +132,57 @@ def find_frame_filepath(lemma):
# Main function
#==============================================================================
def _format_role(role_data):
    """Render a role element as "<f>,<descr>", using '' for a missing attribute."""
    return role_data.get('f', '') + ',' + role_data.get('descr', '')


def main(amr_predicate, amr_role):
    """Analyze the PropBank frame targeted by an AMR predicate and role.

    Prints, in order: the lemma, roleset id and role number derived from the
    arguments; whether a frame file was found for the lemma; the roleset
    description with its roles; and finally the role matching `amr_role`.

    Args:
        amr_predicate: AMR predicate string (e.g. "go-01").
        amr_role: AMR role label passed to get_number_from_amr_role.
    """
    # NOTE(review): whitespace inside the message literals could not be
    # recovered from the extracted page; confirm them against the repository.
    print("\n" + "[CMT] PropBank Frame Analyzer")

    # -- Analyze and adapt the target description
    print("-- Analyzing given data to specify the targetted data")
    print("----- given data:" + amr_predicate + ',' + amr_role)
    lemma = get_lemma_from_amr_predicate(amr_predicate)
    print("----- lemma:" + lemma)
    roleset_id = get_roleset_id_from_amr_predicate(amr_predicate)
    print("----- roleset id:" + roleset_id)
    role_number = get_number_from_amr_role(amr_role)
    print("----- role number:" + str(role_number))

    # -- Find the Frame XML data corresponding to the lemma
    print("-- Finding frame data")
    frame_found, frame_filepath, frame_data = find_frame_of_lemma(lemma)
    if frame_found:
        print("----- frame xml file found:" + frame_filepath)
    else:
        print("----- frame xml file not found")

    # -- Analyze frame data to get information about the roleset
    print("-- Analyzing frame data")
    rs_found, rs_data = get_roleset_in_frame(frame_data, lemma, roleset_id)
    # Treat a None roleset as "not found" even if the flag says otherwise.
    rs_found = bool(rs_found) and rs_data is not None
    nb_roles = -1
    if rs_found:
        print("----- roleset id:" + rs_data.get('id', ''))
        print("----- roleset name:" + rs_data.get('name', ''))
        nb_roles = len(rs_data.find_all('role'))
        print("----- number of roles:" + str(nb_roles))
        for n in range(nb_roles):
            _, role_data = get_role_in_roleset(rs_data, n)
            if role_data is None:
                # The roles of a roleset are not guaranteed to carry the
                # indices 0..nb_roles-1, so a lookup by index may fail.
                print("----- role" + str(n) + ':' + "not found")
                continue
            print("----- role" + str(n) + ':' + _format_role(role_data))
    else:
        print("----- roleset not found")

    # -- Find the role targeted by the given AMR role
    # `and` is required here: `rs_found & role_number in range(...)` parses
    # as `(rs_found & role_number) in range(...)`.
    if rs_found and role_number in range(nb_roles):
        print("-- Finding role")
        _, role_data = get_role_in_roleset(rs_data, role_number)
        if role_data is not None:
            print("----- role found:" + _format_role(role_data))

    # -- Ending print
    print("\n" + "[SSC] Done")
if __name__ == "__main__":
    # main() takes the AMR predicate and the AMR role; the former
    # single-argument call no longer matches its signature.
    if len(sys.argv) < 3:
        sys.exit("Usage: python3 propbank_analyzer.py <amr_predicate> <amr_role>")
    main(sys.argv[1], sys.argv[2])
...
...
This diff is collapsed.
Click to expand it.
Aurélien Lamercerie
@alam
mentioned in issue
#2
·
2 years ago
mentioned in issue
#2
mentioned in issue #2
Toggle commit list
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment