Skip to content
Snippets Groups Projects
Commit 60086fc5 authored by Aurélien Lamercerie's avatar Aurélien Lamercerie
Browse files

Update propbank_analyzer with some functions to find and analyze a specific frame

parent 431c7279
No related branches found
No related tags found
No related merge requests found
......@@ -14,6 +14,8 @@
import sys
import glob
from bs4 import BeautifulSoup
#==============================================================================
# Parameters
......@@ -25,23 +27,103 @@ OUTPUT_DIR = "../outputData/"
# Data
# Directory prefix to which "<lemma>.xml" is appended when looking for a frame file
PROPBANK_FRAMES_DIR = "../propbankFrames/"
# Width to which roleset numbers are zero-padded on the left (e.g. 1 -> "01")
PBF_DIGITS = 2
#==============================================================================
# Functions to analyze and adapt the target description
#==============================================================================
def itemize_amr_predicate(amr_predicate):
    """Split an AMR predicate into its lemma and its roleset number.

    The roleset number is the decimal suffix after the LAST dash, so lemmas
    that themselves contain dashes (e.g. "have-org-role-91") are kept whole.
    When there is no dash, or the part after the last dash is not a decimal
    number, the whole string is taken as the lemma and the roleset number
    defaults to 1.

    Args:
        amr_predicate: predicate string such as "go-01" or "go".

    Returns:
        A ``(lemma, roleset_number)`` tuple, with ``roleset_number`` an int.
    """
    lemma, separator, suffix = amr_predicate.rpartition('-')
    # isdecimal() accepts exactly the digit strings that int() can parse
    if separator and suffix.isdecimal():
        return lemma, int(suffix)
    return amr_predicate, 1
def get_lemma_from_amr_predicate(amr_predicate):
    """Return only the lemma component of an AMR predicate."""
    return itemize_amr_predicate(amr_predicate)[0]
def get_role_ref_from_amr_predicate(amr_predicate):
    """Return the roleset number of an AMR predicate as a zero-padded string.

    The number is padded on the left with "0" up to PBF_DIGITS characters.
    """
    number = itemize_amr_predicate(amr_predicate)[1]
    return str(number).rjust(PBF_DIGITS, "0")
def get_roleset_id_from_amr_predicate(amr_predicate):
    """Build the roleset id "<lemma>.<padded roleset number>" for a predicate."""
    id_parts = (
        get_lemma_from_amr_predicate(amr_predicate),
        get_role_ref_from_amr_predicate(amr_predicate),
    )
    return '.'.join(id_parts)
def get_number_from_amr_role(amr_role):
    """Return the role number associated with an AMR role.

    NOTE: the argument is currently not inspected; 1 is returned for every
    input.
    """
    role_number = 1
    return role_number
#==============================================================================
# Functions to find the XML description corresponding to a roleset
#==============================================================================
def find_frame_filepath(lemma):
    """Find the Frame XML filepath(s) corresponding to a given lemma.

    Args:
        lemma: lemma used to build the file name "<lemma>.xml".

    Returns:
        The list of paths returned by glob for
        ``PROPBANK_FRAMES_DIR + lemma + '.xml'`` (empty when nothing matches).
    """
    target_file = PROPBANK_FRAMES_DIR + lemma + '.xml'
    return glob.glob(target_file, recursive=True)


def find_frame_of_lemma(lemma):
    """Find the Frame XML data corresponding to a given lemma.

    Args:
        lemma: lemma used to build the file name "<lemma>.xml".

    Returns:
        A ``(is_found, frame_filepath, frame_data)`` tuple. When a frame file
        is found, ``frame_filepath`` is the first matching path and
        ``frame_data`` is its content parsed by BeautifulSoup with the 'xml'
        parser. Otherwise the tuple is ``(False, '', None)``.
    """
    matches = find_frame_filepath(lemma)
    if not matches:
        return False, '', None

    frame_filepath = matches[0]
    with open(frame_filepath, 'r') as f:
        frame_data = BeautifulSoup(f.read(), 'xml')
    return True, frame_filepath, frame_data
#==============================================================================
# Functions to analyze a frame data
#==============================================================================
def get_roleset_in_frame(frame_data, lemma, roleset_id):
    """Get a roleset in a given frame data.

    Args:
        frame_data: parsed frame document exposing ``find(name, attrs)``,
            or None when no frame was loaded.
        lemma: value of the 'lemma' attribute of the wanted 'predicate'.
        roleset_id: value of the 'id' attribute of the wanted 'roleset'.

    Returns:
        A ``(is_found, roleset_data)`` tuple; ``(False, None)`` when the
        frame data, the predicate or the roleset is missing.
    """
    try:
        lemma_data = frame_data.find('predicate', {'lemma': lemma})
        roleset_data = lemma_data.find('roleset', {'id': roleset_id})
    except AttributeError:
        # frame_data is None (no frame loaded) or the predicate was not found
        return False, None
    # find() signals "no match" by returning None rather than raising
    return roleset_data is not None, roleset_data
def get_role_in_roleset(roleset_data, role_number):
    """Get a role in a given roleset data.

    Args:
        roleset_data: roleset element exposing ``find(name, attrs)``,
            or None when no roleset was found.
        role_number: value of the 'n' attribute of the wanted 'role'; it is
            converted to a string before matching.

    Returns:
        A ``(is_found, role_data)`` tuple; ``(False, None)`` when the roleset
        is missing or contains no such role.
    """
    try:
        role_data = roleset_data.find('role', {'n': str(role_number)})
    except AttributeError:
        # roleset_data is None (or has no find method): nothing to search
        return False, None
    # find() signals "no match" by returning None rather than raising
    return role_data is not None, role_data
......@@ -50,21 +132,57 @@ def find_frame_filepath(lemma):
# Main function
#==============================================================================
def _format_role(role_data):
    """Return "<f>, <descr>" for a role element (missing attributes show as None)."""
    return str(role_data.get('f')) + ', ' + str(role_data.get('descr'))


def main(amr_predicate, amr_role):
    """Print the PropBank frame information matching an AMR predicate and role.

    Args:
        amr_predicate: AMR predicate string, passed to the helper functions
            that derive the lemma and the roleset id.
        amr_role: AMR role string, passed to get_number_from_amr_role.
    """
    print("\n" + "[CMT] PropBank Frame Analyzer")

    # -- Analyze and adapt the target description
    print("-- Analyzing given data to specify the targetted data")
    print("----- given data: " + amr_predicate + ', ' + amr_role)
    lemma = get_lemma_from_amr_predicate(amr_predicate)
    print("----- lemma: " + lemma)
    roleset_id = get_roleset_id_from_amr_predicate(amr_predicate)
    print("----- roleset id: " + roleset_id)
    role_number = get_number_from_amr_role(amr_role)
    print("----- role number: " + str(role_number))

    # -- Find the Frame XML data corresponding to a given lemma
    print("-- Finding frame data")
    frame_found, frame_filepath, frame_data = find_frame_of_lemma(lemma)
    if frame_found:
        print("----- frame xml file found: " + frame_filepath)
    else:
        print("----- frame xml file not found")

    # -- Analyze frame data to get information about the roleset
    print("-- Analyzing frame data")
    rs_found, rs_data = get_roleset_in_frame(frame_data, lemma, roleset_id)
    # Treat a missing element as "not found" even if the flag says otherwise
    rs_found = rs_found and rs_data is not None
    nb_roles = -1
    if rs_found:
        print("----- roleset id: " + str(rs_data.get('id')))
        print("----- roleset name: " + str(rs_data.get('name')))
        nb_roles = len(rs_data.find_all('role'))
        print("----- number of roles: " + str(nb_roles))
        for n in range(nb_roles):
            role_found, role_data = get_role_in_roleset(rs_data, n)
            if role_found and role_data is not None:
                print("----- role " + str(n) + ': ' + _format_role(role_data))
            else:
                # No role carries this number in the roleset
                print("----- role " + str(n) + ': not found')
    else:
        print("----- roleset not found")

    # -- Find the requested role
    # `and`, not `&`: `&` binds tighter than `in` and would test
    # (rs_found & role_number) against the range instead of role_number.
    if rs_found and role_number in range(nb_roles):
        print("-- Finding role")
        role_found, role_data = get_role_in_roleset(rs_data, role_number)
        if role_found and role_data is not None:
            print("----- role found: " + _format_role(role_data))
        else:
            print("----- role not found")

    # -- Ending print
    print("\n" + "[SSC] Done")
if __name__ == "__main__":
    # main() needs both the AMR predicate and the AMR role
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " <amr_predicate> <amr_role>")
    main(sys.argv[1], sys.argv[2])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment