Skip to content
Snippets Groups Projects
Commit ab3309bb authored by Aurélien Lamercerie
Browse files

Update propbank_analyzer to adjust searching operations

parent 60086fc5
No related branches found
No related tags found
No related merge requests found
......@@ -13,6 +13,7 @@
import sys
import glob
import re
from bs4 import BeautifulSoup
......@@ -28,7 +29,7 @@ OUTPUT_DIR = "../outputData/"
# Data
PROPBANK_FRAMES_DIR = "../propbankFrames/"
PBF_DIGITS = 2
# Regex spellings accepted for an AMR core role; they are tried with re.match,
# which anchors at the start of the string. Raw strings keep `\d` a regex
# escape instead of an invalid (and warned-about) string escape.
AMR_CORE_ROLE_FORM = [r':ARG\d$', r'ARG\d$', r'\d$']
#==============================================================================
......@@ -64,7 +65,11 @@ def get_roleset_id_from_amr_predicate(amr_predicate):
def get_number_from_amr_role(amr_role):
    """ Get the role number carried by an AMR role

    The role is checked against each pattern of AMR_CORE_ROLE_FORM using
    re.match. On the first match, the last character of the role is
    converted to an integer and returned.

    Returns -1 when the role matches none of the patterns.
    """
    for role_format in AMR_CORE_ROLE_FORM:
        if re.match(role_format, amr_role):
            # Every accepted pattern ends with a digit, so the role number
            # is the final character of the role string.
            return int(amr_role[-1])
    return -1
#==============================================================================
......@@ -79,16 +84,16 @@ def find_frame_of_lemma(lemma):
frame_filepath = glob.glob(target_file, recursive=True)
if len(frame_filepath) >= 1:
is_found = True
frame_filepath = frame_filepath[0]
with open(frame_filepath, 'r') as f:
xml_data = f.read()
frame_data = BeautifulSoup(xml_data, 'xml')
else:
is_found = False
frame_filepath = ''
frame_data = None
is_found = frame_data is not None
return is_found, frame_filepath, frame_data
......@@ -96,35 +101,36 @@ def find_frame_of_lemma(lemma):
# Functions to analyze a frame data
#==============================================================================
def find_roleset_in_frame(frame_data, lemma, roleset_id):
    """ Find the roleset corresponding to a lemma and an id in a frame data

    The 'predicate' element whose 'lemma' attribute equals lemma is looked
    up in frame_data, then the 'roleset' element whose 'id' attribute equals
    roleset_id is looked up inside that predicate.

    Returns a tuple (is_found, roleset_data). When frame_data is None, or
    when the predicate or the roleset cannot be found, the result is
    (False, None).
    """
    roleset_data = None
    if frame_data is not None:
        lemma_data = frame_data.find('predicate', {'lemma': lemma})
        # Only search for the roleset when the predicate itself exists
        if lemma_data is not None:
            roleset_data = lemma_data.find('roleset', {'id': roleset_id})
    return roleset_data is not None, roleset_data


# Earlier name of the function, kept so that existing calls keep working
get_roleset_in_frame = find_roleset_in_frame
def find_role_in_roleset(roleset_data, role_number):
    """ Find the role corresponding to a given number in a roleset data

    The 'role' element whose 'n' attribute equals role_number is looked up
    in roleset_data.

    Returns a tuple (is_found, role_data). When roleset_data is None or no
    such role exists, the result is (False, None).
    """
    role_data = None
    if roleset_data is not None:
        role_data = roleset_data.find('role', {'n': role_number})
    return role_data is not None, role_data


# Earlier name of the function, kept so that existing calls keep working
get_role_in_roleset = find_role_in_roleset
......@@ -152,30 +158,37 @@ def main(amr_predicate, amr_role):
if frame_found:
print("----- frame xml file found: " + frame_filepath)
else:
print("----- frame xml file not found")
print("----- frame xml file not found for lemma " + lemma)
if frame_found:
# -- Analyze frame data to get informations
print("-- Analyzing frame data")
rs_found, rs_data = find_roleset_in_frame(frame_data, lemma, roleset_id)
nb_roles = -1
# -- Analyze frame data to get informations
print("-- Analyzing frame data")
rs_found, rs_data = get_roleset_in_frame(frame_data, lemma, roleset_id)
nb_roles = -1
if rs_found:
print("----- roleset id: " + rs_data.get('id'))
print("----- roleset name: " + rs_data.get('name'))
nb_roles = len(rs_data.find_all('role'))
print("----- number of roles: " + str(nb_roles))
for n in range(nb_roles):
_, role_data = get_role_in_roleset(rs_data, n)
print("----- role " + str(n) + ': ' + role_data.get('f') +
if rs_found:
print("----- roleset id: " + rs_data.get('id'))
print("----- roleset name: " + rs_data.get('name'))
nb_roles = len(rs_data.find_all('role'))
print("----- number of roles: " + str(nb_roles))
for n in range(nb_roles):
_, role_data = find_role_in_roleset(rs_data, n)
print("----- role " + str(n) + ': ' + role_data.get('f') +
', ' + role_data.get('descr'))
else:
print("----- roleset not found")
else:
print("----- roleset " + roleset_id + " not found")
# -- Analyze frame data to get informations
if rs_found & role_number in range(nb_roles):
print("-- Finding role")
_, role_data = get_role_in_roleset(rs_data, role_number)
print("----- role found: " + role_data.get('f') +
', ' + role_data.get('descr'))
# -- Analyze frame data to get informations
if rs_found & role_number in range(nb_roles):
print("-- Finding role")
print("----- role number: " + str(role_number))
r_found, role_data = find_role_in_roleset(rs_data, role_number)
if r_found:
print("----- role " + str(role_number) + " found: " +
role_data.get('f') +
', ' + role_data.get('descr'))
else:
print("----- role " + str(role_number) + " not found")
# -- Ending print
print("\n" + "[SSC] Done")
......
......@@ -4,3 +4,5 @@ argparse
numpy
rdflib
graphviz
bs4
lxml
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment