Skip to content
Snippets Groups Projects
Commit b12b82b6 authored by Aurélien Lamercerie's avatar Aurélien Lamercerie
Browse files

Update AMR analyzer module

parent 21290792
No related branches found
No related tags found
No related merge requests found
# ::id SSC-01-01
# ::snt The Solar System is the gravitationally bound system of the Sun and the objects that orbit it, either directly or indirectly.
(s / system
:domain (p / planet
:name (n / name
......
......@@ -13,7 +13,7 @@
import sys
import glob
import re
import regex as re
import propbank_analyzer
from bs4 import BeautifulSoup
......@@ -27,80 +27,262 @@ from bs4 import BeautifulSoup
# Directories used for reading input graphs and writing results
INPUT_DIR = "../inputData/"
OUTPUT_DIR = "../outputData/"

# Data
PROPBANK_FRAMES_DIR = "../propbankFrames/"
PBF_DIGITS = 2

# NOTE: every pattern below is written as a raw string so that regex escapes
# such as \d and \( reach the regex engine untouched instead of relying on
# Python's deprecated handling of unknown string escapes. The string values
# are unchanged.
AMR_PREDICATE_FORM = [r'[a-z]+-0\d']
ROLE_SEARCH_RE = r'(?<=[a-z]+-0\d):ARG\d(?=[[a-z]+-0\d)|$]'
AMR_CORE_ROLE_FORM = [r':ARG\d']
# NOTE(review): the (?<c>) / (?<-c>) constructs look like balancing-group
# syntax from another regex dialect -- confirm the engine bound to `re`
# accepts this pattern before using it.
PARENTHICAL_EXPRESSION = r'\((?>\((?<c>)|[^()]+|\)(?<-c>))*(?(c)(?!))\)'
AMR_PREDICATE_SCOPE_FORM = ['(^())*']

# Regular expressions for AMR graph analysis
AMR_PRED_RE = r'[a-z]+-0\d'        # predicate token, e.g. "bind-01"
AMR_ARGOF_RE = r':ARG\d-of'        # inverse core role, e.g. ":ARG1-of"
# Core role ":ARGi" outside of any parenthesised span. The (*SKIP)(*FAIL)
# verbs are not part of the standard library `re` module; this relies on
# `re` being the third-party `regex` module (imported as `re` in this file).
AMR_ARG_RE = re.compile(r'''
\([^.]*\) (*SKIP)(*FAIL) # match anything in parentheses and "throw it away"
| # or
:ARG\d # match :ARGi
''', re.VERBOSE)
#==============================================================================
# Functions to find AMR predicate/argument relations
#==============================================================================
def find_pred_arg_relations(graph, relation_list):
    """Collect the direct predicate/argument relations of a graph.

    Every predicate occurrence (``AMR_PRED_RE``) is located in *graph*; the
    text following that occurrence is then scanned with ``AMR_ARG_RE`` and
    one ``(predicate, argument)`` pair is recorded per ``:ARGi`` match.

    The pairs are appended to *relation_list* in place and that same list
    is returned.
    """
    for found_pred in re.finditer(AMR_PRED_RE, graph):
        # Only the part of the graph after the predicate is searched.
        tail = graph[found_pred.end():]
        relation_list.extend(
            (found_pred.group(), found_arg.group())
            for found_arg in AMR_ARG_RE.finditer(tail)
        )
    return relation_list
def find_argof_pred_relations(graph, relation_list):
    """Collect the indirect predicate/argument relations of a graph.

    Each ``:ARGi-of`` occurrence (``AMR_ARGOF_RE``) is paired with the first
    predicate (``AMR_PRED_RE``) found after it in *graph*, and the pair
    ``(predicate, argument)`` is appended to *relation_list*.

    An ``:ARGi-of`` that is not followed by any predicate is skipped instead
    of raising ``IndexError``.

    The list is modified in place and also returned.
    """
    for arg_match in re.finditer(AMR_ARGOF_RE, graph):
        # Only the first predicate after the role is needed, so search()
        # replaces findall()[0] and gives an explicit "not found" result.
        pred_match = re.search(AMR_PRED_RE, graph[arg_match.end():])
        if pred_match is None:
            continue
        relation_list.append((pred_match.group(), arg_match.group()))
    return relation_list
def find_all_pred_arg_relations(graph, relation_list):
    """Collect every predicate/argument relation of a graph.

    Runs the direct (``:ARGi``) finder first and the indirect (``:ARGi-of``)
    finder second, threading *relation_list* through both, and returns the
    resulting list.
    """
    for finder in (find_pred_arg_relations, find_argof_pred_relations):
        relation_list = finder(graph, relation_list)
    return relation_list
#==============================================================================
# Functions to find AMR predicates and AMR core roles
# Functions to update relation list with PropBank roles (from PropBank frames)
#==============================================================================
def get_amr_predicate_list(amr_graph):
    """Return every substring of *amr_graph* matching a predicate pattern.

    The patterns are taken, in order, from ``AMR_PREDICATE_FORM``; matches of
    each pattern are listed in the order ``re.findall`` reports them.
    """
    return [
        hit
        for pattern in AMR_PREDICATE_FORM
        for hit in re.findall(pattern, amr_graph)
    ]
def update_relation_list_with_propbank_role(old_relation_list):
    """Extend each relation with an argument label carrying a PropBank role.

    For every ``(pred, orig_arg)`` pair, the first five characters of
    *orig_arg* (the ``:ARGi`` part) are looked up with
    ``propbank_analyzer.find_pb_role``. When a role is returned, the triple
    ``(pred, orig_arg, new_arg)`` is added to the result, where *new_arg* is
    ``:ARGi-<role>`` followed by characters 5..8 of *orig_arg* when the
    original argument has at least eight characters (e.g. a ``-of`` suffix).

    Relations for which no role is found are reported on stdout and left out
    of the returned list.
    """
    new_relation_list = []
    for (pred, orig_arg) in old_relation_list:
        orig_role = orig_arg[0:5]
        new_role = propbank_analyzer.find_pb_role(pred, orig_role)
        if new_role is not None:
            new_arg = orig_role + '-' + new_role
            if len(orig_arg) >= 8:
                # Keep the three characters that follow ":ARGi".
                new_arg += orig_arg[5:8]
            new_relation_list.append((pred, orig_arg, new_arg))
        else:
            print("*** relation (" + pred + ", " + orig_role + ") " +
                  "no found in PropBank frames ***")
    return new_relation_list


def get_parenthical_expression(amr_graph):
    """Return all matches of ``PARENTHICAL_EXPRESSION`` found in *amr_graph*."""
    result_list = []
    result_list.extend(re.findall(PARENTHICAL_EXPRESSION, amr_graph))
    return result_list


def get_core_role_list_of_predicate(amr_graph, predicate):
    """Return the matches of the ``AMR_PREDICATE_FORM`` patterns in *amr_graph*.

    The *predicate* parameter is accepted but not used.
    """
    # NOTE(review): this iterates AMR_PREDICATE_FORM although the function
    # name refers to core roles -- presumably AMR_CORE_ROLE_FORM was
    # intended; confirm before relying on the result.
    amr_core_role_list = []
    for target_re in AMR_PREDICATE_FORM:
        found_roles = re.findall(target_re, amr_graph)
        amr_core_role_list.extend(found_roles)
    return amr_core_role_list
#==============================================================================
# Main function
# Functions to substitute arguments in AMR graph
#==============================================================================
def main(amr_graph_file):
def sub_betwenn_pos(text, start, end, new_str):
    """Return *text* with the slice ``text[start:end]`` replaced by *new_str*."""
    head = text[:start]
    tail = text[end:]
    return head + new_str + tail
def substitute_pred_arg_relations(graph, relation_list):
    """Substitute direct predicate/argument relations in an AMR graph.

    *relation_list* holds ``(pred, old_arg, new_arg)`` triples. For each
    triple, every occurrence of *pred* in *graph* is located and the text
    following it is scanned with ``AMR_ARG_RE``; each ``:ARGi`` match equal
    to *old_arg* is replaced by *new_arg*.

    For each triple, all target spans are collected on an unmodified graph
    and then replaced from right to left, so a replacement never shifts the
    offsets of a span still to be processed. An argument that already reads
    *new_arg* is left alone, so a repeated triple does not append the role
    suffix twice.

    Returns the updated graph string.
    """
    for (pred, old_arg, new_arg) in relation_list:
        # When new_arg merely extends old_arg (':ARG0' -> ':ARG0-AGT'), an
        # already rewritten argument still matches the ':ARGi' pattern.
        extends_old = new_arg != old_arg and new_arg.startswith(old_arg)
        spans = set()  # a set: the same span can be reached from several predicates
        for pred_match in re.finditer(AMR_PRED_RE, graph):
            if pred_match.group() != pred:
                continue
            offset = pred_match.end()
            for arg_match in AMR_ARG_RE.finditer(graph[offset:]):
                if arg_match.group() != old_arg:
                    continue
                start = offset + arg_match.start()
                if extends_old and graph.startswith(new_arg, start):
                    continue  # already substituted
                spans.add((start, offset + arg_match.end()))
        # Right-to-left keeps the remaining (smaller) offsets valid.
        for start, end in sorted(spans, reverse=True):
            graph = sub_betwenn_pos(graph, start, end, new_arg)
    return graph
def substitute_argof_pred_relations(graph, relation_list):
    """Substitute indirect predicate/argument relations in an AMR graph.

    *relation_list* holds ``(pred, old_arg, new_arg)`` triples. For each
    triple, every ``:ARGi-of`` occurrence equal to *old_arg* whose first
    following predicate equals *pred* is replaced by *new_arg*.

    For each triple, the target spans are collected on an unmodified graph
    and replaced from right to left, so a replacement never shifts the
    offsets of a span still to be processed. An ``:ARGi-of`` that is not
    followed by any predicate is ignored instead of raising ``IndexError``.

    Returns the updated graph string.
    """
    for (pred, old_arg, new_arg) in relation_list:
        spans = []
        for arg_match in re.finditer(AMR_ARGOF_RE, graph):
            if arg_match.group() != old_arg:
                continue
            pred_match = re.search(AMR_PRED_RE, graph[arg_match.end():])
            if pred_match is not None and pred_match.group() == pred:
                spans.append(arg_match.span())
        # finditer yields spans left to right; apply them right to left.
        for start, end in reversed(spans):
            graph = sub_betwenn_pos(graph, start, end, new_arg)
    return graph
def substitute_all_pred_arg_relations(graph, relation_list):
    """Apply both substitution passes to an AMR graph and return the result.

    The direct (``:ARGi``) pass runs first; its output is fed to the
    indirect (``:ARGi-of``) pass.
    """
    after_direct = substitute_pred_arg_relations(graph, relation_list)
    return substitute_argof_pred_relations(after_direct, relation_list)
#==============================================================================
# Main Function(s)
#==============================================================================
def enrich_amr_graph_with_propbank_role(graph):
    """
    Enrich an AMR graph with PropBank roles.

    The predicate/argument relations of the graph are collected, completed
    with PropBank roles, and the resulting argument labels are substituted
    back into the graph.

    Parameters
    ----------
    graph : STRING
        AMR graph in PENMAN form.

    Returns
    -------
    graph : STRING
        AMR graph enriched with PropBank roles.
    """
    found_relations = find_all_pred_arg_relations(graph, [])
    role_relations = update_relation_list_with_propbank_role(found_relations)
    return substitute_all_pred_arg_relations(graph, role_relations)
#==============================================================================
# *** Dev Test ***
#==============================================================================
def dev_analyze(amr_graph_file):
"""Development walk-through of the AMR graph analyzer.

Reads the file INPUT_DIR + amr_graph_file, then prints the result of each
analysis stage: finding relations (step-by-step and all-in), updating the
relation list with PropBank roles, and substituting the new argument labels
in the graph (step-by-step and through the main function).
"""
# NOTE(review): indentation is not visible in this view, so the exact
# nesting of the if/else bodies below should be confirmed against the file.
print("\n" + "[CMT-Dev] AMR Graph Analyzer")
print("\n-- Start data")
amr_graph_file = INPUT_DIR + amr_graph_file
print("----- Reading file " + amr_graph_file)
with open(amr_graph_file, 'r') as f:
amr_graph_1 = f.read()
# Second copy of the graph text, used for the "main function" run below.
amr_graph_2 = ''.join(amr_graph_1)
print("----- AMR Graph 1: \n" + amr_graph_1)
print("----- AMR Graph 2: \n" + amr_graph_2)
# Relation list 1 is filled step by step, relation list 2 in a single call.
rel_list_1 = []
nb_relation_1 = len(rel_list_1)
print("----- Relation list 1 (init): " + str(rel_list_1))
print("----- Number of relations in list 1: " + str(nb_relation_1))
rel_list_2 = []
nb_relation_2 = len(rel_list_2)
print("----- Relation list 2 (init): " + str(rel_list_2))
print("----- Number of relations in list 2: " + str(nb_relation_2))
print("\n" + "[CMT] AMR Graph Analyzer")
print("\n-- Finding AMR predicate/argument relations (step-by-step)")
# Step 1: direct relations (:ARGi); a growing list means relations were found.
rel_list_1 = find_pred_arg_relations(amr_graph_1, rel_list_1)
if len(rel_list_1) > nb_relation_1:
nb_relation_1 = len(rel_list_1)
print("----- some relations found ")
print("----- Relation list (update): " + str(rel_list_1))
print("----- Number of relations in list 1: " + str(nb_relation_1))
else:
print("----- no relation found ")
# Step 2: indirect relations (:ARGi-of), appended to the same list.
rel_list_1 = find_argof_pred_relations(amr_graph_1, rel_list_1)
if len(rel_list_1) > nb_relation_1:
nb_relation_1 = len(rel_list_1)
print("----- some relations found ")
print("----- Relation list (update): " + str(rel_list_1))
print("----- Number of relations in list 1: " + str(nb_relation_1))
else:
print("----- no relation found ")
# Ad-hoc check of ROLE_SEARCH_RE against a fixed sample string.
print(re.findall(ROLE_SEARCH_RE, 'test (d \ bind-01 :ARG1)'))
print("\n-- Finding AMR predicate/argument relations (all-in)")
rel_list_2 = find_all_pred_arg_relations(amr_graph_1, rel_list_2)
if len(rel_list_2) > nb_relation_2:
nb_relation_2 = len(rel_list_2)
print("----- some relations found ")
print("----- Relation list (update): " + str(rel_list_2))
print("----- Number of relations: " + str(nb_relation_2))
else:
print("----- no relation found ")
# amr_graph_file = INPUT_DIR + amr_graph_file
# print("-- Reading file " + amr_graph_file)
# with open(amr_graph_file, 'r') as f:
# amr_graph = f.read()
# print("----- AMR Graph: \n" + amr_graph)
print("\n-- Update relation list with probbank roles (from propbank frames)")
# The updated list is shorter than the input when some relation had no
# PropBank role, which is what the comparison below reports.
rel_list_3 = update_relation_list_with_propbank_role(rel_list_2)
nb_relation_3 = len(rel_list_3)
if nb_relation_3 >= nb_relation_2:
print("----- All relation update (good!)")
else:
print("----- Update imperfect")
print("----- Relation list (update): " + str(rel_list_3))
print("----- Number of relations: " + str(nb_relation_3))
# print("-- Analyzing graph ")
# amr_predicate_list = get_amr_predicate_list(amr_graph)
# print("--- predicates found: ")
# if len(amr_predicate_list) > 0:
# for p in amr_predicate_list:
# print("----- " + p)
# else:
# print("None")
print("\n-- Enrich AMR graph with PropBank roles (step-by-step)")
# Direct substitutions first, then indirect ones, on graph copy 1.
amr_graph_1 = substitute_pred_arg_relations(amr_graph_1, rel_list_3)
print("----- AMR Graph 1 (update after step 1): \n" + amr_graph_1)
amr_graph_1 = substitute_argof_pred_relations(amr_graph_1, rel_list_3)
print("----- AMR Graph 1 (update after step 2): \n" + amr_graph_1)
# parenthical_expression_list = get_parenthical_expression(amr_graph)
# print("-- Parenthical expression found: ")
# if len(parenthical_expression_list) > 0:
# for e in parenthical_expression_list:
# print("----- " + e)
# else:
# print("None")
print("\n-- Enrich AMR graph with PropBank roles (main function)")
# Same enrichment through the single entry point, on graph copy 2.
amr_graph_2 = enrich_amr_graph_with_propbank_role(amr_graph_2)
print("----- AMR Graph 2 (update): \n" + amr_graph_2)
# -- Ending print
print("\n" + "[SSC] Done")
# Script entry point: passes the first command-line argument to main().
# NOTE(review): only a bare `def main(amr_graph_file):` header is visible in
# this view -- confirm that `main` is still defined with a body.
if __name__ == "__main__":
main(sys.argv[1])
def dev_test_1():
    """Run ``dev_analyze`` on the sample graph file."""
    sample_file = 'test-amr-graph-1.penman'
    dev_analyze(sample_file)
......
......@@ -187,7 +187,7 @@ def find_pb_role(amr_predicate, amr_role):
def dev_analyze(amr_predicate, amr_role):
print("\n" + "[CMT] PropBank Frame Analyzer")
print("\n" + "[CMT-Dev] PropBank Frame Analyzer")
# -- Analyze and adapt the target description
print("-- Analyzing given data to specify the targetted data")
......
import regex as re
import propbank_analyzer as pba
print("[DEV] Regular Expression Test")

# -- Test data
print("\n-- Données de test")
GRAPH_INIT = ''' (s / system
:domain (p / planet
:name (n / name
:op1 "Solar"
:op2 "System"))
:ARG1-of (b / bind-01
:ARG0 (g / gravitation))
:part (a / and
:op1 (s2 / sun)
:op2 (o / object
:ARG0-of (o2 / orbit-01
:ARG1 s2
:manner (o3 / or
:op1 (d / direct-02)
:op2 (d2 / direct-02
:polarity -))))))'''
print("----- graphe AMR traité : " + GRAPH_INIT)

# Hand-written (predicate, original argument, replacement argument) triples
substitutions = [
    ('bind-01', ':ARG0', ':ARG0-AGT'),
    ('orbit-01', ':ARG1', ':ARG1-PPT'),
    ('bind-01', ':ARG1-of', ':ARG1-PPT-of'),
    ('orbit-01', ':ARG0-of', ':ARG0-GOL-of'),
]
print("----- substitutions visées : " + str(substitutions))

# ":ARGi" outside of any parenthesised span. The (*SKIP)(*FAIL) verbs rely
# on `re` being the third-party `regex` module (imported as `re` above).
rx = re.compile(r'''
\([^.]*\) (*SKIP)(*FAIL) # match anything in parentheses and "throw it away"
| # or
:ARG\d # match :ARGi
''', re.VERBOSE)
# Same construction for ":ARGi-of".
rx_2 = re.compile(r'''
\([^.]*\) (*SKIP)(*FAIL) # match anything in parentheses and "throw it away"
| # or
:ARG\d-of # match :ARGi-of
''', re.VERBOSE)
# Raw strings keep the \d escape intact for the regex engine; the values are
# the same as the former non-raw literals.
PRED_PATTERN = r'[a-z]+-0\d'
ARGOF_PATTERN = r':ARG\d-of'

# -- Search for (predicate, argument) relations
print("\n-- Recherche des relations (predicat, argument)")
graph_1 = GRAPH_INIT
pred_arg_relation_list = []
# ----- arguments of each predicate
def find_pred_arg_relations(graph, pred_arg_relation_list):
    """Record, with positions, the ``:ARGi`` arguments found after each predicate.

    For every ``PRED_PATTERN`` match in *graph*, the text after the match is
    scanned with ``rx``. Each hit is printed and appended to
    *pred_arg_relation_list* as ``(predicate, argument, start, end)``, where
    *start* and *end* are offsets of the argument within *graph*.

    The list is modified in place and returned.
    """
    for pred_hit in re.finditer(PRED_PATTERN, graph):
        print("----- Match pour prédicat: " + str(pred_hit))
        offset = pred_hit.end()
        for arg_hit in rx.finditer(graph[offset:]):
            print("-------- Match pour argument de type ARGi: " + str(arg_hit))
            # Matches are relative to the slice; shift them back to graph offsets.
            rel_start, rel_end = arg_hit.span()
            pred_arg_relation_list.append(
                (pred_hit.group(), arg_hit.group(),
                 offset + rel_start, offset + rel_end))
    return pred_arg_relation_list


pred_arg_relation_list = find_pred_arg_relations(graph_1, pred_arg_relation_list)
# ----- prédicat pour chaque ARGi-of
def find_argof_pred_relations(graph, pred_arg_relation_list):
    """Record, with positions, the predicate attached to each ``:ARGi-of``.

    Each ``ARGOF_PATTERN`` match in *graph* is paired with the first
    ``PRED_PATTERN`` match found after it, and
    ``(predicate, argument, start, end)`` is appended to
    *pred_arg_relation_list*, where *start* and *end* delimit the argument
    in *graph*.

    An ``:ARGi-of`` that is not followed by any predicate is skipped instead
    of raising ``IndexError``.

    The list is modified in place and returned.
    """
    for arg_match in re.finditer(ARGOF_PATTERN, graph):
        print("----- Match pour argument de type ARGi-of: " + str(arg_match))
        # Only the first predicate after the role is needed.
        pred_match = re.search(PRED_PATTERN, graph[arg_match.end():])
        if pred_match is None:
            continue
        print("-------- Prédicat correspondant: " + pred_match.group())
        arg_pos_start, arg_pos_end = arg_match.span()
        pred_arg_relation_list.append((pred_match.group(), arg_match.group(),
                                       arg_pos_start, arg_pos_end))
    return pred_arg_relation_list


find_argof_pred_relations(graph_1, pred_arg_relation_list)
# Show every relation tuple collected so far, one per line.
print("----- Resultat (matchs trouvés) :")
for r in pred_arg_relation_list:
    print(r)

# -- Substitute the arguments in the graph
print("\n-- Substitution des arguments dans le graphe")
graph_2 = GRAPH_INIT
def sub_betwenn_pos(text, start, end, new_str):
    """Return *text* with the slice ``text[start:end]`` replaced by *new_str*."""
    before = text[:start]
    after = text[end:]
    return before + new_str + after
# ----- argument pour chaque prédicat
def sub_pred_arg_relations(graph, sub_list):
    """Replace direct ``:ARGi`` arguments of given predicates in *graph*.

    *sub_list* holds ``(pred, old_arg, new_arg)`` triples. For each triple,
    every occurrence of *pred* is located with ``PRED_PATTERN`` and the text
    after it is scanned with ``rx``; each match equal to *old_arg* is
    replaced by *new_arg*, and the replaced segment is printed.

    For each triple, all target spans are collected on an unmodified graph
    and replaced from right to left, so a replacement never shifts the
    offsets of a span still to be processed. An argument that already reads
    *new_arg* is left alone, so it does not receive the role suffix twice.

    Returns the updated graph string.
    """
    for (pred, old_arg, new_arg) in sub_list:
        # When new_arg merely extends old_arg (':ARG0' -> ':ARG0-AGT'), an
        # already rewritten argument still matches the ':ARGi' pattern.
        extends_old = new_arg != old_arg and new_arg.startswith(old_arg)
        spans = set()  # a set: the same span can be reached from several predicates
        for pred_match in re.finditer(PRED_PATTERN, graph):
            if pred_match.group() != pred:
                continue
            offset = pred_match.end()
            for arg_match in rx.finditer(graph[offset:]):
                if arg_match.group() != old_arg:
                    continue
                arg_pos_start = offset + arg_match.start()
                if extends_old and graph.startswith(new_arg, arg_pos_start):
                    continue  # already substituted
                spans.add((arg_pos_start, offset + arg_match.end()))
        # Right-to-left keeps the remaining (smaller) offsets valid.
        for arg_pos_start, arg_pos_end in sorted(spans, reverse=True):
            print("----- substition de " + new_arg +
                  " sur le segment [" + str(arg_pos_start) +
                  ", " + str(arg_pos_end) + "]")
            graph = sub_betwenn_pos(graph,
                                    arg_pos_start,
                                    arg_pos_end,
                                    new_arg)
    return graph


graph_2 = sub_pred_arg_relations(graph_2, substitutions)
# ----- prédicat pour chaque ARGi-of
def sub_argof_pred_relations(graph, sub_list):
    """Replace ``:ARGi-of`` arguments attached to given predicates in *graph*.

    *sub_list* holds ``(pred, old_arg, new_arg)`` triples. For each triple,
    every ``ARGOF_PATTERN`` match equal to *old_arg* whose first following
    ``PRED_PATTERN`` match equals *pred* is replaced by *new_arg*, and the
    replaced segment is printed.

    For each triple, the target spans are collected on an unmodified graph
    and replaced from right to left, so a replacement never shifts the
    offsets of a span still to be processed. An ``:ARGi-of`` that is not
    followed by any predicate is ignored instead of raising ``IndexError``.

    Returns the updated graph string.
    """
    for (pred, old_arg, new_arg) in sub_list:
        spans = []
        for arg_match in re.finditer(ARGOF_PATTERN, graph):
            if arg_match.group() != old_arg:
                continue
            pred_match = re.search(PRED_PATTERN, graph[arg_match.end():])
            if pred_match is not None and pred_match.group() == pred:
                spans.append(arg_match.span())
        # finditer yields spans left to right; apply them right to left.
        for arg_pos_start, arg_pos_end in reversed(spans):
            print("----- substition de " + new_arg +
                  " sur le segment [" + str(arg_pos_start) +
                  ", " + str(arg_pos_end) + "]")
            graph = sub_betwenn_pos(graph,
                                    arg_pos_start,
                                    arg_pos_end,
                                    new_arg)
    return graph


graph_2 = sub_argof_pred_relations(graph_2, substitutions)
print("----- Résultat (graphe après substitutions) :" + graph_2)
# -- Substitution des arguments dans le graphe
print("\n-- Test avec l'analyseur des cadres ProbBank (pba)")
graph_3 = GRAPH_INIT
init_relations = pred_arg_relation_list
substitutions_from_pb = []

# Build one substitution triple per relation for which the PropBank frame
# analyzer returns a role.
for (pred, orig_arg, _, _) in init_relations:
    orig_role = orig_arg[:5]  # the ":ARGi" part of the argument
    print("----- find pb role for: " + pred + " and " + orig_role)
    new_role = pba.find_pb_role(pred, orig_role)
    if new_role is None:
        print("----- pb role not found")
        continue
    print("----- pb role found: " + new_role)
    new_arg = orig_role + '-' + new_role
    if len(orig_arg) >= 8:
        # Keep the three characters that follow ":ARGi".
        new_arg += orig_arg[5:8]
    print("----- substitution add: " + pred +
          ", " + orig_arg + ", " + new_arg)
    substitutions_from_pb.append((pred, orig_arg, new_arg))

print("----- origin relations: " + str(init_relations))
print("----- substitutions list: " + str(substitutions_from_pb))

# Apply the direct substitutions, then the ":ARGi-of" ones.
graph_3 = sub_pred_arg_relations(graph_3, substitutions_from_pb)
result_graph = sub_argof_pred_relations(graph_3, substitutions_from_pb)
print("----- Result: " + result_graph)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment