diff --git a/lib/re_test.py b/lib/re_test.py index 9c3c53f61d971e211ec033d05d55c14464d7aa7c..8c7375de09ceb857399198d4a5ecd93e63bf95f3 100644 --- a/lib/re_test.py +++ b/lib/re_test.py @@ -1,8 +1,11 @@ import regex as re -text = ''' # ::id SSC-01-01 -# ::snt The Solar System is the gravitationally bound system of the Sun and the objects that orbit it, either directly or indirectly. -(s / system +print("[DEV] Regular Expression Test") + +# -- Test data +print("\n-- Données de test") + +graph = ''' (s / system :domain (p / planet :name (n / name :op1 "Solar" @@ -19,6 +22,17 @@ text = ''' # ::id SSC-01-01 :op2 (d2 / direct-02 :polarity -))))))''' +print("----- graphe AMR traité : " + graph) + +substitutions = [] +substitutions.append(('bind-01', ':ARG0', ':ARG0-AGT')) +substitutions.append(('orbit-01', ':ARG1', ':ARG1-PPT')) +substitutions.append(('bind-01', ':ARG1-of', ':ARG1-PPT-of')) +substitutions.append(('orbit-01', ':ARG0-of', ':ARG0-GOL-of')) + +print("----- substitutions visées : " + str(substitutions)) + + rx = re.compile(r''' \([^.]*\) (*SKIP)(*FAIL) # match anything in parentheses and "throw it away" | # or @@ -32,26 +46,99 @@ rx_2 = re.compile(r''' ''', re.VERBOSE) -pred_pattern = '[a-z]+-0\d' -arg_of_pattern = ':ARG\d-of' +PRED_PATTERN = '[a-z]+-0\d' +ARGOF_PATTERN = ':ARG\d-of' -result = [] + +# -- Search for (predicate, argument) relations + +print("\n-- Recherche des relations (predicat, argument)") + + +pred_arg_relation_list = [] + + +# ----- argument for each predicate: pairs each PRED_PATTERN match with every rx match located after it and records absolute start/end offsets in graph +def find_pred_arg_relations(graph, pred_arg_relation_list): + for pred_match in re.finditer(PRED_PATTERN, graph): + print("----- Match pour prédicat: " + str(pred_match)) + for arg_match in rx.finditer(graph[pred_match.end():]): + print("-------- Match pour argument de type ARGi: " + str(arg_match)) + arg_pos_start = pred_match.end() + arg_match.start() + arg_pos_end = pred_match.end() + arg_match.end() + pred_arg_relation_list.append((pred_match.group(), arg_match.group(), + 
arg_pos_start, arg_pos_end)) + return pred_arg_relation_list -# -- argument pour chaque prédicat -for pred_match in re.finditer(pred_pattern, text): - print(pred_match) - arg_match_list = rx.findall(text[pred_match.end():]) - print(arg_match_list) - for arg_match in arg_match_list: - result.append((pred_match.group(), arg_match)) +pred_arg_relation_list = find_pred_arg_relations(graph, pred_arg_relation_list) + +# ----- predicate for each ARGi-of: pairs each ARGOF_PATTERN match with the first PRED_PATTERN match after it; pred_match[0] raises IndexError when no predicate follows +def find_argof_pred_relations(graph, pred_arg_relation_list): + for arg_match in re.finditer(ARGOF_PATTERN, graph): + print("----- Match pour argument de type ARGi-of: " + str(arg_match)) + pred_match = re.findall(PRED_PATTERN, graph[arg_match.end():]) + print("-------- Prédicat correspondant: " + pred_match[0]) + arg_pos_start = arg_match.start() + arg_pos_end = arg_match.end() + pred_arg_relation_list.append((pred_match[0], arg_match.group(), + arg_pos_start, arg_pos_end)) + return pred_arg_relation_list + +find_argof_pred_relations(graph, pred_arg_relation_list) + +print("----- Resultat (matchs trouvés) :") +for r in pred_arg_relation_list: + print(r) + -# -- prédicat pour chaque ARGi-of -for arg_match in re.finditer(arg_of_pattern, text): - print(arg_match) - pred_match = re.findall(pred_pattern, text[arg_match.end():]) - print(pred_match[0]) - result.append((pred_match[0], arg_match.group())) +# -- Substitution of arguments in the graph + +print("\n-- Substitution des arguments dans le graphe") + +def sub_betwenn_pos(text, start, end, new_str): + result = text[:start] + result += new_str + result += text[end:] + return result + + +# ----- argument for each predicate: rewrites old_arg to new_arg for each (pred, old_arg, new_arg) in substitutions; NOTE(review): offsets come from matches taken before graph is reassigned in the loop, confirm they remain valid after a length-changing replacement +def sub_pred_arg_relations(graph): + for (pred, old_arg, new_arg) in substitutions: + for pred_match in re.finditer(PRED_PATTERN, graph): + for arg_match in rx.finditer(graph[pred_match.end():]): + arg_pos_start = pred_match.end() + arg_match.start() + arg_pos_end = pred_match.end() + arg_match.end() + if (pred == pred_match.group()) & 
(arg_match.group() == old_arg): + print("----- substition de " + new_arg + + " sur le segment [" + str(arg_pos_start) + + ", " + str(arg_pos_end) + "]") + graph = sub_betwenn_pos(graph, + arg_pos_start, + arg_pos_end, + new_arg) + return graph + +graph = sub_pred_arg_relations(graph) + +# ----- predicate for each ARGi-of: rewrites an ARGi-of role when the first predicate after it and the role both match a substitutions entry; NOTE(review): same offset caveat as above, and pred_match[0] raises IndexError when no predicate follows +def sub_argof_pred_relations(graph): + for (pred, old_arg, new_arg) in substitutions: + for arg_match in re.finditer(ARGOF_PATTERN, graph): + pred_match = re.findall(PRED_PATTERN, graph[arg_match.end():]) + arg_pos_start = arg_match.start() + arg_pos_end = arg_match.end() + if (pred == pred_match[0]) & (arg_match.group() == old_arg): + print("----- substition de " + new_arg + + " sur le segment [" + str(arg_pos_start) + + ", " + str(arg_pos_end) + "]") + graph = sub_betwenn_pos(graph, + arg_pos_start, + arg_pos_end, + new_arg) + return graph + +graph = sub_argof_pred_relations(graph) + -print("Result:") -for r in result: - print(r) \ No newline at end of file +print("----- Résultat (graphe après substitutions) :" + graph) \ No newline at end of file