Skip to content
Snippets Groups Projects
Commit c974963d authored by Aurélien Lamercerie's avatar Aurélien Lamercerie
Browse files

Dev: regular expression test and amr analyzer init

parent ab3309bb
No related branches found
No related tags found
No related merge requests found
*-env/*
lib/amrld/wk/*
*__pycache__*
# ::id SSC-01-01
# ::snt The Solar System is the gravitationally bound system of the Sun and the objects that orbit it, either directly or indirectly.
(s / system
:domain (p / planet
:name (n / name
:op1 "Solar"
:op2 "System"))
:ARG1-of (b / bind-01
:ARG0 (g / gravitation))
:part (a / and
:op1 (s2 / sun)
:op2 (o / object
:ARG0-of (o2 / orbit-01
:ARG1 s2
:manner (o3 / or
:op1 (d / direct-02)
:op2 (d2 / direct-02
:polarity -))))))
\ No newline at end of file
#!/usr/bin/python3.10
# -*-coding:Utf-8 -*
#==============================================================================
# C.M. Tool: AMR Graph (penman) Analyzer
#------------------------------------------------------------------------------
# Module to analyze AMR Graph in penman format
#==============================================================================
#==============================================================================
# Importing required modules
#==============================================================================
import sys
import glob
import re
import propbank_analyzer
from bs4 import BeautifulSoup
#==============================================================================
# Parameters
#==============================================================================
# Input/Output Directories
INPUT_DIR = "../inputData/"
OUTPUT_DIR = "../outputData/"

# Data
PROPBANK_FRAMES_DIR = "../propbankFrames/"
PBF_DIGITS = 2

# Regular expressions.
# All patterns are raw strings so that backslash sequences such as ``\d`` and
# ``\(`` reach the regex engine untouched instead of being treated as
# (invalid) Python string escapes. The pattern values themselves are unchanged.

# A predicate is a lowercase lemma followed by a sense suffix "-0<digit>".
AMR_PREDICATE_FORM = [r'[a-z]+-0\d']

# NOTE(review): the lookbehind ``(?<=[a-z]+-0\d)`` is variable-width, which the
# standard ``re`` module rejects at compile time, and the trailing
# ``(?=[[a-z]+-0\d)|$]`` looks malformed -- confirm the intended pattern.
ROLE_SEARCH_RE = r'(?<=[a-z]+-0\d):ARG\d(?=[[a-z]+-0\d)|$]'

# A core role is ":ARG" followed by one digit.
AMR_CORE_ROLE_FORM = [r':ARG\d']

# NOTE(review): this is a .NET-style balancing-group pattern (``(?<c>)`` /
# ``(?<-c>)``); the standard ``re`` module does not support that syntax.
PARENTHICAL_EXPRESSION = r'\((?>\((?<c>)|[^()]+|\)(?<-c>))*(?(c)(?!))\)'

AMR_PREDICATE_SCOPE_FORM = ['(^())*']
#==============================================================================
# Functions to find AMR predicates and AMR core roles
#==============================================================================
def get_amr_predicate_list(amr_graph):
    """Collect every predicate-like token found in ``amr_graph``.

    Each pattern listed in ``AMR_PREDICATE_FORM`` is applied to the graph text
    with ``re.findall``; the hits are returned in a single list, grouped by
    pattern in the order the patterns are declared.
    """
    return [
        hit
        for pattern in AMR_PREDICATE_FORM
        for hit in re.findall(pattern, amr_graph)
    ]
def get_parenthical_expression(amr_graph):
    """Return the outermost balanced parenthesised substrings of ``amr_graph``.

    The module-level ``PARENTHICAL_EXPRESSION`` pattern relies on balancing
    groups, which the standard ``re`` module cannot compile, so the balanced
    spans are located with an explicit stack-based scan instead.

    Args:
        amr_graph: text to scan (typically an AMR graph in penman notation).

    Returns:
        A list of substrings, in order of appearance, each starting with
        ``(`` and ending with its matching ``)``. Only maximal balanced spans
        are reported: a span nested inside another reported span is omitted.
        Unmatched parentheses are ignored, so balanced groups located inside
        an unclosed ``(`` are still returned. An input without any balanced
        pair yields an empty list.

    Note:
        Parentheses inside quoted strings are not treated specially.
    """
    open_positions = []  # indices of "(" still waiting for their ")"
    spans = []           # (start, end) of maximal balanced spans found so far
    for index, char in enumerate(amr_graph):
        if char == '(':
            open_positions.append(index)
        elif char == ')' and open_positions:
            start = open_positions.pop()
            # Spans that begin after ``start`` are nested in the span being
            # closed now; drop them so only the outermost one is kept.
            while spans and spans[-1][0] > start:
                spans.pop()
            spans.append((start, index + 1))
    return [amr_graph[start:end] for start, end in spans]
def get_core_role_list_of_predicate(amr_graph, predicate):
    """Collect the core-role tokens (``:ARG<digit>``) found in ``amr_graph``.

    Each pattern in ``AMR_CORE_ROLE_FORM`` is applied to the graph text with
    ``re.findall`` and the hits are concatenated in pattern order. (The
    previous implementation iterated ``AMR_PREDICATE_FORM`` and therefore
    returned predicates instead of roles.)

    Args:
        amr_graph: AMR graph text to search.
        predicate: predicate the roles are meant to belong to. It is accepted
            for interface compatibility but not used yet: roles are collected
            from the whole graph, not only from the scope of this predicate.

    Returns:
        A list of matched role strings, possibly empty.
    """
    amr_core_role_list = []
    for target_re in AMR_CORE_ROLE_FORM:
        amr_core_role_list.extend(re.findall(target_re, amr_graph))
    return amr_core_role_list
#==============================================================================
# Main function
#==============================================================================
def main(amr_graph_file):
    """Run the AMR graph analyzer entry point.

    At this stage the function only prints a banner, the result of probing
    ``ROLE_SEARCH_RE`` against a fixed sample string, and a closing message.
    The file-based analysis is kept below as a commented-out draft.

    Args:
        amr_graph_file: name of the AMR graph file, relative to ``INPUT_DIR``.
            Only the commented-out draft uses it for now.
    """
    print("\n" + "[CMT] AMR Graph Analyzer")

    # Regex probe. The sample is a raw string so that the backslash is an
    # explicit literal character rather than an invalid string escape.
    # NOTE(review): ROLE_SEARCH_RE contains a variable-width lookbehind, which
    # the standard ``re`` module rejects -- this call is expected to raise
    # re.error until the pattern is reworked.
    print(re.findall(ROLE_SEARCH_RE, r'test (d \ bind-01 :ARG1)'))

    # Draft of the file-based analysis (not enabled yet):
    # amr_graph_file = INPUT_DIR + amr_graph_file
    # print("-- Reading file " + amr_graph_file)
    # with open(amr_graph_file, 'r') as f:
    #     amr_graph = f.read()
    # print("----- AMR Graph: \n" + amr_graph)
    # print("-- Analyzing graph ")
    # amr_predicate_list = get_amr_predicate_list(amr_graph)
    # print("--- predicates found: ")
    # if len(amr_predicate_list) > 0:
    #     for p in amr_predicate_list:
    #         print("----- " + p)
    # else:
    #     print("None")
    # parenthical_expression_list = get_parenthical_expression(amr_graph)
    # print("-- Parenthical expression found: ")
    # if len(parenthical_expression_list) > 0:
    #     for e in parenthical_expression_list:
    #         print("----- " + e)
    # else:
    #     print("None")

    # -- Ending print
    print("\n" + "[SSC] Done")
if __name__ == "__main__":
    # The graph file name is a mandatory argument: exit with a usage message
    # instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: " + sys.argv[0] + " <amr_graph_file>")
    main(sys.argv[1])
......@@ -2,7 +2,7 @@
# -*-coding:Utf-8 -*
#==============================================================================
# C.M. Tool: prop
# C.M. Tool: PropBank Frame Analyzer
#------------------------------------------------------------------------------
# Module to analyze PropBank frames
#==============================================================================
......
import regex as re
# Sample AMR graph (penman notation) used as input for the experiment below.
text = ''' # ::id SSC-01-01
# ::snt The Solar System is the gravitationally bound system of the Sun and the objects that orbit it, either directly or indirectly.
(s / system
:domain (p / planet
:name (n / name
:op1 "Solar"
:op2 "System"))
:ARG1-of (b / bind-01
:ARG0 (g / gravitation))
:part (a / and
:op1 (s2 / sun)
:op2 (o / object
:ARG0-of (o2 / orbit-01
:ARG1 s2
:manner (o3 / or
:op1 (d / direct-02)
:op2 (d2 / direct-02
:polarity -))))))'''

# Patterns using the (*SKIP)(*FAIL) verbs; ``re`` here is the third-party
# ``regex`` module imported at the top of the script.
# NOTE(review): ``:ARG\d`` also matches the ``:ARG1`` prefix of ``:ARG1-of``
# -- confirm whether inverse roles should be excluded from ``rx``.
rx = re.compile(r'''
\([^.]*\) (*SKIP)(*FAIL) # match anything in parentheses and "throw it away"
| # or
:ARG\d # match :ARGi
''', re.VERBOSE)
# Same idea for inverse roles; not used by the loops below.
rx_2 = re.compile(r'''
\([^.]*\) (*SKIP)(*FAIL) # match anything in parentheses and "throw it away"
| # or
:ARG\d-of # match :ARGi-of
''', re.VERBOSE)

# Raw strings keep ``\d`` as a regex escape instead of an invalid string escape.
pred_pattern = r'[a-z]+-0\d'
arg_of_pattern = r':ARG\d-of'

# Collected (predicate, role) pairs.
result = []

# -- arguments for each predicate
for pred_match in re.finditer(pred_pattern, text):
    print(pred_match)
    # Only the text located after the predicate is searched for roles.
    arg_match_list = rx.findall(text[pred_match.end():])
    print(arg_match_list)
    result.extend((pred_match.group(), arg_match) for arg_match in arg_match_list)

# -- predicate for each ARGi-of
for arg_match in re.finditer(arg_of_pattern, text):
    print(arg_match)
    # The first predicate after the role is taken as the one it attaches to.
    next_pred = re.search(pred_pattern, text[arg_match.end():])
    if next_pred is None:
        # No predicate follows this role: nothing to pair it with.
        continue
    print(next_pred.group())
    result.append((next_pred.group(), arg_match.group()))

print("Result:")
for r in result:
    print(r)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment