diff --git a/amrbatch/main.py b/amrbatch/main.py index baf568e138b1bef27b0c29578742d2f53372f487..742a2e4cc979835a06280d0831108939bea861e2 100644 --- a/amrbatch/main.py +++ b/amrbatch/main.py @@ -21,6 +21,8 @@ from amrlib.graph_processing.amr_plot import AMRPlot from filepath_manager import FilepathManager from work_data import WorkData +#global AMR_MODEL_PATH + # -- Config File Path LIB_PATH = os.path.dirname(os.path.abspath(__file__)) + '/' LOGGING_CONF_FILE_PATH = f'{LIB_PATH}logging.conf' @@ -61,7 +63,7 @@ def __prepare_workdata(filepath_manager, line_set): sentence_number += 1 new_data = WorkData(sentence, sentence_number, filepath_manager) workdata_list.append(new_data) - logger.debug(f' *** sentence {sentence_number} *** \n{new_data}') + logger.debug(f' *** sentence {sentence_number} *** \n{new_data.sentence}') logger.info(f'----- number of sentences: {len(workdata_list)}') return workdata_list @@ -88,14 +90,59 @@ def __generate_sentence_file(filepath_manager, workdata_list): + + + + +#============================================================================== +# Sentence Conversion to AMR +#============================================================================== + +def __run_conversion(arg_dict): + data = arg_dict['data'] + amr_model_path = arg_dict['amr_model_path'] + + logger.info("-- Loading AMR model") + stog = amrlib.load_stog_model(model_dir=amr_model_path) + + logger.info("-- Converting sentences to AMR graphs") + stog_result = stog.parse_sents([data.sentence]) + logger.info(f'----- Sentence successfully processed') + logger.info(stog_result) + data.graph = stog_result[0] + + return data + + +def __convert_sentences_to_graphs(amr_model_path, input_data_list): + """ Converting text sentences to AMR graphs """ + + mapIterable = [] + for data in input_data_list: + arg_dict = { 'data': data, 'amr_model_path': amr_model_path} + mapIterable = mapIterable + [arg_dict] + + number_of_processes = min(round((multiprocessing.cpu_count()-1)/4), len(input_data_list)) + with multiprocessing.Pool(number_of_processes) as p: + result_data_list = p.map(__run_conversion, mapIterable) + +# result_data_list = [] +# for arg_dict in mapIterable: +# result_data_list += __convert_sentence_to_graph_multiprocess_run(arg_dict) + + logger.info(f'----- Total processed graph number: {len(result_data_list)}') + return result_data_list + + + #============================================================================== -# Conversion Steps +# AMR Graph File Generation #============================================================================== def __generate_penman_amr_graph(filepath_manager, data): """ AMR graph generation in penman format """ output_filepath = data.get_penman_amr_graph_output_filepath() - logger.debug(f"----- AMR Graph file (penman): {os.path.basename(output_filepath)}") + logger.info(f"----- AMR Graph file (penman): {os.path.basename(output_filepath)}") with open(output_filepath, "w") as writing_file: # w = write writing_file.write(data.id_line_str) writing_file.write(data.graph) @@ -107,14 +154,14 @@ def __generate_dot_amr_graph(filepath_manager, data): try: # -- generating dot/png/svg files using AMRLib and GraphViz dot_filename = data.get_dot_amr_graph_output_filepath() - logger.debug(f'----- AMR Graph file (dot): {os.path.basename(dot_filename)}') + logger.info(f'----- AMR Graph file (dot): {os.path.basename(dot_filename)}') format = 'png' plot = AMRPlot(dot_filename, format) plot.build_from_graph(data.graph) plot.graph.render() render_fn = dot_filename + '.' + format # -- renaming PNG file good_png_fn = data.get_png_amr_graph_output_filepath() - logger.debug(f'----- AMR Graph file (png): {{os.path.basename(good_png_fn)}}') + logger.info(f'----- AMR Graph file (png): {os.path.basename(good_png_fn)}') os.rename(render_fn, good_png_fn) returnValue = dot_filename format = 'svg' @@ -123,49 +170,20 @@ def __generate_dot_amr_graph(filepath_manager, data): plot.graph.render() render_fn = dot_filename + '.' + format # -- renaming PNG file good_svg_fn = good_png_fn.replace('.png','.svg') - logger.debug(f'----- AMR Graph file (svg): {{os.path.basename(good_svg_fn)}}') + logger.info(f'----- AMR Graph file (svg): {os.path.basename(good_svg_fn)}') os.rename(render_fn, good_svg_fn) except Exception as ex: logger.warning('Exception when trying to plot: '+ex) traceback.print_exc() returnValue = 'Exception when trying to plot' return(returnValue) - -# Function executed when a worker is created in the pool -def init_pool_worker(): - amr_model_path = "/home/daxid/hdd_data/jupyterlab_root/lib/amrModel/model_parse_xfm_bart_large-v0_1_0" - # declare scope of a new global variable - global stog - # store argument in the global variable for this process - logger.info("-- Loading AMR model") - stog = amrlib.load_stog_model(model_dir=amr_model) - -def __convert_sentence_to_graph_multiprocess_run(data): - print("in worker\n") - wd_number = 1 - stog_result = stog.parse_sents([data.sentence]) - logger.info(f'----- Sentence {wd_number} successfully processed') - logger.debug(stog_result) - data.graph = stog_result[0] - return(stog_result) - -def __convert_sentences_to_graphs(amr_model_path, workdata_list): - """ Converting text sentences to AMR graphs """ - # ----- (Multi-processing) Extraction Run - number_of_processes = min(multiprocessing.cpu_count()-1, len(workdata_list)) - global stog - with multiprocessing.Pool(2, initializer=init_pool_worker) as p: - logger.info("-- Converting sentences to AMR graphs") - print("pool created\n") - stog_result_list = p.map(__convert_sentence_to_graph_multiprocess_run, workdata_list) - logger.info(f'----- Total processed graph number: {len(stog_result_list)}') - return workdata_list + def __generate_amr_graph_files(filepath_manager, workdata_list): logger.info("-- Generating AMR graph files") # ----- Prepare multiprocessing data - starmapIterable = [(data,filepath_manager) for data in workdata_list] + starmapIterable = [(filepath_manager,data) for data in workdata_list] # ----- (Multi-processing) Extraction Run with multiprocessing.Pool(multiprocessing.cpu_count()-1) as p: penmanFilePathList = p.starmap(__generate_penman_amr_graph, starmapIterable) @@ -174,7 +192,7 @@ def __generate_amr_graph_files(filepath_manager, workdata_list): #============================================================================== -# Serialization Steps +# AMR Serialization #============================================================================== def __serialize_amr_graph_to_amr_rdf(filepath_manager, data): @@ -184,9 +202,9 @@ def __serialize_amr_graph_to_amr_rdf(filepath_manager, data): amr_penman_filepath = data.get_penman_amr_graph_output_filepath() amr_rdf_triple_filepath = data.get_amr_rdf_triple_output_filepath() amr_rdf_turtle_filepath = data.get_amr_rdf_turtle_output_filepath() - logger.debug(f'----- AMR filepath (penman): {amr_penman_filepath}') - logger.debug(f'----- AMR-RDF filepath (triple): {amr_rdf_triple_filepath}') - logger.debug(f'----- AMR-RDF filepath (turtle): {amr_rdf_turtle_filepath}') + logger.info(f'----- AMR filepath (penman): {os.path.basename(amr_penman_filepath)}') + logger.info(f'----- AMR-RDF filepath (triple): {os.path.basename(amr_rdf_triple_filepath)}') + logger.info(f'----- AMR-RDF filepath (turtle): {os.path.basename(amr_rdf_turtle_filepath)}') # -- AMR-LD processing amrld_process = ["python3", "amr_to_rdf.py", "-i", amr_penman_filepath, "-o", amr_rdf_triple_filepath] @@ -195,14 +213,21 @@ def __serialize_amr_graph_to_amr_rdf(filepath_manager, data): os.chdir(AMRLD_DIR) subprocess.run(amrld_process) os.chdir(current_dirpath) - logger.info(f'----- AMR-RDF triple successfully processed ({os.path.basename(amr_rdf_triple_filepath)})') + if (os.path.isfile(amr_rdf_triple_filepath)): + logger.info(f'----- AMR-RDF triple successfully processed ({os.path.basename(amr_rdf_triple_filepath)})') + else: + logger.info(f'----- *** Process of AMR-RDF triple generation failed ***') # -- Turtle Conversion if (os.path.isfile(amr_rdf_triple_filepath)): g = Graph() g.parse(amr_rdf_triple_filepath) g.serialize(destination=amr_rdf_turtle_filepath, format='turtle') - logger.info(f'----- AMR-RDF triple successfully processed ({os.path.basename(amr_rdf_turtle_filepath)})') + if (os.path.isfile(amr_rdf_turtle_filepath)): + logger.info(f'----- AMR-RDF turtle successfully processed ({os.path.basename(amr_rdf_turtle_filepath)})') + else: + logger.info(f'----- *** Process of AMR-RDF turtle generation failed ***') + @@ -233,7 +258,10 @@ def __analyze_line_set_to_produce_amr_graphs(line_set, data_reference, amr_model AMR Graph String (in PENMAN format). """ - + + #global AMR_MODEL_PATH + #AMR_MODEL_PATH = amr_model_path + logger.info('\n === Preparation === ') # -- Initialize a filepath manager