diff --git a/tenet/__init__.py b/tenet/__init__.py index 7a930616f3d1c3257e94229357b64790e753bbe8..65baacec16d0f2daf3c6bf5f161db9964dea4014 100644 --- a/tenet/__init__.py +++ b/tenet/__init__.py @@ -7,7 +7,6 @@ sys.path.insert(0, os.path.abspath(LIB_PATH)) # -- Main Methods from tenet.main import create_ontology_from_amrld_file from tenet.main import create_ontology_from_amrld_dir -from tenet.main import create_ontology_from_amrld_dir_multi_cpu from tenet.main import generate_odrl_from_amrld_file from tenet.main import generate_odrl_from_amrld_dir #from main import create_ontology_from_unlrdf_file \ No newline at end of file diff --git a/tenet/main.py b/tenet/main.py index 92774bea73cb433fad3f24a35e25fe49e9b7fda9..9fcc43b4c5710578ac6999b00b37d30f4c1d7a99 100644 --- a/tenet/main.py +++ b/tenet/main.py @@ -35,7 +35,6 @@ logger = logging.getLogger('root') #============================================================================== def __set_context(): - # LIB_PATH = os.path.dirname(os.path.abspath(__file__)) + '/' print(f'Tenet Running in {LIB_PATH}') os.chdir(LIB_PATH) @@ -121,10 +120,10 @@ def run_extraction(arg_dict): #============================================================================== -# AMR Main Methods (to create an ontology) - with one processing +# AMR Main Methods (to create an ontology) #============================================================================== -#@timed +@timed def create_ontology_from_amrld_file(amrld_file_path, base_ontology_path=None, onto_prefix=None, @@ -151,7 +150,7 @@ def create_ontology_from_amrld_file(amrld_file_path, logger.info('[TENET] Extraction Processing') # -- Process Initialization - logger.info('\n === Process Initialization === ') + logger.info('\n\n === Process Initialization === ') __set_context() if onto_prefix is None: onto_prefix = 'DefaultId' base_output_dir = os.path.dirname(out_file_path) if out_file_path is not None else None @@ -170,17 +169,17 @@ def create_ontology_from_amrld_file(amrld_file_path, assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})' # -- Extraction Processing - logger.info('\n === Extraction Processing === ') + logger.info('\n\n === Extraction Processing === ') config.sentence_output_dir = f'-0' result_triple_list = __apply_extraction(config, amrld_file_path) # -- Final Ontology Generation (factoid_graph) - logger.info('\n === Final Ontology Generation === ') + logger.info('\n\n === Final Ontology Generation === ') factoid_graph = __generate_final_ontology(result_triple_list) ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path) # -- Done - logger.info('\n === Done === ') + logger.info('\n\n === Done === ') if config.technical_dir_path is not None: log_file_name = 'tenet.log' dest_file_path = f'{config.technical_dir_path}{log_file_name}' @@ -189,12 +188,15 @@ def create_ontology_from_amrld_file(amrld_file_path, return ontology_turtle_string -#@timed +@timed def create_ontology_from_amrld_dir(amrld_dir_path, base_ontology_path=None, onto_prefix=None, out_file_path=None, - technical_dir_path=None): + technical_dir_path=None, + multiprocessing_run=False, + processes=multiprocessing.cpu_count()-1 + ): """ Method to create an ontology (as Turtle String) from a transduction analysis of an AMRLD file. @@ -238,130 +240,46 @@ def create_ontology_from_amrld_dir(amrld_dir_path, # -- Extraction Processing logger.info('\n === Extraction Processing === ') - - # ----- Sentence File List - sentence_dir = config.source_sentence_file - sentence_file_list = glob.glob(sentence_dir, recursive = True) - - # ----- Computing Extraction Argument (config_dict update) - for i in range(len(sentence_file_list)): - config_dict['sentence_list_indice'] = i - config_dict['sentence_file'] = sentence_file_list[i] - - # ----- Single Processing Extraction Run - #sentence_count = 0 result_triple_list = [] - for sentence_file in sentence_file_list:# -# sentence_count += 1 -# logger.info(f' *** sentence {sentence_count} *** ') -# config.sentence_output_dir = f'-{sentence_count}' -# new_triple_list = __apply_extraction(config, sentence_file) -# result_triple_list.extend(new_triple_list) - new_triple_list = run_extraction(config_dict) - result_triple_list.extend(new_triple_list) - - # -- Final Ontology Generation (factoid_graph) - logger.info('\n === Final Ontology Generation === ') - factoid_graph = __generate_final_ontology(result_triple_list) - ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path) - - # -- Done - logger.info('\n === Done === ') - if config.technical_dir_path is not None: - log_file_name = 'tenet.log' - dest_file_path = f'{config.technical_dir_path}{log_file_name}' - shutil.copy(log_file_name, dest_file_path) - - return ontology_turtle_string - - -#============================================================================== -# AMR Main Methods (to create an ontology) - Multiprocessing -#============================================================================== - - - - - -@timed -def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path, - base_ontology_path=None, - onto_prefix=None, - out_file_path=None, - technical_dir_path=None, - processes=multiprocessing.cpu_count()-1 - ): - """ - Method to create an ontology (as Turtle String) from a transduction - analysis of an AMRLD file. - - Parameters - ---------- - amrld_dir_path: a path to a directory recursively containing AMR-LD Turtle Files. - base_ontology_path: a path to a Base Ontology Turtle File if defined. - onto_prefix: the target ontology prefix if defined (if not defined a prefix based on the amrld filename is used). - out_file_path: a file path where the output ontology is written if defined (the function still outputs the string). - technical_dir_path: a dir path where some technical and log files are written if defined. - processes: the number of processes in the multiprocessing pool - - Returns - ------- - Dictionary [filename -> Ontology Turtle String]. - Complete Ontology Turtle String (synthesis of all ontology) - - """ - logger.info('[TENET] Extraction Processing') - - # -- Process Initialization - logger.info('\n === Process Initialization === ') - __set_context() - if onto_prefix is None: onto_prefix = 'DefaultId' - base_output_dir = os.path.dirname(out_file_path) if out_file_path is not None else None - - config_dict = { - 'config_file_path': OWL_CONFIG_FILE_PATH, - 'source_type': 'amr', - 'source_corpus': amrld_dir_path, - 'onto_prefix': onto_prefix, - 'base_output_dir': base_output_dir, - 'technical_dir_path': technical_dir_path - } - - config = __set_config(config_dict) - - assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})' - __count_number_of_graph(config) - - # -- Extraction Processing - logger.info('\n === Extraction Processing === ') # ----- Sentence File List sentence_dir = config.source_sentence_file sentence_file_list = glob.glob(sentence_dir, recursive = True) - - # The following is for multiprocessing logging (must be exec before the pool is created - multiprocessing_logging.install_mp_handler() - + # ----- Computing Extraction Argument mapIterable = [] for i in range(len(sentence_file_list)): config_dict['sentence_list_indice'] = i config_dict['sentence_file'] = sentence_file_list[i] mapIterable = mapIterable + [config_dict.copy()] - - # ----- (Multiprocessing) Extraction Run - with multiprocessing.Pool(processes) as p: - triplesLists = p.map(run_extraction, mapIterable) - - result_triple_list = [] - for tripleList in triplesLists : - result_triple_list = result_triple_list + tripleList + if multiprocessing_run: + logger.info('Multi-Processing Run ') + + # ----- Multiprocessing Logging (must be exec before the pool is created) + multiprocessing_logging.install_mp_handler() + + # ----- (Multi-processing) Extraction Run + with multiprocessing.Pool(processes) as p: + triplesLists = p.map(run_extraction, mapIterable) + + # ----- Result Triple List Update + for tripleList in triplesLists : + result_triple_list = result_triple_list + tripleList + + else: + logger.info('Single-Processing Run ') + + # ----- (Single-processing) Extraction Run + for config_dict in mapIterable:# + new_triple_list = run_extraction(config_dict) + result_triple_list.extend(new_triple_list) + # -- Final Ontology Generation (factoid_graph) logger.info('\n === Final Ontology Generation === ') factoid_graph = __generate_final_ontology(result_triple_list) ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path) - + # -- Done logger.info('\n === Done === ') if config.technical_dir_path is not None: @@ -372,12 +290,11 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path, return ontology_turtle_string - #============================================================================== # AMR Main Methods (to generate ODRL statements) #============================================================================== -#@timed +@timed def generate_odrl_from_amrld_file( amrld_file_path, onto_prefix=None, out_file_path=None, technical_dir_path=None): @@ -439,7 +356,7 @@ def generate_odrl_from_amrld_file( return ontology_turtle_string -#@timed +@timed def generate_odrl_from_amrld_dir( amrld_dir_path, onto_prefix=None, out_file_path=None, technical_dir_path=None):