diff --git a/tenet/main.py b/tenet/main.py index 2e959155bdd762dd68a265f5797ac45fb4c0a1de..28fa3a5e1b3861244a2a8ebc6bae4bd2f6301a1e 100644 --- a/tenet/main.py +++ b/tenet/main.py @@ -110,7 +110,7 @@ def __serialize_factoid_graph(config, factoid_graph, out_file_path=None): #============================================================================== -# AMR Main Methods (to create an ontology) +# AMR Main Methods (to create an ontology) - Single Process #============================================================================== #@timed @@ -168,6 +168,76 @@ def create_ontology_from_amrld_file(amrld_file_path, return ontology_turtle_string + +#@timed +def create_ontology_from_amrld_dir(amrld_dir_path, + base_ontology_path=None, + onto_prefix=None, + out_file_path=None, + technical_dir_path=None): + """ + Method to create an ontology (as Turtle String) from a transduction + analysis of the AMRLD files in a directory. + + Parameters + ---------- + amrld_dir_path: a path to a directory of AMR-LD Turtle Files. + base_ontology_path: a path to a Base Ontology Turtle File if defined (currently not used by this function). + onto_prefix: the target ontology prefix if defined (if not defined, 'DefaultId' is used). + out_file_path: a file path where the output ontology is written if defined (the function still outputs the string). + technical_dir_path: a dir path where some technical and log files are written if defined. + + Returns + ------- + A single string (not a per-file dictionary): 
+ Complete Ontology Turtle String (synthesis of the triples extracted from every sentence file) + + """ + + logger.info('[TENET] Extraction Processing') + + # -- Process Initialization + logger.info('\n === Process Initialization === ') + __set_context() + if onto_prefix is None: onto_prefix = 'DefaultId' + base_output_dir = os.path.dirname(out_file_path) if out_file_path is not None else None + config = __set_config(OWL_CONFIG_FILE_PATH, + 'amr', amrld_dir_path, onto_prefix, + base_output_dir, technical_dir_path) + assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})' + __count_number_of_graph(config) + + # -- Extraction Processing + logger.info('\n === Extraction Processing === ') + sentence_dir = config.source_sentence_file + sentence_count = 0 + result_triple_list = [] + for sentence_file in glob.glob(sentence_dir, recursive = True): + sentence_count += 1 + logger.info(f' *** sentence {sentence_count} *** ') + config.sentence_output_dir = f'-{sentence_count}' + new_triple_list = __apply_extraction(config, sentence_file) + result_triple_list.extend(new_triple_list) + + # -- Final Ontology Generation (factoid_graph) + logger.info('\n === Final Ontology Generation === ') + factoid_graph = __generate_final_ontology(result_triple_list) + ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path) + + # -- Done + logger.info('\n === Done === ') + if config.technical_dir_path is not None: + log_file_name = 'tenet.log' + dest_file_path = f'{config.technical_dir_path}{log_file_name}' + shutil.copy(log_file_name, dest_file_path) + + return ontology_turtle_string + + +#============================================================================== +# AMR Main Methods (to create an ontology) - Multiprocessing +#============================================================================== + global result_triple_queue global sentence_file_list @@ -197,13 +267,13 @@ def pool_function(arg_dic): #@timed -def 
create_ontology_from_amrld_dir(amrld_dir_path, - base_ontology_path=None, - onto_prefix=None, - out_file_path=None, - technical_dir_path=None, - processes=3#multiprocessing.cpu_count()-1 - ): +def create_ontology_from_amrld_dir_with_multiprocessing(amrld_dir_path, + base_ontology_path=None, + onto_prefix=None, + out_file_path=None, + technical_dir_path=None, + processes=3#multiprocessing.cpu_count()-1 + ): """ Method to create an ontology (as Turtle String) from a transduction analysis of an AMRLD file.