From 250c84808554793cdd5910071c829118434a8873 Mon Sep 17 00:00:00 2001
From: daxid <david.rouquet@tetras-libre.fr>
Date: Fri, 23 Jun 2023 11:03:26 +0000
Subject: [PATCH] Merge of single and multi processing extraction functions

---
 tenet/__init__.py |   1 -
 tenet/main.py     | 157 +++++++++++-----------------------------------
 2 files changed, 37 insertions(+), 121 deletions(-)

diff --git a/tenet/__init__.py b/tenet/__init__.py
index 7a930616..65baacec 100644
--- a/tenet/__init__.py
+++ b/tenet/__init__.py
@@ -7,7 +7,6 @@ sys.path.insert(0, os.path.abspath(LIB_PATH))
 # -- Main Methods
 from tenet.main import create_ontology_from_amrld_file
 from tenet.main import create_ontology_from_amrld_dir 
-from tenet.main import create_ontology_from_amrld_dir_multi_cpu
 from tenet.main import generate_odrl_from_amrld_file
 from tenet.main import generate_odrl_from_amrld_dir 
 #from main import create_ontology_from_unlrdf_file
\ No newline at end of file
diff --git a/tenet/main.py b/tenet/main.py
index 92774bea..9fcc43b4 100644
--- a/tenet/main.py
+++ b/tenet/main.py
@@ -35,7 +35,6 @@ logger = logging.getLogger('root')
 #==============================================================================
 
 def __set_context():
-    # LIB_PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
     print(f'Tenet Running in {LIB_PATH}')
     os.chdir(LIB_PATH)
 
@@ -121,10 +120,10 @@ def run_extraction(arg_dict):
     
     
 #==============================================================================
-# AMR Main Methods (to create an ontology) - with one processing
+# AMR Main Methods (to create an ontology)
 #==============================================================================
 
-#@timed
+@timed
 def create_ontology_from_amrld_file(amrld_file_path, 
                                     base_ontology_path=None, 
                                     onto_prefix=None, 
@@ -151,7 +150,7 @@ def create_ontology_from_amrld_file(amrld_file_path,
     logger.info('[TENET] Extraction Processing')    
     
     # -- Process Initialization
-    logger.info('\n === Process Initialization === ')
+    logger.info('\n\n === Process Initialization === ')
     __set_context()
     if onto_prefix is None: onto_prefix = 'DefaultId'
     base_output_dir = os.path.dirname(out_file_path) if out_file_path is not None else None
@@ -170,17 +169,17 @@ def create_ontology_from_amrld_file(amrld_file_path,
     assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})'
     
     # -- Extraction Processing   
-    logger.info('\n === Extraction Processing === ')
+    logger.info('\n\n === Extraction Processing === ')
     config.sentence_output_dir = f'-0'
     result_triple_list = __apply_extraction(config, amrld_file_path)
     
     # -- Final Ontology Generation (factoid_graph)
-    logger.info('\n === Final Ontology Generation  === ') 
+    logger.info('\n\n === Final Ontology Generation  === ') 
     factoid_graph = __generate_final_ontology(result_triple_list)
     ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)
         
     # -- Done
-    logger.info('\n === Done === ')
+    logger.info('\n\n === Done === ')
     if config.technical_dir_path is not None:
         log_file_name = 'tenet.log'
         dest_file_path = f'{config.technical_dir_path}{log_file_name}'
@@ -189,12 +188,15 @@ def create_ontology_from_amrld_file(amrld_file_path,
     return ontology_turtle_string
 
 
-#@timed
+@timed
 def create_ontology_from_amrld_dir(amrld_dir_path, 
                                    base_ontology_path=None, 
                                    onto_prefix=None, 
                                    out_file_path=None, 
-                                   technical_dir_path=None):
+                                   technical_dir_path=None,
+                                   multiprocessing_run=False,
+                                   processes=multiprocessing.cpu_count()-1
+                                  ):
     """
     Method to create an ontology (as Turtle String) from a transduction 
     analysis of an AMRLD file.
@@ -238,130 +240,46 @@ def create_ontology_from_amrld_dir(amrld_dir_path,
     
     # -- Extraction Processing    
     logger.info('\n === Extraction Processing === ')
-    
-    # ----- Sentence File List
-    sentence_dir = config.source_sentence_file   
-    sentence_file_list = glob.glob(sentence_dir, recursive = True)
-    
-    # ----- Computing Extraction Argument (config_dict update)
-    for i in range(len(sentence_file_list)):
-        config_dict['sentence_list_indice'] = i
-        config_dict['sentence_file'] = sentence_file_list[i]
-    
-    # ----- Single Processing Extraction Run
-    #sentence_count = 0
     result_triple_list = []
-    for sentence_file in sentence_file_list:#
-#        sentence_count += 1
-#        logger.info(f'     *** sentence {sentence_count} *** ')
-#        config.sentence_output_dir = f'-{sentence_count}'
-#        new_triple_list = __apply_extraction(config, sentence_file)
-#        result_triple_list.extend(new_triple_list)
-        new_triple_list = run_extraction(config_dict)
-        result_triple_list.extend(new_triple_list)
-    
-    # -- Final Ontology Generation (factoid_graph)
-    logger.info('\n === Final Ontology Generation  === ') 
-    factoid_graph = __generate_final_ontology(result_triple_list)
-    ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)
-        
-    # -- Done
-    logger.info('\n === Done === ')
-    if config.technical_dir_path is not None:
-        log_file_name = 'tenet.log'
-        dest_file_path = f'{config.technical_dir_path}{log_file_name}'
-        shutil.copy(log_file_name, dest_file_path)
-    
-    return ontology_turtle_string
-
-
-#==============================================================================
-# AMR Main Methods (to create an ontology) - Multiprocessing
-#==============================================================================
-
-
-
-    
-
-@timed
-def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path, 
-                                                        base_ontology_path=None, 
-                                                        onto_prefix=None, 
-                                                        out_file_path=None, 
-                                                        technical_dir_path=None,
-                                                        processes=multiprocessing.cpu_count()-1
-                                                        ):
-    """
-    Method to create an ontology (as Turtle String) from a transduction 
-    analysis of an AMRLD file.
-
-    Parameters
-    ----------
-    amrld_dir_path: a path to a directory recursively containing AMR-LD Turtle Files.
-    base_ontology_path: a path to a Base Ontology Turtle File if defined.
-    onto_prefix: the target ontology prefix if defined (if not defined a prefix based on the amrld filename is used).
-    out_file_path: a file path where the output ontology is written if defined (the function still outputs the string). 
-    technical_dir_path: a dir path where some technical and log files are written if defined.
-    processes: the number of processes in the multiprocessing pool
-
-    Returns
-    -------
-    Dictionary [filename -> Ontology Turtle String].
-    Complete Ontology Turtle String (synthesis of all ontology)
-
-    """
-    logger.info('[TENET] Extraction Processing')    
-    
-    # -- Process Initialization
-    logger.info('\n === Process Initialization === ')
-    __set_context()
-    if onto_prefix is None: onto_prefix = 'DefaultId'
-    base_output_dir = os.path.dirname(out_file_path) if out_file_path is not None else None    
-    
-    config_dict = {
-        'config_file_path': OWL_CONFIG_FILE_PATH,
-        'source_type': 'amr',
-        'source_corpus': amrld_dir_path,
-        'onto_prefix': onto_prefix,
-        'base_output_dir': base_output_dir,
-        'technical_dir_path': technical_dir_path
-    }
-
-    config = __set_config(config_dict)
-    
-    assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
-    __count_number_of_graph(config)
-    
-    # -- Extraction Processing    
-    logger.info('\n === Extraction Processing === ')
     
     # ----- Sentence File List
     sentence_dir = config.source_sentence_file   
     sentence_file_list = glob.glob(sentence_dir, recursive = True)
-    
-    # The following is for multiprocessing logging (must be exec before the pool is created
-    multiprocessing_logging.install_mp_handler()
-    
+
     # ----- Computing Extraction Argument
     mapIterable = []    
     for i in range(len(sentence_file_list)):
         config_dict['sentence_list_indice'] = i
         config_dict['sentence_file'] = sentence_file_list[i]
         mapIterable = mapIterable + [config_dict.copy()]
-            
-    # ----- (Multiprocessing) Extraction Run 
-    with multiprocessing.Pool(processes) as p:
-        triplesLists = p.map(run_extraction, mapIterable)
-    
-    result_triple_list = []
-    for tripleList in triplesLists :
-        result_triple_list = result_triple_list + tripleList
     
+    if multiprocessing_run:
+        logger.info('Multi-Processing Run ')
+        
+        # ----- Multiprocessing Logging (must be exec before the pool is created)
+        multiprocessing_logging.install_mp_handler()
+
+        # ----- (Multi-processing) Extraction Run 
+        with multiprocessing.Pool(processes) as p:
+            triplesLists = p.map(run_extraction, mapIterable)    
+            
+        # ----- Result Triple List Update
+        for tripleList in triplesLists :
+            result_triple_list = result_triple_list + tripleList
+        
+    else:
+        logger.info('Single-Processing Run ')
+            
+        # ----- (Single-processing) Extraction Run
+        for config_dict in mapIterable:
+            new_triple_list = run_extraction(config_dict)
+            result_triple_list.extend(new_triple_list)
+      
     # -- Final Ontology Generation (factoid_graph)
     logger.info('\n === Final Ontology Generation  === ') 
     factoid_graph = __generate_final_ontology(result_triple_list)
     ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)
-        
+     
     # -- Done
     logger.info('\n === Done === ')
     if config.technical_dir_path is not None:
@@ -372,12 +290,11 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
     return ontology_turtle_string
 
 
-
 #==============================================================================
 # AMR Main Methods (to generate ODRL statements) 
 #==============================================================================
 
-#@timed
+@timed
 def generate_odrl_from_amrld_file(
         amrld_file_path, onto_prefix=None, out_file_path=None, 
         technical_dir_path=None):
@@ -439,7 +356,7 @@ def generate_odrl_from_amrld_file(
     return ontology_turtle_string
 
 
-#@timed
+@timed
 def generate_odrl_from_amrld_dir(
         amrld_dir_path, onto_prefix=None, out_file_path=None, 
         technical_dir_path=None):
-- 
GitLab