Skip to content
Snippets Groups Projects
Commit 250c8480 authored by David Rouquet's avatar David Rouquet
Browse files

Merge of single and multi processing extraction functions

parent 06e7a5c6
Branches
No related tags found
No related merge requests found
......@@ -7,7 +7,6 @@ sys.path.insert(0, os.path.abspath(LIB_PATH))
# -- Main Methods
from tenet.main import create_ontology_from_amrld_file
from tenet.main import create_ontology_from_amrld_dir
from tenet.main import create_ontology_from_amrld_dir_multi_cpu
from tenet.main import generate_odrl_from_amrld_file
from tenet.main import generate_odrl_from_amrld_dir
#from main import create_ontology_from_unlrdf_file
\ No newline at end of file
......@@ -35,7 +35,6 @@ logger = logging.getLogger('root')
#==============================================================================
def __set_context():
    """Switch the current working directory to the library root.

    Relies on the module-level ``LIB_PATH`` constant and announces the
    location on stdout so runs are traceable.
    """
    os.chdir(LIB_PATH)
    print(f'Tenet Running in {LIB_PATH}')
......@@ -121,10 +120,10 @@ def run_extraction(arg_dict):
#==============================================================================
# AMR Main Methods (to create an ontology) - with one processing
# AMR Main Methods (to create an ontology)
#==============================================================================
#@timed
@timed
def create_ontology_from_amrld_file(amrld_file_path,
base_ontology_path=None,
onto_prefix=None,
......@@ -151,7 +150,7 @@ def create_ontology_from_amrld_file(amrld_file_path,
logger.info('[TENET] Extraction Processing')
# -- Process Initialization
logger.info('\n === Process Initialization === ')
logger.info('\n\n === Process Initialization === ')
__set_context()
if onto_prefix is None: onto_prefix = 'DefaultId'
base_output_dir = os.path.dirname(out_file_path) if out_file_path is not None else None
......@@ -170,17 +169,17 @@ def create_ontology_from_amrld_file(amrld_file_path,
assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})'
# -- Extraction Processing
logger.info('\n === Extraction Processing === ')
logger.info('\n\n === Extraction Processing === ')
config.sentence_output_dir = f'-0'
result_triple_list = __apply_extraction(config, amrld_file_path)
# -- Final Ontology Generation (factoid_graph)
logger.info('\n === Final Ontology Generation === ')
logger.info('\n\n === Final Ontology Generation === ')
factoid_graph = __generate_final_ontology(result_triple_list)
ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)
# -- Done
logger.info('\n === Done === ')
logger.info('\n\n === Done === ')
if config.technical_dir_path is not None:
log_file_name = 'tenet.log'
dest_file_path = f'{config.technical_dir_path}{log_file_name}'
......@@ -189,106 +188,13 @@ def create_ontology_from_amrld_file(amrld_file_path,
return ontology_turtle_string
#@timed
def create_ontology_from_amrld_dir(amrld_dir_path,
                                   base_ontology_path=None,
                                   onto_prefix=None,
                                   out_file_path=None,
                                   technical_dir_path=None):
    """
    Create an ontology (as a Turtle string) from a transduction analysis of
    every AMR-LD Turtle file found under a directory (one extraction run per
    sentence file, single process).

    Parameters
    ----------
    amrld_dir_path: a path to a directory recursively containing AMR-LD Turtle Files.
    base_ontology_path: a path to a Base Ontology Turtle File if defined.
        NOTE(review): not referenced in this body — presumably consumed via
        configuration elsewhere; confirm before relying on it.
    onto_prefix: the target ontology prefix if defined (if not defined a prefix based on the amrld filename is used).
    out_file_path: a file path where the output ontology is written if defined (the function still outputs the string).
    technical_dir_path: a dir path where some technical and log files are written if defined.

    Returns
    -------
    Complete Ontology Turtle String (synthesis of all ontologies).
    """

    logger.info('[TENET] Extraction Processing')

    # -- Process Initialization
    logger.info('\n === Process Initialization === ')
    __set_context()
    if onto_prefix is None: onto_prefix = 'DefaultId'
    base_output_dir = os.path.dirname(out_file_path) if out_file_path is not None else None
    config_dict = {
        'config_file_path': OWL_CONFIG_FILE_PATH,
        'source_type': 'amr',
        'source_corpus': amrld_dir_path,
        'onto_prefix': onto_prefix,
        'base_output_dir': base_output_dir,
        'technical_dir_path': technical_dir_path
    }
    config = __set_config(config_dict)
    assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
    __count_number_of_graph(config)

    # -- Extraction Processing
    logger.info('\n === Extraction Processing === ')

    # ----- Sentence File List
    # config.source_sentence_file is used as a (recursive) glob pattern.
    sentence_dir = config.source_sentence_file
    sentence_file_list = glob.glob(sentence_dir, recursive=True)

    # ----- Single-Processing Extraction Run
    # BUG FIX: the original code first looped over all files mutating the
    # SAME config_dict (so only the last file's entries survived), then ran
    # run_extraction(config_dict) once per file with that identical dict —
    # every run therefore processed the last sentence file. Each run now
    # receives its own argument dict carrying its own file and indice.
    result_triple_list = []
    for indice, sentence_file in enumerate(sentence_file_list):
        run_arg_dict = config_dict.copy()
        run_arg_dict['sentence_list_indice'] = indice
        run_arg_dict['sentence_file'] = sentence_file
        new_triple_list = run_extraction(run_arg_dict)
        result_triple_list.extend(new_triple_list)

    # -- Final Ontology Generation (factoid_graph)
    logger.info('\n === Final Ontology Generation === ')
    factoid_graph = __generate_final_ontology(result_triple_list)
    ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)

    # -- Done
    logger.info('\n === Done === ')
    if config.technical_dir_path is not None:
        # Keep a copy of the run log next to the technical outputs.
        log_file_name = 'tenet.log'
        dest_file_path = f'{config.technical_dir_path}{log_file_name}'
        shutil.copy(log_file_name, dest_file_path)

    return ontology_turtle_string
#==============================================================================
# AMR Main Methods (to create an ontology) - Multiprocessing
#==============================================================================
@timed
def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
def create_ontology_from_amrld_dir(amrld_dir_path,
base_ontology_path=None,
onto_prefix=None,
out_file_path=None,
technical_dir_path=None,
multiprocessing_run=False,
processes=multiprocessing.cpu_count()-1
):
"""
......@@ -297,12 +203,11 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
Parameters
----------
amrld_dir_path: a path to a directory recursively containing AMR-LD Turtle Files.
amrld_file_path: a path to an AMR-LD Turtle File.
base_ontology_path: a path to a Base Ontology Turtle File if defined.
onto_prefix: the target ontology prefix if defined (if not defined a prefix based on the amrld filename is used).
out_file_path: a file path where the output ontology is written if defined (the function still outputs the string).
technical_dir_path: a dir path where some technical and log files are written if defined.
processes: the number of processes in the multiprocessing pool
Returns
-------
......@@ -310,6 +215,7 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
Complete Ontology Turtle String (synthesis of all ontology)
"""
logger.info('[TENET] Extraction Processing')
# -- Process Initialization
......@@ -334,14 +240,12 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
# -- Extraction Processing
logger.info('\n === Extraction Processing === ')
result_triple_list = []
# ----- Sentence File List
sentence_dir = config.source_sentence_file
sentence_file_list = glob.glob(sentence_dir, recursive = True)
# The following is for multiprocessing logging (must be exec before the pool is created
multiprocessing_logging.install_mp_handler()
# ----- Computing Extraction Argument
mapIterable = []
for i in range(len(sentence_file_list)):
......@@ -349,14 +253,28 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
config_dict['sentence_file'] = sentence_file_list[i]
mapIterable = mapIterable + [config_dict.copy()]
# ----- (Multiprocessing) Extraction Run
if multiprocessing_run:
logger.info('Multi-Processing Run ')
# ----- Multiprocessing Logging (must be exec before the pool is created)
multiprocessing_logging.install_mp_handler()
# ----- (Multi-processing) Extraction Run
with multiprocessing.Pool(processes) as p:
triplesLists = p.map(run_extraction, mapIterable)
result_triple_list = []
# ----- Result Triple List Update
for tripleList in triplesLists :
result_triple_list = result_triple_list + tripleList
else:
logger.info('Single-Processing Run ')
# ----- (Single-processing) Extraction Run
for config_dict in mapIterable:#
new_triple_list = run_extraction(config_dict)
result_triple_list.extend(new_triple_list)
# -- Final Ontology Generation (factoid_graph)
logger.info('\n === Final Ontology Generation === ')
factoid_graph = __generate_final_ontology(result_triple_list)
......@@ -372,12 +290,11 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
return ontology_turtle_string
#==============================================================================
# AMR Main Methods (to generate ODRL statements)
#==============================================================================
#@timed
@timed
def generate_odrl_from_amrld_file(
amrld_file_path, onto_prefix=None, out_file_path=None,
technical_dir_path=None):
......@@ -439,7 +356,7 @@ def generate_odrl_from_amrld_file(
return ontology_turtle_string
#@timed
@timed
def generate_odrl_from_amrld_dir(
amrld_dir_path, onto_prefix=None, out_file_path=None,
technical_dir_path=None):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment