Skip to content
Snippets Groups Projects
Commit 250c8480 authored by David Rouquet's avatar David Rouquet
Browse files

Merge of single and multi processing extraction functions

parent 06e7a5c6
Branches
No related tags found
No related merge requests found
......@@ -7,7 +7,6 @@ sys.path.insert(0, os.path.abspath(LIB_PATH))
# -- Main Methods
from tenet.main import create_ontology_from_amrld_file
from tenet.main import create_ontology_from_amrld_dir
from tenet.main import create_ontology_from_amrld_dir_multi_cpu
from tenet.main import generate_odrl_from_amrld_file
from tenet.main import generate_odrl_from_amrld_dir
#from main import create_ontology_from_unlrdf_file
\ No newline at end of file
......@@ -35,7 +35,6 @@ logger = logging.getLogger('root')
#==============================================================================
def __set_context():
    """Switch the current working directory to the library root.

    Relies on the module-level ``LIB_PATH`` constant and announces the
    location on stdout so runs are traceable.
    """
    os.chdir(LIB_PATH)
    print(f'Tenet Running in {LIB_PATH}')
......@@ -121,10 +120,10 @@ def run_extraction(arg_dict):
#==============================================================================
# AMR Main Methods (to create an ontology) - with one processing
# AMR Main Methods (to create an ontology)
#==============================================================================
#@timed
@timed
def create_ontology_from_amrld_file(amrld_file_path,
base_ontology_path=None,
onto_prefix=None,
......@@ -151,7 +150,7 @@ def create_ontology_from_amrld_file(amrld_file_path,
logger.info('[TENET] Extraction Processing')
# -- Process Initialization
logger.info('\n === Process Initialization === ')
logger.info('\n\n === Process Initialization === ')
__set_context()
if onto_prefix is None: onto_prefix = 'DefaultId'
base_output_dir = os.path.dirname(out_file_path) if out_file_path is not None else None
......@@ -170,17 +169,17 @@ def create_ontology_from_amrld_file(amrld_file_path,
assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})'
# -- Extraction Processing
logger.info('\n === Extraction Processing === ')
logger.info('\n\n === Extraction Processing === ')
config.sentence_output_dir = f'-0'
result_triple_list = __apply_extraction(config, amrld_file_path)
# -- Final Ontology Generation (factoid_graph)
logger.info('\n === Final Ontology Generation === ')
logger.info('\n\n === Final Ontology Generation === ')
factoid_graph = __generate_final_ontology(result_triple_list)
ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)
# -- Done
logger.info('\n === Done === ')
logger.info('\n\n === Done === ')
if config.technical_dir_path is not None:
log_file_name = 'tenet.log'
dest_file_path = f'{config.technical_dir_path}{log_file_name}'
......@@ -189,106 +188,13 @@ def create_ontology_from_amrld_file(amrld_file_path,
return ontology_turtle_string
#@timed
def create_ontology_from_amrld_dir(amrld_dir_path,
                                   base_ontology_path=None,
                                   onto_prefix=None,
                                   out_file_path=None,
                                   technical_dir_path=None):
    """
    Create an ontology (as a Turtle string) from a transduction analysis of
    every AMR-LD Turtle file found under a directory (one extraction run per
    sentence file, single process).

    Parameters
    ----------
    amrld_dir_path: a path to a directory recursively containing AMR-LD Turtle Files.
    base_ontology_path: a path to a Base Ontology Turtle File if defined.
        NOTE(review): not referenced in this body — presumably consumed via
        configuration elsewhere; confirm before relying on it.
    onto_prefix: the target ontology prefix if defined (if not defined a prefix based on the amrld filename is used).
    out_file_path: a file path where the output ontology is written if defined (the function still outputs the string).
    technical_dir_path: a dir path where some technical and log files are written if defined.

    Returns
    -------
    Complete Ontology Turtle String (synthesis of all ontologies).
    """

    logger.info('[TENET] Extraction Processing')

    # -- Process Initialization
    logger.info('\n === Process Initialization === ')
    __set_context()
    if onto_prefix is None: onto_prefix = 'DefaultId'
    base_output_dir = os.path.dirname(out_file_path) if out_file_path is not None else None
    config_dict = {
        'config_file_path': OWL_CONFIG_FILE_PATH,
        'source_type': 'amr',
        'source_corpus': amrld_dir_path,
        'onto_prefix': onto_prefix,
        'base_output_dir': base_output_dir,
        'technical_dir_path': technical_dir_path
    }
    config = __set_config(config_dict)
    assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
    __count_number_of_graph(config)

    # -- Extraction Processing
    logger.info('\n === Extraction Processing === ')

    # ----- Sentence File List
    # config.source_sentence_file is used as a (recursive) glob pattern.
    sentence_dir = config.source_sentence_file
    sentence_file_list = glob.glob(sentence_dir, recursive=True)

    # ----- Single-Processing Extraction Run
    # BUG FIX: the original code first looped over all files mutating the
    # SAME config_dict (so only the last file's entries survived), then ran
    # run_extraction(config_dict) once per file with that identical dict —
    # every run therefore processed the last sentence file. Each run now
    # receives its own argument dict carrying its own file and indice.
    result_triple_list = []
    for indice, sentence_file in enumerate(sentence_file_list):
        run_arg_dict = config_dict.copy()
        run_arg_dict['sentence_list_indice'] = indice
        run_arg_dict['sentence_file'] = sentence_file
        new_triple_list = run_extraction(run_arg_dict)
        result_triple_list.extend(new_triple_list)

    # -- Final Ontology Generation (factoid_graph)
    logger.info('\n === Final Ontology Generation === ')
    factoid_graph = __generate_final_ontology(result_triple_list)
    ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)

    # -- Done
    logger.info('\n === Done === ')
    if config.technical_dir_path is not None:
        # Keep a copy of the run log next to the technical outputs.
        log_file_name = 'tenet.log'
        dest_file_path = f'{config.technical_dir_path}{log_file_name}'
        shutil.copy(log_file_name, dest_file_path)

    return ontology_turtle_string
#==============================================================================
# AMR Main Methods (to create an ontology) - Multiprocessing
#==============================================================================
@timed
def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
def create_ontology_from_amrld_dir(amrld_dir_path,
base_ontology_path=None,
onto_prefix=None,
out_file_path=None,
technical_dir_path=None,
multiprocessing_run=False,
processes=multiprocessing.cpu_count()-1
):
"""
......@@ -297,12 +203,11 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
Parameters
----------
amrld_dir_path: a path to a directory recursively containing AMR-LD Turtle Files.
amrld_file_path: a path to an AMR-LD Turtle File.
base_ontology_path: a path to a Base Ontology Turtle File if defined.
onto_prefix: the target ontology prefix if defined (if not defined a prefix based on the amrld filename is used).
out_file_path: a file path where the output ontology is written if defined (the function still outputs the string).
technical_dir_path: a dir path where some technical and log files are written if defined.
processes: the number of processes in the multiprocessing pool
Returns
-------
......@@ -310,6 +215,7 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
Complete Ontology Turtle String (synthesis of all ontology)
"""
logger.info('[TENET] Extraction Processing')
# -- Process Initialization
......@@ -334,14 +240,12 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
# -- Extraction Processing
logger.info('\n === Extraction Processing === ')
result_triple_list = []
# ----- Sentence File List
sentence_dir = config.source_sentence_file
sentence_file_list = glob.glob(sentence_dir, recursive = True)
# The following is for multiprocessing logging (must be exec before the pool is created
multiprocessing_logging.install_mp_handler()
# ----- Computing Extraction Argument
mapIterable = []
for i in range(len(sentence_file_list)):
......@@ -349,14 +253,28 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
config_dict['sentence_file'] = sentence_file_list[i]
mapIterable = mapIterable + [config_dict.copy()]
# ----- (Multiprocessing) Extraction Run
if multiprocessing_run:
logger.info('Multi-Processing Run ')
# ----- Multiprocessing Logging (must be exec before the pool is created)
multiprocessing_logging.install_mp_handler()
# ----- (Multi-processing) Extraction Run
with multiprocessing.Pool(processes) as p:
triplesLists = p.map(run_extraction, mapIterable)
result_triple_list = []
# ----- Result Triple List Update
for tripleList in triplesLists :
result_triple_list = result_triple_list + tripleList
else:
logger.info('Single-Processing Run ')
# ----- (Single-processing) Extraction Run
for config_dict in mapIterable:#
new_triple_list = run_extraction(config_dict)
result_triple_list.extend(new_triple_list)
# -- Final Ontology Generation (factoid_graph)
logger.info('\n === Final Ontology Generation === ')
factoid_graph = __generate_final_ontology(result_triple_list)
......@@ -372,12 +290,11 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
return ontology_turtle_string
#==============================================================================
# AMR Main Methods (to generate ODRL statements)
#==============================================================================
#@timed
@timed
def generate_odrl_from_amrld_file(
amrld_file_path, onto_prefix=None, out_file_path=None,
technical_dir_path=None):
......@@ -439,7 +356,7 @@ def generate_odrl_from_amrld_file(
return ontology_turtle_string
#@timed
@timed
def generate_odrl_from_amrld_dir(
amrld_dir_path, onto_prefix=None, out_file_path=None,
technical_dir_path=None):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment