Commit 06e7a5c6 authored by David Rouquet

Fix some bugs and clean code

parent 634cc1c4
 # -- Update System Path
 import os, sys
 LIB_PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
-print('Running in ' + LIB_PATH)
 os.chdir(LIB_PATH)
 sys.path.insert(0, os.path.abspath(LIB_PATH))
@@ -36,38 +36,10 @@ logger = logging.getLogger('root')
 def __set_context():
     # LIB_PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
-    print(f'Running in {LIB_PATH}')
+    print(f'Tenet Running in {LIB_PATH}')
     os.chdir(LIB_PATH)
 
-# def __set_config(
-#         config_file_path,
-#         source_type, source_corpus, onto_prefix,
-#         base_output_dir, technical_dir_path):
-#     logger.info("-- Process Setting ")
-#     logger.info(f'----- Corpus source: {source_corpus} ({source_type})')
-#     logger.info(f'----- Base output dir: {base_output_dir}')
-#     logger.info(f'----- technical dir path: {technical_dir_path}')
-#     logger.info(f'----- Ontology target (id): {onto_prefix}')
-#     logger.info(f'----- Current path: {os.getcwd()}')
-#     logger.debug(f'----- Config file: {config_file_path}')
-#     process_config = config.Config(config_file_path,
-#                                    onto_prefix,
-#                                    source_corpus,
-#                                    base_output_dir = base_output_dir,
-#                                    technical_dir_path = technical_dir_path,
-#                                    source_type = source_type
-#                                    )
-#     #process_config.source_type = source_type
-#     # config.output_ontology_namespace = target_ontology_namespace
-#     logger.debug(process_config.get_full_config())
-#     return process_config
 
 def __set_config(config_dict):
     config_file_path = config_dict['config_file_path']
     source_type = config_dict['source_type']
@@ -84,14 +56,6 @@ def __set_config(config_dict):
     logger.info(f'----- Current path: {os.getcwd()}')
     logger.debug(f'----- Config file: {config_file_path}')
-    # process_config = config.Config(
-    #     config_file_path,
-    #     onto_prefix,
-    #     source_corpus,
-    #     base_output_dir=base_output_dir,
-    #     technical_dir_path=technical_dir_path,
-    #     source_type=source_type
-    # )
     base_config = config.Config(config_dict)
     logger.debug(base_config.get_full_config())
@@ -112,7 +76,6 @@ def __apply_extraction(config, sentence_file):
     if config.technical_dir_path is not None:
         os.makedirs(config.sentence_output_dir, exist_ok=True)
     work_graph = structure.prepare_sentence_work(config, sentence_file)
     _, new_triple_list = process.apply(config, work_graph)
     return new_triple_list
@@ -142,6 +105,21 @@ def __serialize_factoid_graph(config, factoid_graph, out_file_path=None):
     return ontology_turtle_string
+
+#==============================================================================
+# Extraction Run
+#==============================================================================
+
+def run_extraction(arg_dict):
+    process_config = config.Config(arg_dict)
+    sentence_indice = arg_dict['sentence_list_indice']
+    sentence_file = arg_dict['sentence_file']
+    logger.info(f' *** sentence {sentence_indice} *** ')
+    process_config.sentence_output_dir = f'-{sentence_indice}'
+    new_triple_list = __apply_extraction(process_config, sentence_file)
+    return(new_triple_list)
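
The new run_extraction helper bundles everything one extraction task needs into a single argument dict. As a minimal sketch of that contract (the key names mirror the ones read by __set_config and run_extraction above; every value below is invented for illustration):

# Hypothetical per-sentence argument dict; values are placeholders.
arg_dict = {
    'config_file_path': 'config/owl_amr_config.xml',  # invented path
    'source_type': 'amr',                             # invented value
    'sentence_list_indice': 0,
    'sentence_file': 'inputs/sentence-0.amrld.ttl',   # invented path
}
new_triple_list = run_extraction(arg_dict)  # list of extracted RDF triples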
 #==============================================================================
 # AMR Main Methods (to create an ontology) - with one processing
 #==============================================================================
@@ -189,10 +167,6 @@ def create_ontology_from_amrld_file(amrld_file_path,
     config = __set_config(config_dict)
-    # config = __set_config(OWL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_file_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})'
 
     # -- Extraction Processing
@@ -259,23 +233,31 @@ def create_ontology_from_amrld_dir(amrld_dir_path,
     config = __set_config(config_dict)
-    # config = __set_config(OWL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_dir_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
     __count_number_of_graph(config)
 
     # -- Extraction Processing
     logger.info('\n === Extraction Processing === ')
+    # ----- Sentence File List
     sentence_dir = config.source_sentence_file
-    sentence_count = 0
+    sentence_file_list = glob.glob(sentence_dir, recursive = True)
+    # ----- Computing Extraction Argument (config_dict update)
+    for i in range(len(sentence_file_list)):
+        config_dict['sentence_list_indice'] = i
+        config_dict['sentence_file'] = sentence_file_list[i]
+    # ----- Single Processing Extraction Run
+    #sentence_count = 0
     result_triple_list = []
-    for sentence_file in glob.glob(sentence_dir, recursive = True):
-        sentence_count += 1
-        logger.info(f' *** sentence {sentence_count} *** ')
-        config.sentence_output_dir = f'-{sentence_count}'
-        new_triple_list = __apply_extraction(config, sentence_file)
+    for sentence_file in sentence_file_list:
+        # sentence_count += 1
+        # logger.info(f' *** sentence {sentence_count} *** ')
+        # config.sentence_output_dir = f'-{sentence_count}'
+        # new_triple_list = __apply_extraction(config, sentence_file)
+        # result_triple_list.extend(new_triple_list)
+        new_triple_list = run_extraction(config_dict)
         result_triple_list.extend(new_triple_list)
 
     # -- Final Ontology Generation (factoid_graph)
@@ -297,35 +279,11 @@ def create_ontology_from_amrld_dir(amrld_dir_path,
 # AMR Main Methods (to create an ontology) - Multiprocessing
 #==============================================================================
 
-#global result_triple_queue
-#global sentence_file_list
-
-def dump_queue(q):
-    q.put(None)
-    return list(iter(q.get, None))
-
-def pool_function(arg_dict):
-    #global result_triple_queue
-    #global sentence_file_list
-    #process_config = config.Config(OWL_CONFIG_FILE_PATH, 'default', 'default')
-    #process_config.update_from_dict(arg_dict)
-    process_config = config.Config(arg_dict)
-    sentence_indice = arg_dict['sentence_list_indice']
-    sentence_file = sentence_file_list[sentence_indice]
-    logger.info(f' *** sentence {sentence_indice} *** ')
-    process_config.sentence_output_dir = f'-{sentence_indice}\n'
-    new_triple_list = __apply_extraction(process_config, sentence_file)
-    # The following must be handled via a global queue
-    #result_triple_queue.extend(new_triple_list)
-    return(new_triple_list)
-
-#@timed
+@timed
 def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
                                              base_ontology_path=None,
                                              onto_prefix=None,
@@ -352,8 +310,6 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
         Complete Ontology Turtle String (synthesis of all ontology)
     """
-    global result_triple_queue
-    global sentence_file_list
     logger.info('[TENET] Extraction Processing')
 
     # -- Process Initialization
@@ -373,37 +329,29 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
     config = __set_config(config_dict)
-    # config = __set_config(OWL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_dir_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
     __count_number_of_graph(config)
 
     # -- Extraction Processing
     logger.info('\n === Extraction Processing === ')
-    sentence_dir = config.source_sentence_file
-    sentence_count = 0
-    result_triple_list = []
-    #result_triple_queue = multiprocessing.Queue()
+    # ----- Sentence File List
+    sentence_dir = config.source_sentence_file
     sentence_file_list = glob.glob(sentence_dir, recursive = True)
 
     # The following is for multiprocessing logging (must be exec before the pool is created)
     multiprocessing_logging.install_mp_handler()
-    # config_dict = config.to_dict()
-    #star_iterable = [(i, config) for i in range(len(sentence_file_list))]
+    # ----- Computing Extraction Argument
    mapIterable = []
     for i in range(len(sentence_file_list)):
         config_dict['sentence_list_indice'] = i
+        config_dict['sentence_file'] = sentence_file_list[i]
         mapIterable = mapIterable + [config_dict.copy()]
+    # ----- (Multiprocessing) Extraction Run
     with multiprocessing.Pool(processes) as p:
-        triplesLists = p.map(pool_function, mapIterable)
+        triplesLists = p.map(run_extraction, mapIterable)
     result_triple_list = []
     for tripleList in triplesLists:
@@ -411,7 +359,6 @@ def create_ontology_from_amrld_dir_multi_cpu(amrld_dir_path,
     # -- Final Ontology Generation (factoid_graph)
     logger.info('\n === Final Ontology Generation === ')
-    #result_triple_list = dump_queue(result_triple_queue)
     factoid_graph = __generate_final_ontology(result_triple_list)
     ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)
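
A note on the fan-out above: each pass through the loop mutates the same config_dict, so the .copy() when building mapIterable is what gives every pool task its own frozen snapshot; without it, all workers would see the values of the last iteration. A self-contained sketch of the same pattern, with a stub worker standing in for run_extraction:

import multiprocessing

def work(arg_dict):
    # Stub standing in for run_extraction: returns a one-element "triple list".
    return [arg_dict['sentence_list_indice']]

if __name__ == '__main__':
    sentence_file_list = ['s0.ttl', 's1.ttl', 's2.ttl']  # invented inputs
    config_dict = {}
    mapIterable = []
    for i in range(len(sentence_file_list)):
        config_dict['sentence_list_indice'] = i
        config_dict['sentence_file'] = sentence_file_list[i]
        mapIterable.append(config_dict.copy())           # snapshot per task
    with multiprocessing.Pool(2) as p:
        triplesLists = p.map(work, mapIterable)
    result_triple_list = []
    for tripleList in triplesLists:
        result_triple_list.extend(tripleList)            # flatten per-sentence lists
    print(result_triple_list)                            # -> [0, 1, 2]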
@@ -470,10 +417,6 @@ def generate_odrl_from_amrld_file(
     config = __set_config(config_dict)
-    # config = __set_config(ODRL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_file_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})'
 
     # -- Extraction Processing
@@ -537,10 +480,6 @@ def generate_odrl_from_amrld_dir(
     config = __set_config(config_dict)
-    # config = __set_config(ODRL_CONFIG_FILE_PATH,
-    #                       'amr', amrld_dir_path, onto_prefix,
-    #                       base_output_dir, technical_dir_path)
     assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
     __count_number_of_graph(config)
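
For completeness, a hypothetical end-to-end call of the multiprocess entry point. Only amrld_dir_path, base_ontology_path and onto_prefix are visible in the signature above; out_file_path and processes appear in the function body and are assumed here to be keyword parameters, and all values are invented:

ontology_ttl = create_ontology_from_amrld_dir_multi_cpu(
    'inputs/amrld-corpus/',            # invented directory of AMR-LD files
    onto_prefix='demo',                # invented ontology id
    out_file_path='outputs/demo.ttl',  # assumed parameter (used by __serialize_factoid_graph)
    processes=4)                       # assumed parameter (passed to multiprocessing.Pool)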