Skip to content
Snippets Groups Projects
Commit a5d9df71 authored by Aurélien Lamercerie's avatar Aurélien Lamercerie
Browse files

Specific Main Method to ODRL extraction

parent 1aa46a44
Branches
Tags
No related merge requests found
Showing
with 282 additions and 107 deletions
...@@ -8,4 +8,6 @@ sys.path.insert(0, os.path.abspath(LIB_PATH)) ...@@ -8,4 +8,6 @@ sys.path.insert(0, os.path.abspath(LIB_PATH))
# -- Main Methods # -- Main Methods
from tenet.main import create_ontology_from_amrld_file from tenet.main import create_ontology_from_amrld_file
from tenet.main import create_ontology_from_amrld_dir from tenet.main import create_ontology_from_amrld_dir
from tenet.main import generate_odrl_from_amrld_file
from tenet.main import generate_odrl_from_amrld_dir
#from main import create_ontology_from_unlrdf_file #from main import create_ontology_from_unlrdf_file
\ No newline at end of file
...@@ -19,7 +19,8 @@ from utility.timer import timed ...@@ -19,7 +19,8 @@ from utility.timer import timed
# -- Config File Path # -- Config File Path
LIB_PATH = os.path.dirname(os.path.abspath(__file__)) + '/' LIB_PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
LOGGING_CONF_FILE_PATH = f'{LIB_PATH}logging.conf' LOGGING_CONF_FILE_PATH = f'{LIB_PATH}logging.conf'
CONFIG_FILE_PATH = f'{LIB_PATH}config.xml' OWL_CONFIG_FILE_PATH = f'{LIB_PATH}owl_amr_config.xml'
ODRL_CONFIG_FILE_PATH = f'{LIB_PATH}odrl_amr_config.xml'
# -- Logging # -- Logging
logging.config.fileConfig(LOGGING_CONF_FILE_PATH, disable_existing_loggers=False) logging.config.fileConfig(LOGGING_CONF_FILE_PATH, disable_existing_loggers=False)
...@@ -36,7 +37,9 @@ def __set_context(): ...@@ -36,7 +37,9 @@ def __set_context():
os.chdir(LIB_PATH) os.chdir(LIB_PATH)
def __set_config(source_type, source_corpus, onto_prefix, def __set_config(
config_file_path,
source_type, source_corpus, onto_prefix,
base_output_dir, technical_dir_path): base_output_dir, technical_dir_path):
logger.info("-- Process Setting ") logger.info("-- Process Setting ")
...@@ -45,9 +48,9 @@ def __set_config(source_type, source_corpus, onto_prefix, ...@@ -45,9 +48,9 @@ def __set_config(source_type, source_corpus, onto_prefix,
logger.info(f'----- technical dir path: {technical_dir_path}') logger.info(f'----- technical dir path: {technical_dir_path}')
logger.info(f'----- Ontology target (id): {onto_prefix}') logger.info(f'----- Ontology target (id): {onto_prefix}')
logger.info(f'----- Current path: {os.getcwd()}') logger.info(f'----- Current path: {os.getcwd()}')
logger.debug(f'----- Config file: {CONFIG_FILE_PATH}') logger.debug(f'----- Config file: {config_file_path}')
process_config = config.Config(CONFIG_FILE_PATH, process_config = config.Config(config_file_path,
onto_prefix, onto_prefix,
source_corpus, source_corpus,
base_output_dir = base_output_dir, base_output_dir = base_output_dir,
...@@ -104,7 +107,7 @@ def __serialize_factoid_graph(config, factoid_graph, out_file_path=None): ...@@ -104,7 +107,7 @@ def __serialize_factoid_graph(config, factoid_graph, out_file_path=None):
#============================================================================== #==============================================================================
# AMR Main Methods # AMR Main Methods (to create an ontology)
#============================================================================== #==============================================================================
@timed @timed
...@@ -137,7 +140,8 @@ def create_ontology_from_amrld_file(amrld_file_path, ...@@ -137,7 +140,8 @@ def create_ontology_from_amrld_file(amrld_file_path,
logger.info('\n === Process Initialization === ') logger.info('\n === Process Initialization === ')
__set_context() __set_context()
if onto_prefix is None: onto_prefix = 'DefaultId' if onto_prefix is None: onto_prefix = 'DefaultId'
config = __set_config('amr', amrld_file_path, onto_prefix, config = __set_config(OWL_CONFIG_FILE_PATH,
'amr', amrld_file_path, onto_prefix,
out_file_path, technical_dir_path) out_file_path, technical_dir_path)
assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})' assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})'
...@@ -192,7 +196,126 @@ def create_ontology_from_amrld_dir(amrld_dir_path, ...@@ -192,7 +196,126 @@ def create_ontology_from_amrld_dir(amrld_dir_path,
logger.info('\n === Process Initialization === ') logger.info('\n === Process Initialization === ')
__set_context() __set_context()
if onto_prefix is None: onto_prefix = 'DefaultId' if onto_prefix is None: onto_prefix = 'DefaultId'
config = __set_config('amr', amrld_dir_path, onto_prefix, config = __set_config(OWL_CONFIG_FILE_PATH,
'amr', amrld_dir_path, onto_prefix,
out_file_path, technical_dir_path)
assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
__count_number_of_graph(config)
# -- Extraction Processing
logger.info('\n === Extraction Processing === ')
sentence_dir = config.source_sentence_file
sentence_count = 0
result_triple_list = []
for sentence_file in glob.glob(sentence_dir, recursive = True):
sentence_count += 1
logger.info(f' *** sentence {sentence_count} *** ')
config.sentence_output_dir = f'-{sentence_count}'
new_triple_list = __apply_extraction(config, sentence_file)
result_triple_list.extend(new_triple_list)
# -- Final Ontology Generation (factoid_graph)
logger.info('\n === Final Ontology Generation === ')
factoid_graph = __generate_final_ontology(result_triple_list)
ontology_turtle_string = __serialize_factoid_graph(config, factoid_graph, out_file_path)
# -- Done
logger.info('\n === Done === ')
if config.technical_dir_path is not None:
log_file_name = 'tenet.log'
dest_file_path = f'{config.technical_dir_path}{log_file_name}'
shutil.copy(log_file_name, dest_file_path)
return ontology_turtle_string
#==============================================================================
# AMR Main Methods (to generate ODRL statements)
#==============================================================================
@timed
def generate_odrl_from_amrld_file(
        amrld_file_path, onto_prefix=None, out_file_path=None,
        technical_dir_path=None):
    """
    Generate an ODRL statement (as a Turtle string) from a single AMR-LD file.

    The process is configured with ODRL_CONFIG_FILE_PATH, the extraction is
    applied to the given file, and the resulting triples are turned into a
    final graph which is then serialized.

    Parameters
    ----------
    amrld_file_path: a path to an AMR-LD Turtle file.
    onto_prefix: the target ontology prefix ('DefaultId' is used when None).
    out_file_path: forwarded to the configuration and to the serialization
        step (presumably the file where the statement is written; confirm
        against __serialize_factoid_graph).
    technical_dir_path: forwarded to the configuration; when the resulting
        configuration exposes a technical directory, 'tenet.log' is copied
        into it at the end of the run.

    Returns
    -------
    The Turtle string returned by the serialization step.
    """

    logger.info('[TENET] Extraction Processing')

    # -- Process Initialization
    logger.info('\n === Process Initialization === ')
    __set_context()
    if onto_prefix is None:
        onto_prefix = 'DefaultId'
    # Local name chosen so that it does not shadow the `config` module.
    process_config = __set_config(
        ODRL_CONFIG_FILE_PATH,
        'amr', amrld_file_path, onto_prefix,
        out_file_path, technical_dir_path)
    assert os.path.exists(amrld_file_path), f'input file does not exists ({amrld_file_path})'

    # -- Extraction Processing
    logger.info('\n === Extraction Processing === ')
    # A single file is handled as sentence number 0.
    process_config.sentence_output_dir = '-0'
    triples = __apply_extraction(process_config, amrld_file_path)

    # -- Final Ontology Generation (factoid_graph)
    logger.info('\n === Final Ontology Generation === ')
    graph = __generate_final_ontology(triples)
    turtle_string = __serialize_factoid_graph(
        process_config, graph, out_file_path)

    # -- Done
    logger.info('\n === Done === ')
    technical_dir = process_config.technical_dir_path
    if technical_dir is not None:
        # The log file name is resolved against the current working
        # directory; the destination is built by plain concatenation, so
        # the technical dir path is expected to end with a separator.
        log_file_name = 'tenet.log'
        shutil.copy(log_file_name, f'{technical_dir}{log_file_name}')

    return turtle_string
@timed
def generate_odrl_from_amrld_dir(
amrld_dir_path, onto_prefix=None, out_file_path=None,
technical_dir_path=None):
"""
Method to create an ontology (as Turtle String) from a transduction
analysis of an AMRLD file.
Parameters
----------
amrld_file_path: a path to an AMR-LD Turtle File.
onto_prefix: the target ontology prefix if defined (if not defined a prefix based on the amrld filename is used).
out_file_path: a file path where the ODRL statement is written if defined (the function still outputs the string).
technical_dir_path: a dir path where some technical and log files are written if defined.
Returns
-------
Dictionary [filename -> Ontology Turtle String].
Complete Ontology Turtle String (synthesis of all ontology)
"""
logger.info('[TENET] Extraction Processing')
# -- Process Initialization
logger.info('\n === Process Initialization === ')
__set_context()
if onto_prefix is None: onto_prefix = 'DefaultId'
config = __set_config(ODRL_CONFIG_FILE_PATH,
'amr', amrld_dir_path, onto_prefix,
out_file_path, technical_dir_path) out_file_path, technical_dir_path)
assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})' assert os.path.exists(amrld_dir_path), f'input directory does not exists ({amrld_dir_path})'
__count_number_of_graph(config) __count_number_of_graph(config)
......
...@@ -7,8 +7,8 @@ ...@@ -7,8 +7,8 @@
/> />
<cts <cts
amr_cts_ref = "amr_scheme_clara_1" amr_cts_ref = "odrl_amr_scheme_1"
unl_cts_ref = "unl_scheme_1" unl_cts_ref = "None"
/> />
<directory <directory
......
<?xml version="1.0" encoding="UTF-8"?>
<!-- Process configuration file. Presumably one of the *_amr_config.xml files
     handed to config.Config by the main methods (TODO confirm which one). -->
<config>
<!-- base: processing granularity and kind of source data. -->
<base
process_level = "sentence"
source_type = "amr"
/>
<!-- cts: names of the schemes referenced for AMR and UNL sources
     (presumably looked up under the "cts" directory declared below; confirm). -->
<cts
amr_cts_ref = "owl_amr_scheme_1"
unl_cts_ref = "owl_unl_scheme_1"
/>
<!-- directory: locations used by the process; all values here are relative paths. -->
<directory
base_dir = "./"
structure = "structure/"
cts = "scheme/"
target_frame = "../input/targetFrameStructure/"
amr_input_documents = "../input/amrDocuments/"
unl_input_documents = "../input/unlDocuments/"
output = "../output/"
/>
<!-- file: schema and parameter names, given without extension.
     NOTE(review): semantic_net_schema is "odrl-snet-schema" while the cts
     references above are "owl_*"; confirm this pairing is intended. -->
<file
amr_input_data_schema = "amr-rdf-schema"
unl_input_data_schema = "unl-rdf-schema"
semantic_net_schema = "odrl-snet-schema"
config_param = "config-parameters"
/>
<!-- reference: default base URI and default file-name suffixes. -->
<reference
default_base_uri = "https://tenet.tetras-libre.fr/working"
default_ontology_suffix = "-ontology.ttl"
default_ontology_seed_suffix = "-ontology-seed.ttl"
/>
<!-- output: namespace URI declared for the output ontology. -->
<output
ontology_namespace = "https://tenet.tetras-libre.fr/base-ontology/"
/>
</config>
File moved
This diff is collapsed.
import os import os
import sys import sys
LIB_PATH = f'{os.path.dirname(os.path.abspath(__file__))}/..' LIB_PATH = f'{os.path.dirname(os.path.abspath(__file__))}/../..'
print(f'Test Context: {LIB_PATH}') print(f'Test Context: {LIB_PATH}')
sys.path.insert(0, os.path.abspath(LIB_PATH)) sys.path.insert(0, os.path.abspath(LIB_PATH))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment