diff --git a/extract.py b/extract.py
index aa78b0b4c2d887d38948f1d5153029b207e96666..9801d3f41b8a26b36e10ce8f4e41b41cb58dd49e 100644
--- a/extract.py
+++ b/extract.py
@@ -12,6 +12,7 @@
 # Importing required modules
 #==============================================================================
 
+import glob
 from rdflib import Graph
 
 
@@ -26,16 +27,19 @@ CORPUS_DIR = "corpus/"
 CTS_DIR = "cts/"
 
 # Config Definition
+dash_file = "http://datashapes.org/dash.ttl"
 schema_file = "unl-rdf-schema.ttl"
 semantic_net_file = "semantic-net.ttl"
+cts_file = "transduction-schemes.ttl"
 c_param_file = "config-parameters.ttl"
 req_onto_file = "requirement-ontology.ttl"
 sys_onto_file = "system-ontology.ttl"
 f_param_file = "ontology-parameters.ttl"
 
 # Dev Tests
-corpus = "CCTP-SRSA-IP-20210625/"
-req_300_file = "SRSA-IP_STB_PHON_00300.ttl"
+base_uri = "https://unsel.tetras-libre.fr/tenet/working"
+corpus = "CCTP-SRSA-IP-20210831/"
+req_file = "SRSA-IP_STB_PHON_00100.ttl"
 
 
 #==============================================================================
@@ -63,6 +67,10 @@ def load_config(g):
     g.parse(file_ref)
     print("----- Semantic Net Definition (" + str(len(g)) + ")")
 
+    file_ref = dash_file
+    g.parse(file_ref)
+    print("----- Data Shapes Dash (" + str(len(g)) + ")")
+
     file_ref = CONFIG_DIR + c_param_file
     g.parse(file_ref)
     print("----- Config Parameter Definition (" + str(len(g)) + ")")
@@ -84,120 +92,40 @@ def load_frame(g):
     print("----- Ontology Parameters (" + str(len(g)) + ")")
 
 
-def load_sentence(g, sentence_file):
+def load_sentences(g):
     print("-- Sentence Loading:")
-    file_ref = CORPUS_DIR + corpus + sentence_file
-    g.parse(file_ref)
-    print("----- " + sentence_file + " (" + str(len(g)) + ")")
-
-
-#==============================================================================
-# Graph Preprocessing
-#==============================================================================
-
-def bypass_reification(g):
-    print("-- Bypass Reification:")
-    cts_group = "preprocessing/reification/"
-
-    queryString = read_query(cts_group, 201)
-    g.update(queryString)
-    print("----- update to extend UNL relation (" + str(len(g)) + ")")
-
-    queryString = read_query(cts_group, 202)
-    g.update(queryString)
-    print("----- update to bypass scopes (" + str(len(g)) + ")")
+    #file_ref = CORPUS_DIR + corpus + sentence_file
+    target_ref = CORPUS_DIR + corpus + '**/*.ttl'
+    for file_ref in glob.glob(target_ref, recursive = True):
+        g.parse(file_ref)
+        print("----- " + file_ref + " (" + str(len(g)) + ")")
 
 
 
 #==============================================================================
-# Net Extension (CTS Application)
-#==============================================================================
-
-def extract_entity(g):
-    print("-- Net Extension to Extract Entity:")
-    cts_group = "net_extension/entity_extraction/"
-
-    queryString = read_query(cts_group, 301)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- net instances: ")
-    for r in g.query(read_query("selection/", 101)):
-        print("-------- " + f"{r.net} ")
-
-    queryString = read_query(cts_group, 302)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- net instances: ")
-    for r in g.query(read_query("selection/", 101)):
-        print("-------- " + f"{r.net} ")
-
-    queryString = read_query(cts_group, 303)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- net instances: ")
-    for r in g.query(read_query("selection/", 101)):
-        print("-------- " + f"{r.net} ")
-
-    queryString = read_query(cts_group, 304)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- net instances: ")
-    for r in g.query(read_query("selection/", 101)):
-        print("-------- " + f"{r.net} ")
-
-    queryString = read_query(cts_group, 305)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- net instances: ")
-    for r in g.query(read_query("selection/", 101)):
-        print("-------- " + f"{r.net} ")
-
-    queryString = read_query(cts_group, 306)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- net instances: ")
-    for r in g.query(read_query("selection/", 101)):
-        print("-------- " + f"{r.net} ")
-
-    queryString = read_query(cts_group, 307)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- net instances: ")
-    for r in g.query(read_query("selection/", 101)):
-        print("-------- " + f"{r.net} ")
-
-    queryString = read_query(cts_group, 308)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- net instances: ")
-    for r in g.query(read_query("selection/", 101)):
-        print("-------- " + f"{r.net} ")
+# CT Schemes for Transduction Process
+#==============================================================================
+
+def load_cts(g):
+    print("-- CTS Loading:")
+
+    file_ref = CONFIG_DIR + cts_file
+    g.parse(file_ref)
+    print("----- All Schemes (" + str(len(g)) + ")")
 
 
 
 #==============================================================================
-# Ontology Generation
+# Result (export)
 #==============================================================================
+
+def export_result(g):
+    export_file = 'output.ttl'
+    print("-- Export result as turtle: " + export_file)
+    g.serialize(destination=export_file, base=base_uri, format='turtle')
+
 
 
-def classify_entity(g):
-    print("-- Enity Classification (From Net Ontology to System Ontology):")
-    cts_group = "generation/classification/"
-
-    queryString = read_query(cts_group, 401)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- entity classes: ")
-    for r in g.query(read_query("selection/", 102)):
-        print("-------- " + f"{r.entity}")
-
-    queryString = read_query(cts_group, 402)
-    g.update(queryString)
-    print("----- update to create Entity net (" + str(len(g)) + ")")
-    print("-------- entity classes: ")
-    for r in g.query(read_query("selection/", 102)):
-        print("-------- " + f"{r.entity}")
-
 
 #==============================================================================
 # Main Function
@@ -211,16 +139,15 @@ def run():
         g = Graph()
         load_config(g)
         load_frame(g)
-        load_sentence(g, req_300_file)
 
-        print("\n" + "- Graph Preprocessing")
-        bypass_reification(g)
+        print("\n" + "- Preparation of Transduction Process")
+        load_cts(g)
 
-        print("\n" + "- Net Extension (CTS Application)")
-        extract_entity(g)
+        print("\n" + "- Data Source Imports")
+        load_sentences(g)
 
-        print("\n" + "- Ontology Generation ")
-        classify_entity(g)
+        print("\n" + "- Result")
+        export_result(g)
 
     except:
         print("!!! An exception occurred !!!")