diff --git a/config.xml b/config.xml
new file mode 100644
index 0000000000000000000000000000000000000000..66e9dfbace22b9017050ea5b7907c026f5ced6e1
--- /dev/null
+++ b/config.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<config>
+
+  <directory
+    config="config/"
+    frame="frame/"
+    corpus="corpus/"
+    cts="cts/"
+    output="output/"
+  />
+
+  <file
+    schema = "unl-rdf-schema.ttl"
+    semantic_net = "semantic-net.ttl"
+    dash = "dash-data-shapes.ttl"
+    config_param = "config-parameters.ttl"
+    cts = "transduction-schemes.ttl"
+  />
+
+  <reference
+    default_base_uri = "https://unsel.tetras-libre.fr/tenet/working"
+    default_ontology_suffix = "-ontology.ttl"
+  />
+
+</config>
diff --git a/create_graph.py b/create_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..a64fb1624aed965281825f3805ea2eea1a8ee4fb
--- /dev/null
+++ b/create_graph.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python3.5
+# -*-coding:Utf-8 -*
+
+#==============================================================================
+# TENET: create graph
+#------------------------------------------------------------------------------
+# Script to prepare the work structure for the extraction process
+#==============================================================================
+
+#==============================================================================
+# Importing required modules
+#==============================================================================
+
+from lib import structure
+
+
+#==============================================================================
+# Parameters
+#==============================================================================
+
+
+# Dev Tests (corpus references passed to structure.create_extraction_graph)
+base_uri = "https://unsel.tetras-libre.fr/tenet/working"
+corpus_40 = "CCTP-SRSA-IP-20210831/"
+req_100 = "CCTP-SRSA-IP-20210831-R100/"
+req_200 = "CCTP-SRSA-IP-20210831-R200/"
+req_300 = "CCTP-SRSA-IP-20210831-R300/"
+req_1100 = "CCTP-SRSA-IP-20210831-R1100/"
+req_13900 = "CCTP-SRSA-IP-20210831-R13900/"
+corpus_comp = "COMP/"
+req_f1 = "COMP/F1/"
+req_34 = "COMP/34/"
+req_50 = "COMP/50/"
+corpus_ERTMS = "ERTMS/"
+corpus_PEV = "PEV-RSE-Approach/"
+
+
+#==============================================================================
+# Process
+#==============================================================================
+
+target_ref = "system"
+#target_ref = "environment"
+
+#structure.create_extraction_graph(corpus_comp, 'COMP-01', target_ref)
+#structure.create_extraction_graph(req_f1, 'C-F1', target_ref)
+#structure.create_extraction_graph(req_50, 'C-50', target_ref)
+#structure.create_extraction_graph(req_34, 'C-34', target_ref)
+
+structure.create_extraction_graph(req_13900, 'R13900', target_ref)
+structure.create_extraction_graph(req_100, 'R100f', target_ref)
+#structure.create_extraction_graph(req_200, 'R200f', target_ref)
+#structure.create_extraction_graph(req_300, 'R300f', target_ref)
+#structure.create_extraction_graph(req_1100, 'R1100f', target_ref)
+
+#structure.create_extraction_graph(corpus_40, 'Corpus-CCTP-40f', target_ref)
+
+#structure.create_extraction_graph(corpus_ERTMS, 'Corpus-ERTMS', target_ref)
+#structure.create_extraction_graph(corpus_PEV, 'Corpus-PEV', target_ref)
+
+
+
+
+
diff --git a/lib/config.py b/lib/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..852c309b5cced2dd647621149a7b1fb86879b5cd
--- /dev/null
+++ b/lib/config.py
@@ -0,0 +1,90 @@
+#!/usr/bin/python3.5
+# -*-coding:Utf-8 -*
+
+#==============================================================================
+# TENET: config
+#------------------------------------------------------------------------------
+# Class grouping the configuration data
+#==============================================================================
+
+#==============================================================================
+# Importing required modules
+#==============================================================================
+
+from lxml import etree
+
+
+#==============================================================================
+# Default Values
+#==============================================================================
+
+# Dev Tests (corpus references; only req_100 is used by the self-test below)
+req_100 = "CCTP-SRSA-IP-20210831-R100/"
+req_200 = "CCTP-SRSA-IP-20210831-R200/"
+req_300 = "CCTP-SRSA-IP-20210831-R300/"
+req_1100 = "CCTP-SRSA-IP-20210831-R1100/"
+corpus_40 = "CCTP-SRSA-IP-20210831/"
+corpus_ERTMS = "ERTMS/"
+corpus_PEV = "PEV-RSE-Approach/"
+
+
+#==============================================================================
+# Class
+#==============================================================================
+
+class Config:
+    """Paths and references for one Tenet run, read from an XML file."""
+
+
+    def __init__(self, config_file, source_corpus, source_ref, target_ref):
+        """Parse config_file and derive every path attribute from it."""
+        # -- Config XML Tree (parsed with lxml)
+        config_tree = etree.parse(config_file)
+
+        # -- Source and Target (stored exactly as given by the caller)
+        self.source_corpus = source_corpus
+        self.source_ref = source_ref
+        self.target_ref = target_ref
+
+        # -- Working directories (attributes of the first <directory> element)
+        c_dir = config_tree.xpath("directory")[0]
+        self.config_dir = c_dir.get("config")
+        self.frame_dir = c_dir.get("frame")
+        self.corpus_dir = c_dir.get("corpus")
+        self.cts_dir = c_dir.get("cts")
+        self.output_dir = c_dir.get("output")
+
+        # -- Config File Definition (<file> attributes, prefixed by config_dir)
+        c_file = config_tree.xpath("file")[0]
+        self.schema_file = self.config_dir + c_file.get("schema")
+        self.semantic_net_file = self.config_dir + c_file.get("semantic_net")
+        self.dash_file = self.config_dir + c_file.get("dash")
+        self.config_param_file = self.config_dir + c_file.get("config_param")
+        self.cts_file = self.config_dir + c_file.get("cts")
+
+        # -- Reference (attributes of the first <reference> element)
+        c_ref = config_tree.xpath("reference")[0]
+        self.base_uri = c_ref.get("default_base_uri")
+        self.onto_suffix = c_ref.get("default_ontology_suffix")
+
+        # -- Source File Definition (a recursive glob pattern, not one path)
+        self.source_sentence_files = self.corpus_dir + self.source_corpus
+        self.source_sentence_files += '**/*.ttl'
+
+        # -- Target File Definition (frame ontology path and output path)
+        self.frame_ontology_file = self.frame_dir + self.target_ref
+        self.frame_ontology_file += self.onto_suffix
+        self.output_file = self.output_dir + self.source_ref + ".ttl"
+
+
+    # TODO: function to print
+
+
+#==============================================================================
+# Test
+#==============================================================================
+
+if __name__ == '__main__':
+    default_config_file = "../config.xml"
+    config = Config(default_config_file, req_100, "R100", "system")
+    print(config.__dict__)
\ No newline at end of file
diff --git a/lib/structure.py b/lib/structure.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb10eaa08d9a93af8b5ac38c33bb1e1764fc5976
--- /dev/null
+++ b/lib/structure.py
@@ -0,0 +1,143 @@
+#!/usr/bin/python3.5
+# -*-coding:Utf-8 -*
+
+#==============================================================================
+# TENET: structure
+#------------------------------------------------------------------------------
+# Work structure for extraction processing.
+#==============================================================================
+
+#==============================================================================
+# Importing required modules
+#==============================================================================
+
+import glob
+from rdflib import Graph
+from lib.config import Config
+
+
+#==============================================================================
+# Parameters
+#==============================================================================
+
+# Config File (relative path, resolved against the current working directory)
+config_file = "config.xml"
+
+
+#==============================================================================
+# Graph Loading
+#==============================================================================
+
+def load_config(config, work_graph):
+    """Load schema, semantic net, DASH and parameter files into work_graph."""
+    print("-- Configuration Loading:")
+    work_graph.parse(config.schema_file)
+    print("----- RDF Schema (" + str(len(work_graph)) + ")")
+
+    work_graph.parse(config.semantic_net_file)
+    print("----- Semantic Net Definition (" + str(len(work_graph)) + ")")
+
+    work_graph.parse(config.dash_file)
+    print("----- Data Shapes Dash (" + str(len(work_graph)) + ")")
+
+    work_graph.parse(config.config_param_file)
+    print("----- Config Parameter Definition (" + str(len(work_graph)) + ")")
+
+
+def load_cts(config, work_graph):
+    """Load the transduction schemes file (config.cts_file) into work_graph."""
+    print("-- CTS Loading:")
+    work_graph.parse(config.cts_file)
+    print("----- All Schemes (" + str(len(work_graph)) + ")")
+
+
+def load_frame(config, work_graph):
+    """Load the frame ontology (config.frame_ontology_file) into work_graph."""
+    print("-- Frame Ontology Loading:")
+    work_graph.parse(config.frame_ontology_file)
+    print("----- System Frame Ontology (" + str(len(work_graph)) + ")")
+
+
+def load_sentences(config, work_graph):
+    """Load every file matching config.source_sentence_files, in name order."""
+    print("-- Sentence Loading:")
+    file_refs = glob.glob(config.source_sentence_files, recursive=True)
+    for file_ref in sorted(file_refs):  # glob order is arbitrary; fix it
+        work_graph.parse(file_ref)
+        print("----- " + file_ref + " (" + str(len(work_graph)) + ")")
+
+
+#==============================================================================
+# Result (export)
+#==============================================================================
+
+def export_result(config, work_graph, export_file):
+    """Write work_graph to export_file as Turtle, rooted at the config base."""
+    print("-- Export result as turtle: " + export_file)
+    work_graph.serialize(destination=export_file,
+                         base=config.base_uri + '/' + config.source_ref,
+                         format='turtle')
+
+
+def finalize_export_file(export_file):
+    """Rewrite export_file in place, expanding its sys: prefix declaration."""
+    # Explicit UTF-8 (rdflib's default output encoding), not the locale codec.
+    with open(export_file, "rt", encoding="utf-8") as file:
+        x = file.read()
+
+    with open(export_file, "wt", encoding="utf-8") as file:
+        x = x.replace(
+            "@prefix sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/> .",
+            """
+            @prefix sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/> .
+            @prefix sys-Event: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/Event#> .
+            @prefix sys-State_Property: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/State_Property#> .
+            @prefix sys-abstract_thing: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/abstract_thing#> .
+            @prefix sys-action_verb: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/action_verb#> .
+            @prefix sys-agent: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/agent#> .
+            @prefix sys-attributive_verb: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/attributive_verb#> .
+            @prefix sys-component: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/component#> .
+            @prefix sys-message: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/message#> .
+            @prefix sys-place: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/place#> .
+            @prefix owl: <http://www.w3.org/2002/07/owl#> .
+            @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+            @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+            @prefix sys-relation: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/relation/> .
+            @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+            """)
+        file.write(x)
+
+
+#==============================================================================
+# Main Function
+#==============================================================================
+
+def create_extraction_graph(corpus, source_ref, target_ref):
+    """Build the work graph for corpus and export it as Turtle (best-effort)."""
+    try:
+        print("[Tenet] Prepare work data from corpus " + corpus)
+
+        print("\n" + "- Config Loading")
+        config = Config(config_file, corpus, source_ref, target_ref)
+        print(config.__dict__)
+
+        print("\n" + "- Graph Initialization")
+        work_graph = Graph()
+        load_config(config, work_graph)
+        load_frame(config, work_graph)
+
+        print("\n" + "- Preparation of Transduction Process")
+        load_cts(config, work_graph)
+
+        print("\n" + "- Data Source Imports")
+        load_sentences(config, work_graph)
+
+        print("\n" + "- Result")
+        export_file = config.output_file
+        export_result(config, work_graph, export_file)
+        finalize_export_file(export_file)
+
+        print()
+    except Exception as err:  # still best-effort, but Ctrl-C now propagates
+        print("!!! An exception occurred !!!")
+        print("    " + repr(err))
diff --git a/prepare_work_data.py b/prepare_work_data.py
deleted file mode 100644
index 9e441f86a5619d4f96abce8792996f833552e692..0000000000000000000000000000000000000000
--- a/prepare_work_data.py
+++ /dev/null
@@ -1,237 +0,0 @@
-#!/usr/bin/python3.5
-# -*-coding:Utf-8 -*
-
-#==============================================================================
-# TENET: prepare work data
-#------------------------------------------------------------------------------
-# Prepare work data for extraction processing.
-#============================================================================== - -#============================================================================== -# Importing required modules -#============================================================================== - -import glob -from rdflib import Graph, Namespace, URIRef - - -#============================================================================== -# Parameters -#============================================================================== - -# Working directories -CONFIG_DIR = "config/" -FRAME_DIR = "frame/" -CORPUS_DIR = "corpus/" -CTS_DIR = "cts/" -OUTPUT_DIR = "output/" - -# Config Definition -TURTLE_SUFFIX = ".ttl" -ONTO_FILE = "-ontology" + TURTLE_SUFFIX -dash_file = "dash-data-shapes.ttl" # data from "http://datashapes.org/dash.ttl" -schema_file = "unl-rdf-schema.ttl" -semantic_net_file = "semantic-net.ttl" -cts_file = "transduction-schemes.ttl" -c_param_file = "config-parameters.ttl" - -# Dev Tests -corpus_40 = "CCTP-SRSA-IP-20210831/" -base_uri = "https://unsel.tetras-libre.fr/tenet/working" -req_100 = "CCTP-SRSA-IP-20210831-R100/" -req_200 = "CCTP-SRSA-IP-20210831-R200/" -req_300 = "CCTP-SRSA-IP-20210831-R300/" -req_1100 = "CCTP-SRSA-IP-20210831-R1100/" -corpus_comp = "COMP/" -req_f1 = "COMP/F1/" -req_34 = "COMP/34/" -req_50 = "COMP/50/" -corpus_ERTMS = "ERTMS/" -corpus_PEV = "PEV-RSE-Approach/" - - -#============================================================================== -# Utility -#============================================================================== - -def read_query(cts_group, query_ref): - query_file = CTS_DIR + cts_group + str(query_ref) + ".cts" - with open(query_file, "r") as file: - return file.read() - - -#============================================================================== -# Graph Initialization -#============================================================================== - -def load_config(work_graph): - print("-- Configuration Loading:") - - 
file_ref = CONFIG_DIR + schema_file - work_graph.parse(file_ref) - print("----- RDF Schema (" + str(len(work_graph)) + ")") - - file_ref = CONFIG_DIR + semantic_net_file - work_graph.parse(file_ref) - print("----- Semantic Net Definition (" + str(len(work_graph)) + ")") - - file_ref = CONFIG_DIR + dash_file - work_graph.parse(file_ref) - print("----- Data Shapes Dash (" + str(len(work_graph)) + ")") - - file_ref = CONFIG_DIR + c_param_file - work_graph.parse(file_ref) - print("----- Config Parameter Definition (" + str(len(work_graph)) + ")") - - -def load_frame(work_graph, target_system): - print("-- Frame Ontology Loading:") - - # -- old --- file_ref = FRAME_DIR + req_onto_file - # -- old --- work_graph.parse(file_ref) - # -- old --- print("----- Requirement Frame Ontology (" + str(len(work_graph)) + ")") - - file_ref = FRAME_DIR + target_system + ONTO_FILE - work_graph.parse(file_ref) - print("----- System Frame Ontology (" + str(len(work_graph)) + ")") - - # -- old --- file_ref = FRAME_DIR + f_param_file - # -- old --- work_graph.parse(file_ref) - # -- old --- print("----- Ontology Parameters (" + str(len(work_graph)) + ")") - - -#def define_namespace(work_graph): -# print("-- Namespace Definition:") -# -# sys_uri = "https://unsel.tetras-libre.fr/tenet/frame/system-ontology/" -# concept_classes = ["agent"] -# for concept in concept_classes: -# new_prefix = "sys-" + concept -# new_uri = URIRef(sys_uri + concept + '#') -# work_graph.namespace_manager.bind(new_prefix, new_uri) -# print("----- " + new_prefix + ": " + new_uri) -# print(list(work_graph.namespace_manager.namespaces())) - - -def load_sentences(work_graph, corpus): - print("-- Sentence Loading:") - - target_ref = CORPUS_DIR + corpus + '**/*.ttl' - for file_ref in glob.glob(target_ref, recursive = True): - work_graph.parse(file_ref) - print("----- " + file_ref + " (" + str(len(work_graph)) + ")") - - - -#============================================================================== -# CT Schemes for 
Transduction Process -#============================================================================== - -def load_cts(work_graph): - print("-- CTS Loading:") - - file_ref = CONFIG_DIR + cts_file - work_graph.parse(file_ref) - print("----- All Schemes (" + str(len(work_graph)) + ")") - - - -#============================================================================== -# Result (export) -#============================================================================== - -def export_result(work_graph, export_ref, export_file): - print("-- Export result as turtle: " + export_file) - - work_graph.serialize(destination=export_file, - base=base_uri + '/' + export_ref, - format='turtle') - - -def finalize_export_file(export_file): - """ finalize the export file by adding some useful prefixes """ - - with open(export_file, "rt") as file: - x = file.read() - - with open(export_file, "wt") as file: - x = x.replace( - "@prefix sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/> .", - """ - @prefix sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/> . - @prefix sys-class: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/class/> . - @prefix sys-property: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/property/> . - @prefix sys-relation: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/relation/> . - @prefix sys-Event: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/Event#> . - @prefix sys-event: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/eventObjectProperty#> . - @prefix sys-State_Property: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/State_Property#> . - @prefix sys-stateProperty: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/statePropertyObjectProperty#> . - @prefix sys-abstract_thing: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/abstract_thing#> . - @prefix sys-action_verb: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/action_verb#> . 
- @prefix sys-agent: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/agent#> . - @prefix sys-attributive_verb: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/attributive_verb#> . - @prefix sys-component: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/component#> . - @prefix sys-message: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/message#> . - @prefix sys-place: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/place#> . - """) - file.write(x) - -#============================================================================== -# Main Function -#============================================================================== - -def run(corpus, source_ref, target_ref): - try: - print("[Tenet] Prepare work data from corpus " + corpus) - - print("\n" + "- Graph Initialization") - work_graph = Graph() - load_config(work_graph) - load_frame(work_graph, target_ref) - #define_namespace(work_graph) - - print("\n" + "- Preparation of Transduction Process") - load_cts(work_graph) - - print("\n" + "- Data Source Imports") - load_sentences(work_graph, corpus) - - print("\n" + "- Result") - output_file = OUTPUT_DIR + source_ref + TURTLE_SUFFIX - export_result(work_graph, source_ref, output_file) - finalize_export_file(output_file) - - print() - - except: - print("!!! 
An exception occurred !!!") - - -#============================================================================== -# Execution -#============================================================================== - -if __name__ == '__main__': - - target_ref = "system" - #target_ref = "environment" - - run(corpus_comp, 'COMP-01', target_ref) - run(req_f1, 'C-F1', target_ref) - run(req_50, 'C-50', target_ref) - run(req_34, 'C-34', target_ref) - - #run(req_100, 'R100f', target_ref) - #run(req_200, 'R200f', target_ref) - #run(req_300, 'R300f', target_ref) - #run(req_1100, 'R1100f', target_ref) - - #run(corpus_40, 'Corpus-CCTP-40f', target_ref) - - #run(corpus_ERTMS, 'Corpus-ERTMS', target_ref) - #run(corpus_PEV, 'Corpus-PEV', target_ref) - - - - -