Skip to content
Snippets Groups Projects
Commit 8e8ea506 authored by Aurélien Lamercerie's avatar Aurélien Lamercerie
Browse files

Add lib (with the modules config and structure) and adapt the main script (create_graph) to use it

parent acfd1e91
Branches
Tags 0.2.1
No related merge requests found
<?xml version="1.0" encoding="UTF-8"?>
<config>
<directory
config="config/"
frame="frame/"
corpus="corpus/"
cts="cts/"
output="output/"
/>
<file
schema = "unl-rdf-schema.ttl"
semantic_net = "semantic-net.ttl"
dash = "dash-data-shapes.ttl"
config_param = "config-parameters.ttl"
cts = "transduction-schemes.ttl"
/>
<reference
default_base_uri = "https://unsel.tetras-libre.fr/tenet/working"
default_ontology_suffix = "-ontology.ttl"
/>
</config>
#!/usr/bin/python3.5
# -*-coding:Utf-8 -*
#==============================================================================
# TENET: create_graph
#------------------------------------------------------------------------------
# Script to prepare the work structure for extraction process
#==============================================================================
#==============================================================================
# Importing required modules
#==============================================================================
from lib import structure
#==============================================================================
# Parameters
#==============================================================================
# Dev Tests
base_uri = "https://unsel.tetras-libre.fr/tenet/working"
corpus_40 = "CCTP-SRSA-IP-20210831/"
req_100 = "CCTP-SRSA-IP-20210831-R100/"
req_200 = "CCTP-SRSA-IP-20210831-R200/"
req_300 = "CCTP-SRSA-IP-20210831-R300/"
req_1100 = "CCTP-SRSA-IP-20210831-R1100/"
req_13900 = "CCTP-SRSA-IP-20210831-R13900/"
corpus_comp = "COMP/"
req_f1 = "COMP/F1/"
req_34 = "COMP/34/"
req_50 = "COMP/50/"
corpus_ERTMS = "ERTMS/"
corpus_PEV = "PEV-RSE-Approach/"


#==============================================================================
# Process
#==============================================================================

target_ref = "system"
#target_ref = "environment"

# (corpus directory, source reference) pairs to process, in order.
# Uncomment an entry to include it in the run.
EXTRACTION_RUNS = (
    #(corpus_comp, 'COMP-01'),
    #(req_f1, 'C-F1'),
    #(req_50, 'C-50'),
    #(req_34, 'C-34'),
    (req_13900, 'R13900'),
    (req_100, 'R100f'),
    #(req_200, 'R200f'),
    #(req_300, 'R300f'),
    #(req_1100, 'R1100f'),
    #(corpus_40, 'Corpus-CCTP-40f'),
    #(corpus_ERTMS, 'Corpus-ERTMS'),
    #(corpus_PEV, 'Corpus-PEV'),
)


def main():
    """Create one extraction graph per entry of EXTRACTION_RUNS.

    Each entry is forwarded to ``structure.create_extraction_graph`` together
    with the module-level ``target_ref``.
    """
    for corpus, source_ref in EXTRACTION_RUNS:
        structure.create_extraction_graph(corpus, source_ref, target_ref)


# Guarded so that importing this module does not start the extraction,
# consistent with the other modules of the project.
if __name__ == '__main__':
    main()
#!/usr/bin/python3.5
# -*-coding:Utf-8 -*
#==============================================================================
# TENET: config
#------------------------------------------------------------------------------
# Class grouping the configuration data
#==============================================================================
#==============================================================================
# Importing required modules
#==============================================================================
from lxml import etree
#==============================================================================
# Default Values
#==============================================================================
# Dev Tests
# Sample corpus sub-directories (each ends with '/').
# NOTE(review): within this module only req_100 is referenced, by the
# self-test under the __main__ guard; the other names look unused here --
# confirm before removing them.
req_100 = "CCTP-SRSA-IP-20210831-R100/"
req_200 = "CCTP-SRSA-IP-20210831-R200/"
req_300 = "CCTP-SRSA-IP-20210831-R300/"
req_1100 = "CCTP-SRSA-IP-20210831-R1100/"
corpus_40 = "CCTP-SRSA-IP-20210831/"
corpus_ERTMS = "ERTMS/"
corpus_PEV = "PEV-RSE-Approach/"
#==============================================================================
# Class
#==============================================================================
class Config:
    """Paths and references used by one Tenet extraction run.

    Values come from an XML configuration file (its ``directory``, ``file``
    and ``reference`` elements) combined with the source and target
    identifiers supplied by the caller.
    """

    def __init__(self, config_file, source_corpus, source_ref, target_ref):
        """Parse *config_file* and derive every path attribute.

        Args:
            config_file: location of the XML configuration, handed to
                ``etree.parse``.
            source_corpus: corpus sub-directory; appended to the corpus
                directory to build the sentence glob pattern.
            source_ref: reference of the source; names the output file.
            target_ref: reference of the target; names the frame ontology
                file.
        """
        xml_tree = etree.parse(config_file)

        # -- Source and target identifiers, stored as given
        self.source_corpus = source_corpus
        self.source_ref = source_ref
        self.target_ref = target_ref

        # -- Working directories (attributes of the first <directory> element)
        directories = xml_tree.xpath("directory")[0]
        self.config_dir = directories.get("config")
        self.frame_dir = directories.get("frame")
        self.corpus_dir = directories.get("corpus")
        self.cts_dir = directories.get("cts")
        self.output_dir = directories.get("output")

        # -- Configuration files, all located under the config directory
        files = xml_tree.xpath("file")[0]
        self.schema_file = self.config_dir + files.get("schema")
        self.semantic_net_file = self.config_dir + files.get("semantic_net")
        self.dash_file = self.config_dir + files.get("dash")
        self.config_param_file = self.config_dir + files.get("config_param")
        self.cts_file = self.config_dir + files.get("cts")

        # -- Reference values
        references = xml_tree.xpath("reference")[0]
        self.base_uri = references.get("default_base_uri")
        self.onto_suffix = references.get("default_ontology_suffix")

        # -- Source files: recursive glob pattern over the chosen corpus
        self.source_sentence_files = (
            self.corpus_dir + self.source_corpus + '**/*.ttl'
        )

        # -- Target files: frame ontology to load and turtle file to write
        self.frame_ontology_file = (
            self.frame_dir + self.target_ref + self.onto_suffix
        )
        self.output_file = self.output_dir + self.source_ref + ".ttl"
# TODO: function to print
#==============================================================================
# Test
#==============================================================================
if __name__ == '__main__':
    # Manual check: build the configuration for the R100 sample corpus
    # against the "system" target and dump every attribute.
    sample = Config("../config.xml", req_100, "R100", "system")
    print(vars(sample))
\ No newline at end of file
......@@ -2,9 +2,9 @@
# -*-coding:Utf-8 -*
#==============================================================================
# TENET: prepare work data
# TENET: structure
#------------------------------------------------------------------------------
# Prepare work data for extraction processing.
# Work structure for extraction processing.
#==============================================================================
#==============================================================================
......@@ -12,139 +12,70 @@
#==============================================================================
import glob
from rdflib import Graph, Namespace, URIRef
from rdflib import Graph
from lib.config import Config
#==============================================================================
# Parameters
#==============================================================================
# Working directories
CONFIG_DIR = "config/"
FRAME_DIR = "frame/"
CORPUS_DIR = "corpus/"
CTS_DIR = "cts/"
OUTPUT_DIR = "output/"
# Config Definition
TURTLE_SUFFIX = ".ttl"
ONTO_FILE = "-ontology" + TURTLE_SUFFIX
dash_file = "dash-data-shapes.ttl" # data from "http://datashapes.org/dash.ttl"
schema_file = "unl-rdf-schema.ttl"
semantic_net_file = "semantic-net.ttl"
cts_file = "transduction-schemes.ttl"
c_param_file = "config-parameters.ttl"
# Dev Tests
corpus_40 = "CCTP-SRSA-IP-20210831/"
base_uri = "https://unsel.tetras-libre.fr/tenet/working"
req_100 = "CCTP-SRSA-IP-20210831-R100/"
req_200 = "CCTP-SRSA-IP-20210831-R200/"
req_300 = "CCTP-SRSA-IP-20210831-R300/"
req_1100 = "CCTP-SRSA-IP-20210831-R1100/"
corpus_comp = "COMP/"
req_f1 = "COMP/F1/"
req_34 = "COMP/34/"
req_50 = "COMP/50/"
corpus_ERTMS = "ERTMS/"
corpus_PEV = "PEV-RSE-Approach/"
# Config File
config_file = "config.xml"
#==============================================================================
# Utility
# Graph Loading
#==============================================================================
def read_query(cts_group, query_ref):
    """Return the text of one ``.cts`` query file.

    The file path is ``CTS_DIR + cts_group + str(query_ref) + ".cts"``.
    """
    path = CTS_DIR + cts_group + str(query_ref) + ".cts"
    with open(path, "r") as handle:
        content = handle.read()
    return content
#==============================================================================
# Graph Initialization
#==============================================================================
def load_config(config, work_graph):
    """Parse the configuration graphs into *work_graph*.

    Loads, in this order, the files referenced by ``config.schema_file``,
    ``config.semantic_net_file``, ``config.dash_file`` and
    ``config.config_param_file``. After each parse a progress line showing
    the current ``len(work_graph)`` is printed.

    Args:
        config: object exposing the four file attributes listed above.
        work_graph: graph object providing ``parse`` and ``__len__``.
    """
    print("-- Configuration Loading:")
    sources = (
        ("RDF Schema", config.schema_file),
        ("Semantic Net Definition", config.semantic_net_file),
        ("Data Shapes Dash", config.dash_file),
        ("Config Parameter Definition", config.config_param_file),
    )
    for label, file_ref in sources:
        work_graph.parse(file_ref)
        print("----- " + label + " (" + str(len(work_graph)) + ")")
def load_cts(config, work_graph):
    """Parse the transduction schemes file into *work_graph*.

    Args:
        config: object exposing ``cts_file``.
        work_graph: graph object providing ``parse`` and ``__len__``.
    """
    print("-- CTS Loading:")
    work_graph.parse(config.cts_file)
    print("----- All Schemes (" + str(len(work_graph)) + ")")


def load_frame(config, work_graph):
    """Parse the frame ontology file into *work_graph*.

    Args:
        config: object exposing ``frame_ontology_file``.
        work_graph: graph object providing ``parse`` and ``__len__``.
    """
    print("-- Frame Ontology Loading:")
    work_graph.parse(config.frame_ontology_file)
    print("----- System Frame Ontology (" + str(len(work_graph)) + ")")
def load_sentences(config, work_graph):
    """Parse every sentence file of the source corpus into *work_graph*.

    The files are those matched by the recursive glob pattern
    ``config.source_sentence_files``. Matches are sorted so that the load
    order, and therefore the printed progress, is the same on every run.

    Args:
        config: object exposing ``source_sentence_files`` (a glob pattern).
        work_graph: graph object providing ``parse`` and ``__len__``.
    """
    print("-- Sentence Loading:")
    matches = glob.glob(config.source_sentence_files, recursive=True)
    for file_ref in sorted(matches):
        work_graph.parse(file_ref)
        print("----- " + file_ref + " (" + str(len(work_graph)) + ")")
#==============================================================================
# CT Schemes for Transduction Process
#==============================================================================
# Parse the transduction schemes file (CONFIG_DIR + cts_file) into work_graph
# and print the resulting graph length.
# NOTE(review): this one-argument variant relies on the module-level CONFIG_DIR
# and cts_file constants, while this listing also contains a two-argument
# definition and a call load_cts(config, work_graph) -- presumably this is the
# superseded version; confirm which one is current.
def load_cts(work_graph):
print("-- CTS Loading:")
file_ref = CONFIG_DIR + cts_file
work_graph.parse(file_ref)
print("----- All Schemes (" + str(len(work_graph)) + ")")
#==============================================================================
# Result (export)
#==============================================================================
def export_result(config, work_graph, export_file):
    """Serialize *work_graph* as turtle into *export_file*.

    The base passed to the serializer is
    ``config.base_uri + '/' + config.source_ref``.

    Args:
        config: object exposing ``base_uri`` and ``source_ref``.
        work_graph: graph object providing ``serialize``.
        export_file: destination path of the turtle file.
    """
    print("-- Export result as turtle: " + export_file)
    export_base = config.base_uri + '/' + config.source_ref
    work_graph.serialize(destination=export_file,
                         base=export_base,
                         format='turtle')
......@@ -159,13 +90,8 @@ def finalize_export_file(export_file):
"@prefix sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/> .",
"""
@prefix sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/> .
@prefix sys-class: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/class/> .
@prefix sys-property: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/property/> .
@prefix sys-relation: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/relation/> .
@prefix sys-Event: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/Event#> .
@prefix sys-event: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/eventObjectProperty#> .
@prefix sys-State_Property: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/State_Property#> .
@prefix sys-stateProperty: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/statePropertyObjectProperty#> .
@prefix sys-abstract_thing: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/abstract_thing#> .
@prefix sys-action_verb: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/action_verb#> .
@prefix sys-agent: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/agent#> .
......@@ -173,65 +99,45 @@ def finalize_export_file(export_file):
@prefix sys-component: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/component#> .
@prefix sys-message: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/message#> .
@prefix sys-place: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/place#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sys-relation: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/relation/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
""")
file.write(x)
#==============================================================================
# Main Function
#==============================================================================
def create_extraction_graph(corpus, source_ref, target_ref):
    """Build the work graph for one corpus and export it as turtle.

    Steps: build a ``Config`` from the module-level ``config_file``, load the
    configuration graphs, the frame ontology, the transduction schemes and
    the corpus sentences into a fresh ``Graph``, then export the result to
    ``config.output_file`` and finalize that file.

    Failures are reported on the console and are not re-raised, so a caller
    processing several corpora continues with the next one.

    Args:
        corpus: corpus sub-directory to load sentences from.
        source_ref: reference of the source, used for the output.
        target_ref: reference of the target frame ontology.
    """
    import traceback

    try:
        print("[Tenet] Prepare work data from corpus " + corpus)

        print("\n" + "- Config Loading")
        config = Config(config_file, corpus, source_ref, target_ref)
        print(config.__dict__)

        print("\n" + "- Graph Initialization")
        work_graph = Graph()
        load_config(config, work_graph)
        load_frame(config, work_graph)

        print("\n" + "- Preparation of Transduction Process")
        load_cts(config, work_graph)

        print("\n" + "- Data Source Imports")
        load_sentences(config, work_graph)

        print("\n" + "- Result")
        export_file = config.output_file
        export_result(config, work_graph, export_file)
        finalize_export_file(export_file)

        print()

    except Exception:
        # Catch Exception rather than everything, so that Ctrl-C and
        # SystemExit still stop the program, and show the cause instead of
        # hiding it behind the generic message.
        print("!!! An exception occurred !!!")
        traceback.print_exc()
#==============================================================================
# Execution
#==============================================================================
if __name__ == '__main__':
target_ref = "system"
#target_ref = "environment"
run(corpus_comp, 'COMP-01', target_ref)
run(req_f1, 'C-F1', target_ref)
run(req_50, 'C-50', target_ref)
run(req_34, 'C-34', target_ref)
#run(req_100, 'R100f', target_ref)
#run(req_200, 'R200f', target_ref)
#run(req_300, 'R300f', target_ref)
#run(req_1100, 'R1100f', target_ref)
#run(corpus_40, 'Corpus-CCTP-40f', target_ref)
#run(corpus_ERTMS, 'Corpus-ERTMS', target_ref)
#run(corpus_PEV, 'Corpus-PEV', target_ref)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment