From 52e1f8014b5f591d6bfa24131a8783566c5414a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Lamercerie?= <aurelien.lamercerie@laposte.net> Date: Wed, 25 Aug 2021 00:10:29 +0200 Subject: [PATCH] Add command EXTRACT --- CHANGELOG | 18 +-- extract.py | 240 ++++++++++++++++++++++++++++++++++ frame/ontology-parameters.ttl | 2 +- 3 files changed, 248 insertions(+), 12 deletions(-) create mode 100644 extract.py diff --git a/CHANGELOG b/CHANGELOG index 901fed68..376d8120 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,22 +5,18 @@ All notable changes to this project will be documented in this file. ------------------------------------------------------------------------------- -## [0.2.0] - 2021-xx-xx TODO +## [0.1.1] - 2021-08-24 + ### Added -- Implementing transduction schemes as a SPARQL query (using sparql-wrapper) -- Saving queries with XML files (cdata) -- Loading corpus as RDF ontology +- Compositional Transduction Scheme (CTS) +- Command **_extract.py_** to analyse one sentence +- Init corpus ABCD (dev test) ### Changed -- TODO - -### Fix -- TODO +- ontologies in config and frame -------------------------------------------------------------------------------- - -## [0.1.0] - 2021-08-10 IN PROGRESS +## [0.1.0] - 2021-08-10 ### Added - Init project diff --git a/extract.py b/extract.py new file mode 100644 index 00000000..aa78b0b4 --- /dev/null +++ b/extract.py @@ -0,0 +1,240 @@ +#!/usr/bin/python3.5 +# -*-coding:Utf-8 -* + +#============================================================================== +# TENET: extract +#------------------------------------------------------------------------------ +# Command to extract data from corpus and generate targeted ontology +# following frame ontology +#============================================================================== + +#============================================================================== +# Importing required modules 
#==============================================================================

import traceback

from rdflib import Graph


#==============================================================================
# Parameters
#==============================================================================

# Working directories. Paths are built by plain string concatenation, so each
# directory name carries its own trailing slash.
CONFIG_DIR = "config/"
FRAME_DIR = "frame/"
CORPUS_DIR = "corpus/"
CTS_DIR = "cts/"

# Config Definition
schema_file = "unl-rdf-schema.ttl"
semantic_net_file = "semantic-net.ttl"
c_param_file = "config-parameters.ttl"
req_onto_file = "requirement-ontology.ttl"
sys_onto_file = "system-ontology.ttl"
f_param_file = "ontology-parameters.ttl"

# Dev Tests
corpus = "CCTP-SRSA-IP-20210625/"
req_300_file = "SRSA-IP_STB_PHON_00300.ttl"


#==============================================================================
# Utility
#==============================================================================

def read_query(cts_group, query_ref):
    """Return the text of one CTS query file.

    The file read is ``CTS_DIR + cts_group + str(query_ref) + ".cts"``.

    Args:
        cts_group: Sub-path below ``CTS_DIR``, including its trailing slash.
        query_ref: Reference of the query; it is converted with ``str``.

    Returns:
        The whole content of the file, as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    query_file = CTS_DIR + cts_group + str(query_ref) + ".cts"
    # NOTE(review): the query files are assumed to be UTF-8 (like this source
    # file declares for itself) -- confirm. Naming the encoding keeps the
    # result independent of the locale of the machine running the command.
    with open(query_file, "r", encoding="utf-8") as file:
        return file.read()


def _report(g, label):
    """Print the progress line ``----- <label> (<len(g)>)``."""
    print("----- " + label + " (" + str(len(g)) + ")")


def _parse_file(g, file_ref, label):
    """Parse *file_ref* into *g*, then report *label* with the new size."""
    g.parse(file_ref)
    _report(g, label)


def _apply_update(g, cts_group, query_ref, label):
    """Run the update query *query_ref* of *cts_group* on *g* and report it."""
    g.update(read_query(cts_group, query_ref))
    _report(g, label)


def _print_selection(g, query_ref, heading, variable, suffix):
    """Print *heading*, then *variable* of each row of a selection query.

    The query text is read from the ``selection/`` group on every call.
    ``str.format`` is used rather than an f-string so that the file stays
    parseable by the python3.5 interpreter named in its shebang; both format
    the value the same way.
    """
    print("-------- " + heading + ": ")
    for row in g.query(read_query("selection/", query_ref)):
        print("-------- " + "{}".format(getattr(row, variable)) + suffix)


#==============================================================================
# Graph Initialization
#==============================================================================

def load_config(g):
    """Parse the three configuration files of ``CONFIG_DIR`` into *g*.

    Args:
        g: Graph that receives the data; it is modified in place.
    """
    print("-- Configuration Loading:")
    for file_name, label in (
        (schema_file, "RDF Schema"),
        (semantic_net_file, "Semantic Net Definition"),
        (c_param_file, "Config Parameter Definition"),
    ):
        _parse_file(g, CONFIG_DIR + file_name, label)


def load_frame(g):
    """Parse the three frame ontology files of ``FRAME_DIR`` into *g*.

    Args:
        g: Graph that receives the data; it is modified in place.
    """
    print("-- Frame Ontology Loading:")
    for file_name, label in (
        (req_onto_file, "Requirement Frame Ontology"),
        (sys_onto_file, "System Frame Ontology"),
        (f_param_file, "Ontology Parameters"),
    ):
        _parse_file(g, FRAME_DIR + file_name, label)


def load_sentence(g, sentence_file):
    """Parse one sentence file of the current corpus into *g*.

    Args:
        g: Graph that receives the data; it is modified in place.
        sentence_file: File name below ``CORPUS_DIR + corpus``; it is also
            used as the label of the progress line.
    """
    print("-- Sentence Loading:")
    _parse_file(g, CORPUS_DIR + corpus + sentence_file, sentence_file)


#==============================================================================
# Graph Preprocessing
#==============================================================================

def bypass_reification(g):
    """Apply the update queries 201 and 202 of the reification group to *g*.

    Args:
        g: Graph to update in place.
    """
    print("-- Bypass Reification:")
    cts_group = "preprocessing/reification/"
    for query_ref, label in (
        (201, "update to extend UNL relation"),
        (202, "update to bypass scopes"),
    ):
        _apply_update(g, cts_group, query_ref, label)


#==============================================================================
# Net Extension (CTS Application)
#==============================================================================

def extract_entity(g):
    """Apply the update queries 301 to 308 of the entity extraction group.

    After each update, the ``net`` value of every row returned by the
    selection query 101 is printed.

    Args:
        g: Graph to update in place.
    """
    print("-- Net Extension to Extract Entity:")
    cts_group = "net_extension/entity_extraction/"
    for query_ref in range(301, 309):
        _apply_update(g, cts_group, query_ref, "update to create Entity net")
        # The trailing space after each value is part of the original output.
        _print_selection(g, 101, "net instances", "net", " ")


#==============================================================================
# Ontology Generation
#==============================================================================

def classify_entity(g):
    """Apply the update queries 401 and 402 of the classification group.

    After each update, the ``entity`` value of every row returned by the
    selection query 102 is printed.

    Args:
        g: Graph to update in place.
    """
    # NOTE(review): "Enity" and the "update to create Entity net" label below
    # look like a typo and a copy-paste from extract_entity; they are emitted
    # text, so they are left unchanged here -- confirm the intended wording.
    print("-- Enity Classification (From Net Ontology to System Ontology):")
    cts_group = "generation/classification/"
    for query_ref in (401, 402):
        _apply_update(g, cts_group, query_ref, "update to create Entity net")
        _print_selection(g, 102, "entity classes", "entity", "")


#==============================================================================
# Main Function
#==============================================================================

def run():
    """Run the whole processing on the dev-test sentence ``req_300_file``.

    The steps are: load configuration, frame and sentence into a new graph,
    bypass reification, extract entities, then classify them.

    Errors are reported and not re-raised, so the command still ends normally
    after a failure. ``KeyboardInterrupt`` and ``SystemExit`` are not caught.
    """
    try:
        print("Tenet Processing")

        print("\n" + "- Graph Initialization")
        g = Graph()
        load_config(g)
        load_frame(g)
        load_sentence(g, req_300_file)

        print("\n" + "- Graph Preprocessing")
        bypass_reification(g)

        print("\n" + "- Net Extension (CTS Application)")
        extract_entity(g)

        print("\n" + "- Ontology Generation ")
        classify_entity(g)

    except Exception:
        print("!!! An exception occurred !!!")
        # Show what failed and where (on stderr); the message alone gives no
        # way to tell a missing file from a malformed query.
        traceback.print_exc()


#==============================================================================
# Execution
#==============================================================================

if __name__ == '__main__':
    run()


#==============================================================================
# Patch residue (not part of extract.py)
#------------------------------------------------------------------------------
# The text this script was embedded in ended with the following hunk for
# another file. It is kept here, with line breaks restored, so that it is not
# lost:
#
#   diff --git a/frame/ontology-parameters.ttl b/frame/ontology-parameters.ttl
#   index 4d46c6de..f4c02543 100644
#   --- a/frame/ontology-parameters.ttl
#   +++ b/frame/ontology-parameters.ttl
#   @@ -18,7 +18,7 @@ fprm:Frame_Parameters
#    fprm:System_Entity rdf:type owl:Class ;
#    rdfs:comment "Parameters related to Entity (System Ontology)." ;
#   - rdfs:label "entity" ;
#   + rdfs:label "Entity" ;
#    rdfs:subClassOf fprm:System_Ontology ;
#    .
#    fprm:System_Feature
#   --
#   GitLab
#==============================================================================