Commit 833256a4 authored by David Rouquet's avatar David Rouquet

Add post edit to the OWL extractions to "propagate" the instances to subclasses

parent 60c84fc7
%% Cell type:code id:160ab8ab-091e-4b00-b63a-c3746c71c540 tags:
``` python
import panel as pn
import param
from glob import glob
import base64
import re
import requests
from subprocess import Popen, PIPE, STDOUT
import html
from cairosvg import svg2png
import graphviz
from rdflib import Graph, Namespace, URIRef
import os
from collections import OrderedDict
from urllib.request import urlopen
import uuid
import shutil
from SPARQLWrapper import SPARQLWrapper, JSON
pn.extension(comms='ipywidgets')
```
%% Cell type:code id:7b4685d7-698d-4a86-a0a4-a81d337bc9d7 tags:
``` python
#######################################################################################################
# Parameters
#######################################################################################################
shaclBase = '/opt/dashboards/tools/shacl-1.3.2/bin/'
owl2vowlPath = '/opt/dashboards/tools/owl2vowl_0.3.7/owl2vowl.jar'
storeBase = '../store/CCTP-SRSA-IP-20210831/'
extractionGraph = '/opt/dashboards/tetras-lab-unl-demos/work_graph.ttl'  # previously: extraction-data-9.ttl in the same directory
workDir = 'work-data/'
webvowlData = '/opt/webvowl'
pane_width = 1250
# Fuseki
fusekiBase = "https://fuseki.unsel.tetras-lab.io/unsel/"
sparqlQuery = SPARQLWrapper(fusekiBase+'query',returnFormat=JSON)
sparqlUpdate = SPARQLWrapper(fusekiBase+'update')
sparqlUpdate.method = 'POST'
sparqlUpdate.setCredentials("admin", "wezW3EHTH4LfEdaKtnC9errLH1YwVXssIO6DUfnjGAHuBApSfvDb4R1uDX5JmSVK")
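# Caution: the following update clears *all* triples in the Fuseki dataset;
# the corpus graphs are reloaded further down in this notebook.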
sparqlUpdate.setQuery("DELETE {?s ?p ?o} WHERE {?s ?p ?o}")
sparqlLog = sparqlUpdate.query()
```
%% Cell type:code id:2c41c319-4beb-4a85-a232-61a12d00cdbf tags:
``` python
#######################################################################################################
# UNL tools functions
#######################################################################################################
def unl2stuff(unlFilePath, jarPath, outPrefix, outType):
# Run java parser
cmd = ['java', '-jar', jarPath,
'--input-file', unlFilePath,
'--output-file', outPrefix,
'--output-type', outType]
with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:
p.wait()
p.stdout.flush()
if p.returncode != 0:
print("Error in unl2rdf: \n\n"+p.stdout.read().decode())
print('UNL;')
#print(text)
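# Hedged usage sketch, mirroring the real call made in createStoreDirectory below:
# unl2stuff(storeDir+srsaRef+'.unl', '/opt/dashboards/tools/unl2rdf-app-0.9.jar', storeDir+srsaRef, 'rdf')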
def unl2dotWeb(unldata) :
data={'unl': unldata, 'outputs':['dot', 'svg', 'rdf']}
try:
r = requests.post('https://unl.demo.tetras-libre.fr/unl2rdf', data=data)
except Exception as e:
return 'Error calling https://unl.demo.tetras-libre.fr/unl2rdf : "{error}"'.format(error=e)
    htmlStr = r.text  # renamed: 'html' would shadow the html module imported above
    # Use a regex instead of parsing the HTML, because the returned page is malformed
    regexSvg = re.compile('<svg.*svg>', re.MULTILINE | re.DOTALL)
    regexRdf = re.compile("<code id='rdf' class='collapse show'>(.*?)</code>", re.MULTILINE | re.DOTALL)
    try:
        svg = regexSvg.search(htmlStr).group()
        rdf = regexRdf.search(htmlStr).group(1)
    except Exception as e:
        svg = ''
        rdf = ''
        print(e)
    return svg, rdf
def zipdir(path, ziph):
# ziph is zipfile handle
for root, dirs, files in os.walk(path):
for file in files:
if not('orig' in root):
ziph.write(os.path.join(root, file),
os.path.relpath(os.path.join(root, file),
os.path.join(path, '..')))
def addBaseUri(rdfStr):
regexBaseUri = re.compile("http://rdf-unl.org.*?sentence.*?ontology")
baseUri = regexBaseUri.search(rdfStr).group()
rdfStr = "# baseURI: "+baseUri+"\n"+rdfStr
return(rdfStr)
def postEditRdf(rdfPath, frStr, enStr):
textID = rdfPath.rsplit('/', 1)[0]
newPrefix = "http://unsel.rdf-unl.org/"+textID
    with open(rdfPath, 'r') as rdfFile:
        rdfStr = rdfFile.read()
regexBaseUri = re.compile("http://rdf-unl.org.*?sentence.*?ontology")
rdfStr = rdfStr.replace('rdfs:label "TBD : phrase en langue naturelle"@inv ;',
'<https://unl.tetras-libre.fr/rdf/schema#has_id> "{}" ;\n'.format(textID.split('/')[-2])+'rdfs:label """{}"""@fr ;\n'.format(frStr)+' rdfs:label """{}"""@en ;\n'.format(enStr))
baseUri = regexBaseUri.search(rdfStr).group()
oldPrefix = baseUri.rsplit('/', 1)[0]
rdfStr = rdfStr.replace(oldPrefix+'#ontology', newPrefix.rsplit('/', 1)[0]+'#ontology')
rdfStr = rdfStr.replace(oldPrefix+'#', "http://unsel.rdf-unl.org/uw_lexeme#")
rdfStr = "# baseURI: "+baseUri+"\n @prefix : <"+baseUri.replace("ontology","")+"> .\n"+rdfStr
rdfStr = rdfStr.replace(oldPrefix, newPrefix)
    with open(rdfPath, 'w') as rdfFile:
        rdfFile.write(rdfStr)
def replaceInplace(filePath, searchText, replaceText):
    # Read the file, replace all occurrences of searchText, and overwrite the file
    with open(filePath, "rt") as fin:
        data = fin.read()
    data = data.replace(searchText, replaceText)
    with open(filePath, "wt") as fout:
        fout.write(data)
def createStoreDirectory(unlStr, srsaRef):
storeDir = storeBase+srsaRef+"/current/"
regexFr = re.compile("{org:fr}\n(.*?)\n{/org}",re.MULTILINE|re.DOTALL)
try:
frStr = regexFr.search(unlStr).group(1)
except AttributeError:
frStr = ''
enStr = ''
# Create a directory named after 'Référence'
try :
os.makedirs(storeDir)
except FileExistsError:
pass
    # Add the English translation to the UNL code
unlStr = unlStr.replace("{/org}", "{{/org}}\n{{en}}\n{enStr}\n{{/en}}".format(enStr=enStr))
# Write UNL code to a file
with open(storeDir+srsaRef+'.unl','w') as unlFile:
unlFile.write(unlStr)
os.chmod(storeDir+srsaRef+'.unl',0o766)
# Send UNL code to https://unl.demo.tetras-libre.fr/unl2rdf to get SVG and RDF
#svg, rdf = unl2dotWeb(unlStr)
# Use unltools jar to create ttl and dot file from unl
unl2stuff(storeDir+srsaRef+'.unl', '/opt/dashboards/tools/unl2rdf-app-0.9.jar', storeDir+srsaRef, 'rdf')
postEditRdf(storeDir+srsaRef+'.ttl', frStr, enStr)
unl2stuff(storeDir+srsaRef+'.unl', '/opt/dashboards/tools/unl2rdf-app-0.9.jar', storeDir+srsaRef, 'dot')
# Generate svg and png
graphviz.render('dot', 'svg', storeDir+srsaRef+'.dot')
graphviz.render('dot', 'png', storeDir+srsaRef+'.dot')
# Rename generated svg and png so they are not named like file.dot.svg
svgList = glob(storeDir+srsaRef+"*.svg")
for svgPath in svgList:
svgNewPath = svgPath.replace(".dot","")
os.rename(svgPath, svgNewPath)
pngList = glob(storeDir+srsaRef+"*.png")
for pngPath in pngList:
pngNewPath = pngPath.replace(".dot","")
os.rename(pngPath, pngNewPath)
# Add full text sentences to the svg
replaceInplace(storeDir+srsaRef+'.svg', '</svg>','''<text x="0" y="-40">fr : {fr}</text>
<text x="0" y="-20">en : {en}</text>
</svg>'''.format(fr=frStr, en=enStr))
#svgWithTxt = svg.replace('</svg>','''<text x="0" y="-40">fr : {fr}</text>
#<text x="0" y="-20">en : {en}</text>
#</svg>'''.format(fr=frStr, en=enStr))
#with open(storeDir+srsaRef+'.svg','w') as svgFile:
# svgFile.write(svgWithTxt)
#os.chmod(storeDir+srsaRef+'.svg',0o766)
#with open(storeDir+srsaRef+'.ttl','w') as rdfFile:
# rdfFile.write(rdf)
os.chmod(storeDir+srsaRef+'.ttl',0o766)
os.chmod(storeDir+srsaRef+'.svg',0o766)
os.chmod(storeDir+srsaRef+'.png',0o766)
os.chmod(storeDir+srsaRef+'.dot',0o766)
os.chmod(storeDir+srsaRef+'.unl',0o766)
# Convert svg to png and write to a file
#try:
# svg2png(bytestring=svgWithTxt, write_to=storeDir+srsaRef+'.png')
#except :
# pass
shutil.copytree(storeDir, storeBase+srsaRef+"/orig/")
with open(storeBase+srsaRef+"/current/"+srsaRef+'.comments','w') as commentFile:
commentFile.write("[David] : Saisissez vos commentaires en commençant par votre nom, n'oubliez pas d'enregistrer : ")
os.chmod(storeBase+srsaRef+"/current/"+srsaRef+'.comments',0o766)
def writeUnlFiles(unlStr, storePrefix):
    with open(storePrefix + '.unl', 'w') as unlFile:
        unlFile.write(unlStr)
def createFolderFromUnselInter(srsaRef):
url = 'https://lingwarium.org/UNseL-inter/GrapheUNL.txt'
unlStr = urlopen(url).read().decode('utf-8').replace('[P:1]','').replace('[/P]','')
createStoreDirectory(unlStr, srsaRef)
```
%% Cell type:code id:7b32d69a-52fb-4b9d-8cd9-5fb45c177284 tags:
``` python
#######################################################################################################
# Extraction
#######################################################################################################
def run_command(cmd):
    # communicate() waits for the process to finish and returns decoded stdout/stderr
    with Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True) as p:
        stdout, stderr = p.communicate()
    return p.returncode, stdout, stderr
# Run SHACL infer sh script. mode argument can take the values 'infer' or 'validate'
def shaclInfer(ttlPath, mode, ttlRulesPath = ''):
if ttlRulesPath == '':
cmd = ['sh', shaclBase+'/shacl'+mode+'.sh', '-datafile', ttlPath]
else:
cmd = ['sh', shaclBase+'/shacl'+mode+'.sh', '-datafile', ttlPath, '-shapesfile', ttlRulesPath]
#cmd = ' '.join(cmd)
#!{cmd}
code, out, err = run_command(cmd)
if code != 0:
print("Error in SHACL script: \n\n"+err)
else:
return(out)
def export_result(g):
    # NB: shadowed by the richer export_result defined in the TENET cell below;
    # relies on base_uri, which that cell defines.
    export_file = 'output.ttl'
    g.serialize(destination=export_file, base=base_uri, format='turtle')
#shaclInfer('/opt/dashboards/tetras-lab-unl-demos/demo-cctp-40.ttl', 'infer')
```
%% Cell type:code id:5c7164d7-c074-4aa3-9776-0cc5cc8f18f7 tags:
``` python
#==============================================================================
# TENET: prepare work data
#------------------------------------------------------------------------------
# Prepare work data for extraction processing.
#==============================================================================
#==============================================================================
# Parameters
#==============================================================================
# Working directories
CONFIG_DIR = "/opt/dashboards/tools/tenet/config/"
FRAME_DIR = "/opt/dashboards/tools/tenet/frame/"
CORPUS_DIR = storeBase
OUTPUT_DIR = "output/"
# Config Definition
TURTLE_SUFFIX = ".ttl"
frame_file = "system-ontology.ttl"
dash_file = "dash-data-shapes.ttl" # data from "http://datashapes.org/dash.ttl"
schema_file = "unl-rdf-schema.ttl"
semantic_net_file = "semantic-net.ttl"
cts_file = "transduction-schemes.ttl"
c_param_file = "config-parameters.ttl"
# Dev Tests
base_uri = "https://unsel.tetras-libre.fr/tenet/working"
#==============================================================================
# Graph Initialization
#==============================================================================
def load_config(work_graph):
file_ref = CONFIG_DIR + schema_file
work_graph.parse(file_ref)
file_ref = CONFIG_DIR + semantic_net_file
work_graph.parse(file_ref)
file_ref = CONFIG_DIR + dash_file
work_graph.parse(file_ref)
file_ref = CONFIG_DIR + c_param_file
work_graph.parse(file_ref)
def load_frame(work_graph):
file_ref = FRAME_DIR + frame_file
work_graph.parse(file_ref)
#def define_namespace(work_graph):
# print("-- Namespace Definition:")
#
# sys_uri = "https://unsel.tetras-libre.fr/tenet/frame/system-ontology/"
# concept_classes = ["agent"]
# for concept in concept_classes:
# new_prefix = "sys-" + concept
# new_uri = URIRef(sys_uri + concept + '#')
# work_graph.namespace_manager.bind(new_prefix, new_uri)
# print("----- " + new_prefix + ": " + new_uri)
# print(list(work_graph.namespace_manager.namespaces()))
def load_sentences(work_graph, corpus):
target_ref = CORPUS_DIR + corpus + '/current/*.ttl'
for file_ref in glob(target_ref):
if 'factoid' not in file_ref :
            # Patch for new UWs with, for instance, ".§A" in restrictions
work_graph.parse(file_ref)
def load_cts(work_graph):
file_ref = CONFIG_DIR + cts_file
work_graph.parse(file_ref)
#==============================================================================
# Result (export)
#==============================================================================
def export_result(work_graph, export_ref, export_file):
work_graph.serialize(destination=export_file,
base=base_uri + '/' + export_ref,
format='turtle')
def finalize_export_file(export_file):
""" finalize the export file by adding some useful prefixes """
with open(export_file, "rt") as file:
x = file.read()
with open(export_file, "wt") as file:
x = x.replace(
"@prefix sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/> .",
"""
@prefix sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/> .
@prefix sys-class: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/class/> .
@prefix sys-property: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/property/> .
@prefix sys-relation: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/relation/> .
@prefix sys-Event: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/Event#> .
@prefix sys-event: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/eventObjectProperty#> .
@prefix sys-State_Property: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/State_Property#> .
@prefix sys-stateProperty: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/statePropertyObjectProperty#> .
@prefix sys-abstract_thing: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/abstract_thing#> .
@prefix sys-action_verb: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/action_verb#> .
@prefix sys-agent: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/agent#> .
@prefix sys-attributive_verb: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/attributive_verb#> .
@prefix sys-component: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/component#> .
@prefix sys-message: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/message#> .
@prefix sys-place: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/place#> .
""")
file.write(x)
#==============================================================================
# Main Function
#==============================================================================
def createTenetGraph(corpus):
try:
work_graph = Graph()
load_config(work_graph)
load_frame(work_graph)
#define_namespace(work_graph)
load_cts(work_graph)
load_sentences(work_graph, corpus)
output_file = extractionGraph
export_result(work_graph, corpus, output_file)
finalize_export_file(output_file)
return(work_graph)
except Exception as e :
print("!!! An exception occurred importing rdf graphs for extraction !!!\n"+str(e))
def addSentenceInTenetGraph(work_graph, sentence_ref):
""" TODO: add a sentence to work_graph """
pass
```
%% Cell type:code id:f66bfcd2-f2b9-4603-b1f2-d4fb643c8c3c tags:
``` python
clearExecutionInstances = """
PREFIX cts: <https://unsel.tetras-libre.fr/tenet/transduction-schemes#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
DELETE {?x rdf:type ?c}
WHERE {
?c rdfs:subClassOf* cts:Transduction_Schemes .
?x rdf:type ?c .
}
"""
addExecutionInstance = """
PREFIX cts: <https://unsel.tetras-libre.fr/tenet/transduction-schemes#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
INSERT DATA {{<exec_instance> rdf:type {}}}
"""
def owl2vowl(ttlFilePath, importList=[]):
# Run java parser
if importList == []:
cmd = ['java', '-jar', owl2vowlPath,
'-file', ttlFilePath]
else:
cmd = ['java', '-jar', owl2vowlPath,
'-file', ttlFilePath,
'-dependencies'] + importList
with Popen(cmd, stdout=PIPE, stderr=STDOUT) as p:
p.wait()
p.stdout.flush()
if p.returncode != 0:
print("Error in owl2vowl: \n\n"+p.stdout.read().decode())
outFileName = ttlFilePath.split('/')[-1].replace('ttl','json')
os.rename(outFileName, '/opt/webvowl/'+outFileName)
def applyInferStep(uuidStr, graph, step):
step_ref = "cts:" + step
dest_file = workDir + uuidStr + '-' + step + ".ttl"
base_ref = "http://" + uuidStr + '/' + step
graph.update(clearExecutionInstances)
    graph.update(addExecutionInstance.format(step_ref))  # e.g. step_ref = 'cts:generation'
graph.serialize(destination=dest_file, base=base_ref, format='turtle') # serialize graph before inference
work_file = dest_file
inferResult = shaclInfer(work_file, 'infer') # apply SHACL inference
graph.parse(data=inferResult) # update graph with inference
graph.serialize(destination=dest_file, base=base_ref, format='turtle') # serialize graph after inference
return graph, inferResult
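# A minimal sketch of how the steps chain together (same sequence as createOnto below),
# assuming 'COMP1234' names an existing corpus directory:
# g = createTenetGraph('COMP1234')
# g, _ = applyInferStep('COMP1234', g, 'preprocessing')
# g, _ = applyInferStep('COMP1234', g, 'net_extension')
# g, result = applyInferStep('COMP1234', g, 'generation_dga_patch')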
```
%% Cell type:code id:40b54849-9333-4819-b953-6e816ffe474c tags:
``` python
#######################################################################################################
# Validation
#######################################################################################################
def pyshaclValidate():
from pyshacl import validate
data_file = open('tmp.ttl').read()
shapes_file = open('test-shacl-construct.shapes-order.ttl').read()
conforms, v_graph, v_text = validate(data_file, shacl_graph=shapes_file)
print(conforms)
print(v_graph)
print(v_text)
def loadFactoids(directory):
ttlList = glob(directory+"/*/current/*_factoid.ttl")
g = Graph()
for ttl in ttlList :
g.parse(ttl)
g.parse('/opt/dashboards/tools/tenet/frame/system-ontology.ttl')
return(g)
def loadSentences(directory):
ttlList = glob(directory+"/*/current/*.ttl")
g = Graph()
for ttl in ttlList :
if 'factoid' not in ttl:
g.parse(ttl)
g.parse('/opt/dashboards/tools/tenet/config/unl-rdf-schema.ttl')
return(g)
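# Verification queries. Each template contains the placeholder ##ID##, which
# createVerificationMessages (below) substitutes with a requirement/sentence id.
# possibleUnderspecificationQuery: warns when a sentence mentions an entity whose
# subclasses all come from other sentences ("do we really mean every X?").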
possibleUnderspecificationQuery ='''
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX unl: <https://unl.tetras-libre.fr/rdf/schema#>
PREFIX net: <https://unsel.tetras-libre.fr/tenet/semantic-net#>
PREFIX cprm: <https://unsel.tetras-libre.fr/tenet/config/parameters#>
PREFIX req: <https://unsel.tetras-libre.fr/tenet/frame/requirement-ontology#>
PREFIX sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/>
PREFIX fprm: <https://unsel.tetras-libre.fr/tenet/frame/parameters#>
SELECT ?thisId (CONCAT('Parle-t-on bien de tous les "', ?entityLabel, '" possibles ? (', ?subEntities, ')' ) AS ?message)
WHERE {
FILTER(?count>1)
{SELECT ?this ?thisId ?entityLabel (GROUP_CONCAT(?subEntityLabel;SEPARATOR=", ") AS ?subEntities) (COUNT(?subEntityLabel) AS ?count)
WHERE {
?subEntity rdfs:subClassOf ?entity ; rdfs:label ?subEntityLabel .
{SELECT ?this ?entity ?thisId ?entityLabel
WHERE {
BIND("##ID##" AS ?thisId)
?this a unl:UNL_Sentence ; unl:has_id ?thisId .
?entity sys:from_structure ?this ;
rdfs:subClassOf+ sys:Structure ;
rdfs:label ?entityLabel .
FILTER (
!EXISTS {?subEntity1 rdfs:subClassOf ?entity; sys:from_structure ?this}
)
}}
}
GROUP BY ?this ?thisId ?entityLabel }
}
'''
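# possibleClassEquivalenceQuery: flags a class that has exactly one subclass, introduced
# by another sentence, and asks whether the two classes are in fact equivalent.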
possibleClassEquivalenceQuery = '''PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX unl: <https://unl.tetras-libre.fr/rdf/schema#>
PREFIX net: <https://unsel.tetras-libre.fr/tenet/semantic-net#>
PREFIX cprm: <https://unsel.tetras-libre.fr/tenet/config/parameters#>
PREFIX req: <https://unsel.tetras-libre.fr/tenet/frame/requirement-ontology#>
PREFIX sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/>
PREFIX fprm: <https://unsel.tetras-libre.fr/tenet/frame/parameters#>
SELECT (CONCAT(?messageTMP, ?sentenceList, ')') AS ?message) ?sentenceList
WHERE {
FILTER(regex(?sentenceList, "##ID##" ))
{SELECT ?messageTMP (GROUP_CONCAT(?sentenceId; SEPARATOR=", ") AS ?sentenceList)
WHERE {
SELECT DISTINCT ?messageTMP ?sentenceId
WHERE {
FILTER (?count = 1)
?subEntity rdfs:subClassOf ?this ; rdfs:label ?subEntityLabel ; sys:from_structure ?subEntitySentence .
?this rdfs:label ?thisLabel ; sys:from_structure ?thisSentence .
BIND(CONCAT('"', ?subEntityLabel, '" est la seule sous classe de "', ?thisLabel, '". Ces classes sont-elles équivalentes ? <br/>(voir les exigences ') AS ?messageTMP)
{
?thisSentence unl:has_id ?thisSentenceId .
BIND (?thisSentenceId AS ?sentenceId)}
UNION
{?subEntitySentence unl:has_id ?subEntitySentenceId .
BIND (?subEntitySentenceId AS ?sentenceId)}
FILTER(NOT EXISTS {?subEntity sys:from_structure ?thisSentence})
{SELECT ?this (COUNT(?subClass) AS ?count)
WHERE {
?this rdfs:subClassOf+ sys:Structure .
?subClass rdfs:subClassOf ?this
} GROUP BY ?this }
} ORDER BY ?sentenceId
} GROUP BY ?messageTMP }
}'''
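# unfeaturedDomainOrRangeQuery: for a property linking a "featured" class (one carrying
# sys:has_feature) to an unfeatured one in the same sentence, suggests the features
# that could further specify the unfeatured entity.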
unfeaturedDomainOrRangeQuery = '''PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX unl: <https://unl.tetras-libre.fr/rdf/schema#>
PREFIX net: <https://unsel.tetras-libre.fr/tenet/semantic-net#>
PREFIX cprm: <https://unsel.tetras-libre.fr/tenet/config/parameters#>
PREFIX req: <https://unsel.tetras-libre.fr/tenet/frame/requirement-ontology#>
PREFIX sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/>
PREFIX fprm: <https://unsel.tetras-libre.fr/tenet/frame/parameters#>
SELECT ?sentenceId (CONCAT(?messageTMP, GROUP_CONCAT(?featureLabel ; SEPARATOR=', ')) AS ?message)
WHERE {
SELECT DISTINCT ?sentenceId ?featureLabel (CONCAT( 'Dans cette exigence, "', ?unfeaturedLabel, '" pourrait être précisé par : ') AS ?messageTMP)
WHERE {
{
?p rdfs:subPropertyOf+ sys:Property ;
rdfs:domain ?featured ;
rdfs:range ?unfeatured .
}
UNION
{
?p rdfs:subPropertyOf+ sys:Property ;
rdfs:domain ?unfeatured ;
rdfs:range ?featured .
}
#?p rdfs:label ?pLabel .
?featured sys:has_feature ?feature .
FILTER(NOT EXISTS {
?unfeatured sys:has_feature ?feature
})
?featuredInstance a ?featured ; sys:from_structure ?sentence.
?unfeaturedInstance a ?unfeatured ; sys:from_structure ?sentence.
BIND("##ID##" AS ?sentenceId)
?sentence unl:has_id ?sentenceId .
FILTER(NOT EXISTS {
?featuredInstance a ?featured2 .
?featured2 rdfs:subClassOf ?featured .
})
FILTER(NOT EXISTS {
?unfeaturedInstance a ?unfeatured2 .
?unfeatured2 rdfs:subClassOf ?unfeatured .
})
?featured rdfs:label ?featuredLabel .
?unfeatured rdfs:label ?unfeaturedLabel .
?feature rdfs:label ?featureLabel .
}
} GROUP BY ?sentenceId ?messageTMP
'''
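# Variant of the previous query: also looks for sibling subclasses that do carry the
# feature in other sentences, and cites those sentences as reference examples.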
unfeaturedDomainOrRangeWithRefQuery ='''PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX unl: <https://unl.tetras-libre.fr/rdf/schema#>
PREFIX net: <https://unsel.tetras-libre.fr/tenet/semantic-net#>
PREFIX cprm: <https://unsel.tetras-libre.fr/tenet/config/parameters#>
PREFIX req: <https://unsel.tetras-libre.fr/tenet/frame/requirement-ontology#>
PREFIX sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/>
PREFIX fprm: <https://unsel.tetras-libre.fr/tenet/frame/parameters#>
SELECT ?sentenceId (CONCAT('"', ?unfeaturedLabel, '" pourrait être précisé par un ou plusieurs attributs parmis : ', ?featureList, '. <br/>(exemples de référence : ', GROUP_CONCAT(?sentence2id ; SEPARATOR=', '), ').') AS ?message)
WHERE {
{SELECT DISTINCT ?sentenceId ?unfeaturedLabel ?sentence2id (GROUP_CONCAT(?featureLabel ; SEPARATOR=', ') AS ?featureList) #
WHERE {
SELECT DISTINCT ?sentenceId ?sentence2id ?unfeaturedLabel ?featureLabel ?otherwiseFeaturedLabel ?featured2label
WHERE {
{
?p rdfs:subPropertyOf+ sys:Property ;
rdfs:domain ?featured ;
rdfs:range ?unfeatured .
}
UNION
{
?p rdfs:subPropertyOf+ sys:Property ;
rdfs:domain ?unfeatured ;
rdfs:range ?featured .
}
#?p rdfs:label ?pLabel .
?featured sys:has_feature ?feature .
FILTER(NOT EXISTS {
?unfeatured sys:has_feature ?feature
})
?featuredInstance a ?featured ; sys:from_structure ?sentence.
?unfeaturedInstance a ?unfeatured ; sys:from_structure ?sentence.
BIND("##ID##" AS ?sentenceId)
?sentence unl:has_id ?sentenceId .
?otherwiseFeatured rdfs:subClassOf ?unfeatured ; sys:has_feature ?feature2 ; rdfs:label ?otherwiseFeaturedLabel.
?otherwiseFeaturedInstance a ?otherwiseFeatured ; sys:from_structure ?sentence2.
?sentence2 unl:has_id ?sentence2id .
{?otherwiseFeaturedInstance ?p2 ?featuredInstance2} UNION { ?featuredInstance2 ?p2 ?otherwiseFeaturedInstance}
?featuredInstance2 a ?featured2 .
?featured2 sys:has_feature ?feature2 ; rdfs:label ?featured2label.
FILTER(NOT EXISTS {
?featuredInstance a ?featured2 .
?featured2 rdfs:subClassOf ?featured .
})
FILTER(NOT EXISTS {
?unfeaturedInstance a ?unfeatured2 .
?unfeatured2 rdfs:subClassOf ?unfeatured .
})
?featured rdfs:label ?featuredLabel .
?unfeatured rdfs:label ?unfeaturedLabel .
?feature rdfs:label ?featureLabel .
}
} GROUP BY ?sentenceId ?unfeaturedLabel ?sentence2id}
} GROUP BY ?sentenceId ?unfeaturedLabel ?featureList
'''
queryTypeDic = {'error':[possibleClassEquivalenceQuery],
'warning':[possibleUnderspecificationQuery],
'info':[unfeaturedDomainOrRangeWithRefQuery]}
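# The three message levels map to the *_errorMessages.txt, *_warningMessages.txt and
# *_infoMessages.txt files written by createVerificationMessages and rendered as
# Bootstrap alerts in main_pane below.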
# Load the factoids, then the requirements, to verify the new sentences against them
g = loadFactoids(storeBase)
sparqlUpdate.setQuery('INSERT DATA {'+g.serialize(format='nt')+'}')
sparqlLog = sparqlUpdate.query()
g.serialize(destination='/opt/dashboards/store/extraction2.ttl', format='turtle')
owl2vowl('/opt/dashboards/store/extraction2.ttl')
g = loadSentences(storeBase)
sparqlUpdate.setQuery('INSERT DATA {'+g.serialize(format='nt')+'}')
sparqlLog = sparqlUpdate.query()
# Post-processing query to propagate the instances to the subclasses for each sentence
# -> the class that receives the instances at extraction time will probably need revisiting
def instToSubclasses():
instToSubclassesQuery = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sys: <https://unsel.tetras-libre.fr/tenet/frame/system-ontology/>
INSERT {?inst a ?subClass .}
WHERE {
?class rdfs:subClassOf* sys:Structure ; sys:from_structure ?sentence .
?subClass rdfs:subClassOf+ ?class ; sys:from_structure ?sentence .
?inst a ?class ; sys:from_structure ?sentence .
}"""
sparqlUpdate.setQuery(instToSubclassesQuery)
sparqlLog = sparqlUpdate.query()
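# Worked example (hypothetical data): if sentence S yields
#   :radio_equipment rdfs:subClassOf :equipment   (both sys:from_structure S)
#   :inst1 a :equipment ; sys:from_structure S
# then the update above also asserts :inst1 a :radio_equipment, propagating the
# instance down to the subclasses extracted from the same sentence.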
instToSubclasses()
#factoidList = [x.replace('..','/opt/dashboards') for x in glob(storeBase+"/*/current/*factoid.ttl")]
#owl2vowl(FRAME_DIR + frame_file, importList=factoidList)
def createVerificationMessages(directory):
reqId = directory.split('/')[-1]
for messageType in ['error', 'warning', 'info']:
#print(messageType)
messagesStr = ''
for queryTMP in queryTypeDic[messageType] :
query = queryTMP.replace("##ID##",reqId)
sparqlQuery.setQuery(query)
results = sparqlQuery.query().convert()
for result in results["results"]["bindings"]:
message = result["message"]["value"]
if message not in messagesStr :
#print("printing")
messagesStr+=message+"\n"
        with open(directory+'/current/'+reqId+'_'+messageType+'Messages.txt', 'w') as outFile:
            outFile.write(messagesStr)
def createOnto(uuidStr):
# -- Initialization (creation of extraction graph)
step = 'init_graph'
dest_file = workDir + uuidStr + '-' + step + ".ttl"
base_ref = "http://" + uuidStr + '/' + step
graph = createTenetGraph(uuidStr)
graph.serialize(destination=dest_file, base=base_ref, format='turtle')
# -- Extraction
graph, _ = applyInferStep(uuidStr, graph, 'preprocessing')
graph, _ = applyInferStep(uuidStr, graph, 'net_extension')
graph, finalInferResult = applyInferStep(uuidStr, graph, 'generation_dga_patch')
# -- Write result
factoidPath = storeBase+uuidStr+'/current/'+uuidStr+'_factoid.ttl'
sentencePath = storeBase+uuidStr+'/current/'+uuidStr+'.ttl'
    with open(factoidPath, 'w') as outfile:
        outfile.write(finalInferResult)
# -- Webvowl
owl2vowl(factoidPath)
# -- Verification
factoidGraph = Graph()
factoidGraph.parse(factoidPath)
factoidGraph.parse(sentencePath)
sparqlUpdate.setQuery('INSERT DATA {'+factoidGraph.serialize(format='nt')+'}')
sparqlLog = sparqlUpdate.query()
instToSubclasses()
createVerificationMessages(storeBase+uuidStr)
```
%% Cell type:code id:744abdb9-b3d6-4025-abc9-2f749644c3ed tags:
``` python
# Functions for a global update of the corpus (do not run in "dashboard" mode)
def updateAllFactoids():
    dirList = glob('/opt/dashboards/store/CCTP-SRSA-IP-20210831/*')
    #dirList = ['/opt/dashboards/store/CCTP-SRSA-IP-20210831/SRSA-IP_STB_PHON_00100'] # single requirement, for testing
for directory in dirList:
if directory.split('/')[-1] != '0_NONE':
print(directory)
reqId = directory.split('/')[-1]
createOnto(reqId)
#updateAllFactoids()
def updateAllVerificationMessages():
#dirList = ['/opt/dashboards/store/CCTP-SRSA-IP-20210831/SRSA-IP_STB_PHON_00100']
dirList = glob('/opt/dashboards/store/CCTP-SRSA-IP-20210831/*')
for directory in dirList:
if directory.split('/')[-1] != '0_NONE':
print(directory)
createVerificationMessages(directory)
#updateAllVerificationMessages()
#query = possibleUnderspecificationQuery.replace("##ID##","SRSA-IP_STB_PHON_00500")
#for r in g.query(query):
#print(r['message'])
```
%% Cell type:code id:ca43f1f2-42ef-4355-a2e2-e27351a51b96 tags:
``` python
#######################################################################################################
# UNL corpus browser / editor
#######################################################################################################
saveButtonClicks = 0
def main_pane(directory):
saveButtonClicks = 0
saveButton = pn.widgets.Button(name='Enregistrer', button_type='success', width = 100)
saveButtonDic = dict(button=saveButton)
saveCommentButton = pn.widgets.Button(name='Enregistrer', button_type='success', width = 100)
path = storeBase+directory+'/current/'
pathOrig = storeBase+directory+'/orig/'
svgPath = path+directory+'.svg'
pngPath = path+directory+'.png'
unlPath = path+directory+'.unl'
rdfPath = path+directory+'.ttl'
commentPath = path+directory+'.comments'
    with open(commentPath) as commentFile:
        commentStr = commentFile.read()
    with open(unlPath) as unlFile:
        unlStr = unlFile.read()
svgPathOrig = pathOrig+directory+'.svg'
pngPathOrig = pathOrig+directory+'.png'
unlPathOrig = pathOrig+directory+'.unl'
rdfPathOrig = pathOrig+directory+'.ttl'
    with open(unlPathOrig) as unlFileOrig:
        unlStrOrig = unlFileOrig.read()
unlHtmlOrig = unlStrOrig.replace("\n","<br/>")
if unlStrOrig == unlStr:
modIndicator = ''
else:
modIndicator = ' <u>modifié</u>'
regexFr = re.compile("{org:fr}\n(.*?)\n{/org}",re.MULTILINE|re.DOTALL)
try:
frStr = regexFr.search(unlStr).group(1)
except AttributeError:
frStr = ''
regexEn = re.compile("{en}\n(.*?)\n{/en}",re.MULTILINE|re.DOTALL)
try:
enStr = regexEn.search(unlStr).group(1)
except AttributeError:
enStr = ''
unlOrig_html = pn.pane.HTML(unlHtmlOrig)
unl_input = pn.widgets.input.TextAreaInput(height=400)
unl_input.value = unlStr
comment_input = pn.widgets.input.TextAreaInput(height=300)
comment_input.value = commentStr
downloadSvg = pn.widgets.FileDownload(sizing_mode='stretch_width', file=svgPath, embed=True, name='Télécharger le graphe en SVG :')
downloadPng = pn.widgets.FileDownload(sizing_mode='stretch_width', file=pngPath, embed=True, name='Télécharger le graphe en PNG :')
downloadRdf = pn.widgets.FileDownload(sizing_mode='stretch_width', file=rdfPath, embed=True, name='Télécharger le code UNL-RDF :')
downloadUnl = pn.widgets.FileDownload(sizing_mode='stretch_width', file=unlPath, embed=True, name='Télécharger le code UNL :')
def compute_unl_graph_pane(button):
global saveButtonClicks
if saveButtonClicks != 0:
writeUnlFiles(unl_input.value, storeBase+selectDir.value+'/current/'+selectDir.value)
pane = pn.pane.PNG(pngPath, width = pane_width)
saveButtonClicks += 1
return(pane)
unl_graph_pane = pn.interact(lambda button : compute_unl_graph_pane(button), **saveButtonDic)
warnColumn = pn.Card(width=pane_width, header = "**Alertes pour l'exigence :**")
warnings = 0
    try:
        with open(storeBase+directory+'/current/'+directory+'_errorMessages.txt', 'r') as errorfile:
            errorlist = errorfile.readlines()
        for error in errorlist:
            warnColumn.append(pn.pane.HTML('<div class="alert alert-danger">{}</div>'.format(error)))
        if errorlist != []:
            warnings = 1
    except FileNotFoundError:
        # no error-messages file has been generated for this requirement yet
        pass
    try:
        with open(storeBase+directory+'/current/'+directory+'_warningMessages.txt', 'r') as warnfile:
            warnlist = warnfile.readlines()
        for warn in warnlist:
            warnColumn.append(pn.pane.HTML('<div class="alert alert-warning">{}</div>'.format(warn)))
        if warnlist != []:
            warnings = 1
    except FileNotFoundError:
        # no warning-messages file has been generated for this requirement yet
        pass
    try:
        with open(storeBase+directory+'/current/'+directory+'_infoMessages.txt', 'r') as infofile:
            infolist = infofile.readlines()
        for info in infolist:
            warnColumn.append(pn.pane.HTML('<div class="alert alert-info">{}</div>'.format(info)))
        if infolist != []:
            warnings = 1
    except FileNotFoundError:
        # no info-messages file has been generated for this requirement yet
        pass
if warnings == 0:
warnColumn.append(pn.pane.HTML('<div class="alert alert-info">Pas d\'anomalie détectée</div>'))
pane = pn.Column(
pn.Row(
pn.pane.HTML('<a href="https://unsel.tetras-lab.io/webvowl#{}_factoid" target="_blank"><button type="button" class="btn btn-outline-secondary btn-sm">Visualiser l\'ontologie construite</button><a>'.format(directory)),
pn.Column(pn.pane.HTML('<font size="tiny">Exigence sélectionnée : '+directory+'</font>'), sizing_mode='stretch_width'),
),
#pn.Card(
# pn.pane.HTML('''<iframe id="inlineFrameExample"
# title="Inline Frame Example"
# width="{}"
# height="800"
# src="https://unsel.tetras-lab.io/webvowl/#{}_factoid">
# </iframe>'''.format(pane_width,selectDir.value)),
# title = "Visualiser le factoid", width=pane_width, collapsed=True),
warnColumn,
pn.pane.HTML('FR : '+frStr),
pn.pane.HTML('EN : '+enStr),
unl_graph_pane[1],
pn.Card(pn.Column(saveCommentButton, comment_input, width = pane_width),
header='**Commentaires**',
collapsed=True, width = pane_width),
pn.Card(pn.Column(saveButton, unl_input, width = pane_width),
header='**Code UNL**'+modIndicator,
collapsed=True, width = pane_width),
pn.Card(pn.Column(
unlOrig_html,
pn.Card(pn.pane.PNG(pngPath, width = pane_width-20), header = "**Graphe d'origine**", width=pane_width-10)
),
header="**Code UNL d'origine**",
collapsed=True, width = pane_width),
pn.WidgetBox(
pn.Row(downloadPng, downloadSvg),
pn.Row(downloadUnl, downloadRdf),
width=pane_width,
),
width=pane_width,
)
    def saveComments(event):
        with open(commentPath, 'w') as commentFile:
            commentFile.write(comment_input.value)
saveCommentButton.on_click(saveComments)
return(pane)
```
%% Cell type:code id:5d4ec56e-d0bb-44c8-975b-49d409b6b160 tags:
``` python
#######################################################################################################
# Navigation Interface
#######################################################################################################
pathList = glob(storeBase+'*')
dirList = sorted([x.split('/')[-1] for x in pathList])
#warningList = sorted(list(errorReqDic.keys())+list(warningReqDic.keys())+list(infoReqDic.keys()))
warningList = []
dirDic = {}
for directory in dirList:
if directory in warningList:
dirDic['**'+directory] = directory
else:
dirDic[directory] = directory
dirDic = dict(sorted(dirDic.items()))
selectDir = pn.widgets.Select(name='Sélectionnez une exigence : ', options=dirDic, width = 350)
selectDir.value = '0_NONE'
dir_selector = dict(directory=selectDir)#, save=saveButton)
#######################################################################################################
# Analysis with UNseL-inter
#######################################################################################################
def extractOnClick(event):
uuidStr = "COMP"+str(uuid.uuid4())
createFolderFromUnselInter(uuidStr)
createOnto(uuidStr)
selectDir.options[uuidStr] = uuidStr
selectDir.value = uuidStr
##################################################
buttonExtract = pn.widgets.Button(name="Construire l'ontologie de la phrase (factoïd)", width=300)
buttonExtract.param.watch(extractOnClick, 'clicks')
unlNavigatorPane = pn.interact(lambda directory : main_pane(directory), **dir_selector)
#unl_graph_pane = pn.interact(lambda button : compute_unl_graph_pane(button), **saveButtonDic)
# Main interface
pn.Column(
pn.Card( pn.Row(
pn.Spacer(sizing_mode='stretch_width'),
pn.Column(
pn.pane.HTML('''<iframe id="inlineFrameExample"
title="Inline Frame Example"
width="1000"
height="600"
src="https://lingwarium.org/UNseL-inter/">
</iframe>'''),
buttonExtract),
pn.Spacer(sizing_mode='stretch_width'),
),
title = "Analyser une nouvelle exigence", width=pane_width+50, collapsed=True,),
pn.layout.Divider(),
pn.Card(
pn.Row(
pn.layout.HSpacer(),
pn.Column(
selectDir,
unlNavigatorPane[1],
),
pn.layout.HSpacer(),
),
title = "Naviguer dans les exigences", width=pane_width+50, collapsed=True
),
pn.layout.Divider(),
pn.Card(
pn.pane.HTML('''<iframe id="inlineFrameExample"
title="Inline Frame Example"
width="{}"
height="800"
src="https://unsel.tetras-lab.io/webvowl/#extraction2">
</iframe>'''.format(pane_width)),
pn.Row(
#pn.pane.HTML('<a href="https://unsel.tetras-lab.io/webvowl#extraction" target="_blank"><button type="button" class="btn btn-outline-secondary btn-sm">Visualiser l\'ontologie construite pour tout le corpus</button><a>'),
#pn.pane.HTML('<a href="https://unsel.tetras-lab.io/static/webvowl_1.1.7/index.html#extraction_SRSA-IP_STB_PHON_00100" target="_blank"><button type="button" class="btn btn-outline-secondary btn-sm">Visualiser l\'extraction de SRSA-IP_STB_PHON_00100</button><a>'),
pn.widgets.FileDownload(file='/opt/dashboards/static/extraction.ttl', embed=True)
), title = "Résultats sur le corpus", width=pane_width+50, collapsed=True
),
)
```
%% Cell type:code id:e06f5381-6c2d-4762-bcb9-a914fb5889e3 tags:
``` python
#list = glob('/opt/dashboards/store/CCTP-SRSA-IP-20210831/*')
#for d in list :
# print(d)
# uuidStr = d.replace('/opt/dashboards/store/CCTP-SRSA-IP-20210831/','')
# createOnto(uuidStr)
```