Skip to content
Snippets Groups Projects
Commit 319dc4ef authored by Aurélien Lamercerie's avatar Aurélien Lamercerie
Browse files

Clean and add data with turtle format (.ttl)

parent a2cc346e
Branches
No related tags found
No related merge requests found
Showing
with 0 additions and 4010 deletions
#!/usr/bin/env python
"""
disagree_btwn_sents.py
(Derived from AMRICA/disagree.py)
A tool for inspecting AMR data to id patterns of inter-annotator disagreement
or semantic inequivalence.
AMR input file expected in format where comments above each annotation indicate
the sentence like so:
# ::id DF-170-181103-888_2097.1 ::date 2013-09-16T07:15:31 ::annotator ANON-01 ::preferred
# ::tok This is a sentence .
(this / file
:is (an / AMR))
For monolingual disagreement, all annotations of some sentence should occur
consecutively in the monolingual annotation file. For bilingual, annotations
should be in the same order of sentences between the two files.
For bilingual disagreement, you can include a ::alignments field from jamr to help with
AMR-sentence alignment.
"""
import argparse
import argparse_config
import codecs
import networkx as nx
from networkx.readwrite import json_graph
import json
import os
import pygraphviz as pgz
# internal libraries
from compare_smatch import amr_metadata
from compare_smatch import smatch_graph
from compare_smatch.amr_alignment import Amr2AmrAligner
from compare_smatch.amr_alignment import default_aligner
from compare_smatch.smatch_graph import SmatchGraph
from smatch import smatch
# Running counter that get_sent_info() turns into a synthetic sentence ID
# when an AMR's metadata has no 'id' entry and no default ID is supplied.
cur_sent_id = 0
def hilight_disagreement(test_amrs, gold_amr, iter_num, aligner=default_aligner, gold_aligned_fh=None):
    """
    Build a disagreement graph for every (test AMR, gold AMR) pair.

    Input:
      test_amrs: list of AMRs to compare to the gold AMR
      gold_amr: gold AMR object
      iter_num: forwarded to smatch.get_fh as ``iter_num``
      aligner: provides the node/edge weight functions and the constant
        mapping function; ``set_amrs`` is called on it for each pair
      gold_aligned_fh: optional open alignment file. When given, the variable
        mapping is read from it with get_next_gold_alignments() instead of
        being searched for, and the reported match number is -1.0.
    Returns a pair (amr_graphs, smatchgraphs):
      amr_graphs: list of (graph from smatch2graph, match number) tuples
      smatchgraphs: list of the SmatchGraph objects, in the same order
    """
    gold_label = u'b'
    test_label = u'a'

    # The gold side is prepared once and shared by every comparison.
    gold_amr.rename_node(gold_label)
    (gold_inst, gold_rel1, gold_rel2) = gold_amr.get_triples2()
    (gold_inst_t, gold_rel1_t, gold_rel2_t) = smatch_graph.amr2dict(gold_inst, gold_rel1, gold_rel2)

    scored_graphs = []
    pair_graphs = []
    for test_amr in test_amrs:
        # The aligner sees the pair before the test AMR's nodes are renamed.
        aligner.set_amrs(test_amr, gold_amr)
        test_amr.rename_node(test_label)
        (test_inst, test_rel1, test_rel2) = test_amr.get_triples2()

        if not gold_aligned_fh:
            (mapping, match_count) = smatch.get_fh(
                test_inst, test_rel1, test_rel2,
                gold_inst, gold_rel1, gold_rel2,
                test_label, gold_label,
                node_weight_fn=aligner.node_weight_fn,
                edge_weight_fn=aligner.edge_weight_fn,
                iter_num=iter_num)
        else:
            # Hand-edited alignments take precedence; no score is computed.
            mapping = get_next_gold_alignments(gold_aligned_fh)
            match_count = -1.0

        pair_graph = SmatchGraph(test_inst, test_rel1, test_rel2,
                                 gold_inst_t, gold_rel1_t, gold_rel2_t,
                                 mapping, const_map_fn=aligner.const_map_fn)
        rendered = pair_graph.smatch2graph(node_weight_fn=aligner.node_weight_fn,
                                           edge_weight_fn=aligner.edge_weight_fn)
        scored_graphs.append((rendered, match_count))
        pair_graphs.append(pair_graph)
    return (scored_graphs, pair_graphs)
def open_output_files(args):
    """
    Open the optional output files named by args.json_out and args.align_out.

    Each file is opened for writing as UTF-8. A falsy path yields None in the
    corresponding slot. Returns the pair (json_fh, align_fh).
    """
    def _open_for_write(path):
        # None / empty string means "this output was not requested".
        return codecs.open(path, 'w', encoding='utf8') if path else None

    return (_open_for_write(args.json_out), _open_for_write(args.align_out))
def close_output_files(json_fh, align_fh):
    """Close whichever of the two output handles is set; falsy handles are skipped."""
    for handle in (json_fh, align_fh):
        if handle:
            handle.close()
def get_next_gold_alignments(gold_aligned_fh):
    """
    Read the next block of alignment rows from an open alignment file.

    Rows are consumed up to the first blank line (or end of file). Rows that
    start with '#' are skipped. Every other row is tab-separated; column 0 is
    the test index and column 3 the gold index. Rows with a negative test
    index are ignored.

    Returns a list whose i-th element is the gold index aligned to test
    index i. The collected test indices must be exactly 0..n-1 (asserted).
    """
    test_to_gold = {}
    # iter() with a sentinel stops at the first empty (stripped) line.
    for row in iter(lambda: gold_aligned_fh.readline().strip(), ''):
        if row.startswith('#'):  # comment line
            continue
        fields = row.split('\t')
        test_ind = int(fields[0])
        gold_ind = int(fields[3])
        if test_ind >= 0:
            test_to_gold[test_ind] = gold_ind

    ordered = []
    for position, test_ind in enumerate(sorted(test_to_gold)):
        assert position == test_ind
        ordered.append(test_to_gold[test_ind])
    return ordered
def get_sent_info(metadata, dflt_id=None):
    """
    Return (ID, sentence) for an AMR and normalise its metadata in place.

    The sentence is taken from metadata['tok'] when present, otherwise from
    metadata['snt']; it is None when neither key exists. The ID is
    metadata['id'] when present, else dflt_id when given, else a fresh ID
    made from the module-level counter cur_sent_id (which is then advanced).

    On return metadata['id'] and metadata['tok'] hold the values returned.
    """
    # Without this declaration the ``+=`` below makes cur_sent_id a local
    # name and the synthetic-ID branch fails with UnboundLocalError.
    global cur_sent_id

    if 'tok' in metadata:
        sent = metadata['tok']
    else:
        # "sentence if available": a missing sentence is reported as None
        # instead of aborting the whole run with a KeyError.
        sent = metadata.get('snt')

    if 'id' in metadata:
        sent_id = metadata['id']
    elif dflt_id is not None:
        sent_id = dflt_id
    else:
        sent_id = "%d" % cur_sent_id
        cur_sent_id += 1

    metadata['id'] = sent_id
    metadata['tok'] = sent
    return (sent_id, sent)
def monolingual_main(args):
    """
    Draw disagreement graphs between annotations of the same sentence.

    Reads AMRs from args.infile. Consecutive AMRs with the same ID form one
    group: the first is treated as gold, the rest as test annotations. A
    group with a single AMR is compared with itself ("single AMR view").
    For every gold/test pair a PNG is drawn into args.outdir, and the graph
    and its text alignments are written to the optional JSON / alignment
    output files.

    Reads from args: infile, align_in, json_out, align_out, num_restarts,
    verbose, layout, outdir.
    """
    infile = codecs.open(args.infile, encoding='utf8')
    gold_aligned_fh = None
    json_fh = None
    align_fh = None
    try:
        # NOTE(review): gold_aligned_fh is opened here but never handed to
        # hilight_disagreement (xlang_main does pass it) -- confirm intent.
        if args.align_in:
            gold_aligned_fh = codecs.open(args.align_in, encoding='utf8')
        (json_fh, align_fh) = open_output_files(args)

        amrs_same_sent = []
        cur_id = ""
        while True:
            (amr_line, comments) = amr_metadata.get_amr_line(infile)
            cur_amr = None
            if amr_line:
                cur_amr = amr_metadata.AmrMeta.from_parse(amr_line, comments)
                get_sent_info(cur_amr.metadata)
                if 'annotator' not in cur_amr.metadata:
                    cur_amr.metadata['annotator'] = ''
                if not cur_id:
                    cur_id = cur_amr.metadata['id']

            # End of input or a new sentence ID: flush the collected group.
            if cur_amr is None or cur_id != cur_amr.metadata['id']:
                # The group is empty only when the input holds no AMR at all.
                if amrs_same_sent:
                    gold_amr = amrs_same_sent[0]
                    test_amrs = amrs_same_sent[1:]
                    # Kept local so a single-AMR group does not lower the
                    # restart count of every group that follows it.
                    num_restarts = args.num_restarts
                    if len(test_amrs) == 0:
                        test_amrs = [gold_amr]  # single AMR view case
                        num_restarts = 1  # TODO make single AMR view more efficient
                    (amr_graphs, smatchgraphs) = hilight_disagreement(test_amrs, gold_amr, num_restarts)

                    gold_anno = gold_amr.metadata['annotator']
                    sent = gold_amr.metadata['tok']
                    if (args.verbose):
                        print("ID: %s\n Sentence: %s\n gold anno: %s" % (cur_id, sent, gold_anno))

                    # The two result lists run parallel to test_amrs, so each
                    # alignment block is written once, under its own annotator.
                    for (a, (g, score), sg) in zip(test_amrs, amr_graphs, smatchgraphs):
                        test_anno = a.metadata['annotator']
                        if json_fh:
                            json_fh.write(json.dumps(g) + '\n')
                        if align_fh:
                            # The header ends with a newline so the first
                            # alignment row is not fused onto a '#' line,
                            # which get_next_gold_alignments would skip.
                            align_fh.write("""# ::id %s\n# ::tok %s\n# ::gold_anno %s\n# ::test_anno %s\n""" % \
                                (cur_id, sent, gold_anno, test_anno))
                            align_fh.write('\n'.join(sg.get_text_alignments()) + '\n\n')
                        if (args.verbose):
                            print(" annotator %s score: %d" % (test_anno, score))

                        # NOTE(review): nx.to_agraph exists only in old networkx
                        # releases; newer ones expose nx.nx_agraph.to_agraph --
                        # confirm the pinned version.
                        ag = nx.to_agraph(g)
                        ag.graph_attr['label'] = sent
                        ag.layout(prog=args.layout)
                        ag.draw('%s/%s_annotated_%s_%s.png' % (args.outdir, cur_id, gold_anno, test_anno))

                amrs_same_sent = []
                if cur_amr is not None:
                    cur_id = cur_amr.metadata['id']
                else:
                    break

            amrs_same_sent.append(cur_amr)
    finally:
        # Release every handle even when parsing or drawing fails midway.
        infile.close()
        if gold_aligned_fh:
            gold_aligned_fh.close()
        close_output_files(json_fh, align_fh)
def xlang_main(args):
    """
    Disagreement graphs for aligned cross-language AMR pairs.

    Reads one AMR at a time from args.src_amr and args.tgt_amr in lockstep
    (the loop ends when the source file is exhausted), treats the source AMR
    as gold and the target AMR as test, draws one PNG per pair into
    args.outdir and writes the graph / text alignments to the optional JSON
    and alignment output files.

    Reads from args: src_amr, tgt_amr, align_in, json_out, align_out,
    num_align_read, num_aligned_in_file, num_restarts, verbose, layout,
    outdir.
    """
    src_amr_fh = codecs.open(args.src_amr, encoding='utf8')
    tgt_amr_fh = None
    gold_aligned_fh = None
    json_fh = None
    align_fh = None
    try:
        tgt_amr_fh = codecs.open(args.tgt_amr, encoding='utf8')
        if args.align_in:
            gold_aligned_fh = codecs.open(args.align_in, encoding='utf8')
        (json_fh, align_fh) = open_output_files(args)

        aligner = Amr2AmrAligner(num_best=args.num_align_read, num_best_in_file=args.num_aligned_in_file)
        while True:
            (src_amr_line, src_comments) = amr_metadata.get_amr_line(src_amr_fh)
            if src_amr_line == "":
                break
            (tgt_amr_line, tgt_comments) = amr_metadata.get_amr_line(tgt_amr_fh)
            src_amr = amr_metadata.AmrMeta.from_parse(src_amr_line, src_comments, xlang=True)
            tgt_amr = amr_metadata.AmrMeta.from_parse(tgt_amr_line, tgt_comments, xlang=True)
            (cur_id, src_sent) = get_sent_info(src_amr.metadata)
            # The target inherits the source ID when it carries none itself.
            (tgt_id, tgt_sent) = get_sent_info(tgt_amr.metadata, dflt_id=cur_id)
            assert cur_id == tgt_id

            (amr_graphs, smatchgraphs) = hilight_disagreement(
                [tgt_amr], src_amr, args.num_restarts,
                aligner=aligner, gold_aligned_fh=gold_aligned_fh)
            if json_fh:
                json_fh.write(json.dumps(amr_graphs[0]) + '\n')
            if align_fh:
                align_fh.write("""# ::id %s\n# ::src_snt %s\n# ::tgt_snt %s\n""" % (cur_id, src_sent, tgt_sent))
                align_fh.write('\n'.join(smatchgraphs[0].get_text_alignments()) + '\n\n')
            if (args.verbose):
                print("ID: %s\n Sentence: %s\n Sentence: %s\n Score: %f" % (cur_id, src_sent, tgt_sent, amr_graphs[0][1]))

            # NOTE(review): nx.to_agraph exists only in old networkx releases;
            # newer ones expose nx.nx_agraph.to_agraph -- confirm the pinned
            # version.
            ag = nx.to_agraph(amr_graphs[0][0])
            ag.graph_attr['label'] = "%s\n%s" % (src_sent, tgt_sent)
            ag.layout(prog=args.layout)
            ag.draw('%s/%s.png' % (args.outdir, cur_id))
    finally:
        # Release every handle even when parsing or drawing fails midway.
        src_amr_fh.close()
        if tgt_amr_fh:
            tgt_amr_fh.close()
        if gold_aligned_fh:
            gold_aligned_fh.close()
        close_output_files(json_fh, align_fh)
if __name__ == '__main__':
    # Command-line entry point: parse options (optionally seeded from a
    # config file), make sure the output directory exists, then run the
    # cross-language comparison.
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--conf_file", help="Specify config file")
    parser.add_argument('-i', '--infile', help='amr input file')
    parser.add_argument('-o', '--outdir', help='image output directory')
    parser.add_argument('-v', '--verbose', action='store_true')
    # The code below reads args.no_verbose, but the option was never
    # declared, so every run failed with AttributeError. Declaring it keeps
    # the override and makes the attribute always present.
    parser.add_argument('--no-verbose', action='store_true',
                        help='Force verbose output off.')
    parser.add_argument('-s', '--src_amr',
                        help='In bitext mode, source language AMR file.')
    parser.add_argument('-t', '--tgt_amr',
                        help='In bitext mode, target language AMR file.')
    parser.add_argument('--align_src2tgt',
                        help='In bitext mode, GIZA alignment .NBEST file (see GIZA++ -nbestalignments opt) with source as vcb1.')
    parser.add_argument('--align_tgt2src',
                        help='In bitext mode, GIZA alignment .NBEST file (see GIZA++ -nbestalignments opt) with target as vcb1.')
    parser.add_argument('--num_align_read', type=int,
                        help='N to read from GIZA NBEST file.')
    parser.add_argument('--num_aligned_in_file', type=int, default=1,
                        help='N printed to GIZA NBEST file.')
    parser.add_argument('-j', '--json_out',
                        help='File to dump json graphs to.')
    parser.add_argument('--num_restarts', type=int, default=5,
                        help='Number of random restarts to execute during hill-climbing algorithm.')
    parser.add_argument('--align_out',
                        help="Human-readable alignments output file")
    parser.add_argument('--align_in',
                        help="Alignments from human-editable text file, as from align_out")
    parser.add_argument('--layout', default='dot',
                        help='Graphviz output layout')
    # TODO make interactive option and option to process a specific range

    # First pass only discovers --conf_file; the second pass is the real one.
    args_conf = parser.parse_args()
    if args_conf.conf_file:
        argparse_config.read_config_file(parser, args_conf.conf_file)
    args = parser.parse_args()

    if args.no_verbose:
        args.verbose = False
    if not args.num_align_read:
        args.num_align_read = args.num_aligned_in_file
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    xlang_main(args)
\ No newline at end of file
argparse
rdflib
numpy
#!/usr/bin/env python
"""
smatch-table.py
This file is from the code for smatch, available at:
http://amr.isi.edu/download/smatch-v1.0.tar.gz
http://amr.isi.edu/smatch-13.pdf
"""
import sys
import subprocess
from smatch import amr
from smatch import smatch
import os
import random
import time
from compare_smatch import amr_metadata
#import optparse
# import argparse #argparse only works for python 2.7. If you are using older versin of Python, you can use optparse instead.
#import locale
# Module-level switches; main() overwrites the first three from the parsed
# command-line arguments.
verbose = False # global variable, verbose output control
single_score = True # global variable, single score output control
pr_flag = False # global variable, output precision and recall
# Stream that the command-line entry point writes its error messages to.
ERROR_LOG = sys.stderr
# main() clears this after every AMR pair; nothing visible here fills it.
match_num_dict = {} # key: match number tuples value: the matching number
# NOTE(review): not referenced by any code visible in this file -- presumably
# a leftover default AMR directory; confirm before removing.
isi_dir_pre = "/nfs/web/isi.edu/cgi-bin/div3/mt/save-amr"
def build_arg_parser():
    """
    Build the argparse-based command-line parser.

    Options: -f (two readable files, required), -o/--outfile, -r (int,
    default 4), -v, --ms and --pr (flags, default False).
    """
    option_specs = (
        (('-f',), dict(
            nargs=2,
            required=True,
            type=argparse.FileType('r'),
            help='Two files containing AMR pairs. AMRs in each file are separated by a single blank line')),
        (('-o', '--outfile'), dict(
            help='Output')),
        (('-r',), dict(
            type=int,
            default=4,
            help='Restart number (Default:4)')),
        (('-v',), dict(
            action='store_true',
            help='Verbose output (Default:False)')),
        (('--ms',), dict(
            action='store_true',
            default=False,
            help='Output multiple scores (one AMR pair a score) instead of a single document-level smatch score (Default: False)')),
        (('--pr',), dict(
            action='store_true',
            default=False,
            help="Output precision and recall as well as the f-score. Default: false")),
    )
    cli = argparse.ArgumentParser(
        description="Smatch calculator -- arguments")
    # Registration order is kept so the generated help text is unchanged.
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli
def build_arg_parser2():
    """
    Build the optparse-based command-line parser.

    Parsed values are stored under the short names f, o, r, v, ms and pr;
    the defaults are r=4 and False for the three flags.

    NOTE(review): the output path is stored under ``o`` here but under
    ``outfile`` by build_arg_parser(); consumers must account for both.
    """
    cli = optparse.OptionParser(usage="Smatch calculator -- arguments")
    option_specs = [
        (("-f", "--files"), dict(
            nargs=2,
            dest="f",
            type="string",
            help='Two files containing AMR pairs. AMRs in each file are separated by a single blank line. This option is required.')),
        (("-o", "--outfile"), dict(
            nargs=1,
            dest="o",
            type="string",
            help='Output file.')),
        (("-r", "--restart"), dict(
            dest="r",
            type="int",
            help='Restart number (Default: 4)')),
        (("-v", "--verbose"), dict(
            action='store_true',
            dest="v",
            help='Verbose output (Default:False)')),
        (("--ms", "--multiple_score"), dict(
            action='store_true',
            dest="ms",
            help='Output multiple scores (one AMR pair a score) instead of a single document-level smatch score (Default: False)')),
        (('--pr', "--precision_recall"), dict(
            action='store_true',
            dest="pr",
            help="Output precision and recall as well as the f-score. Default: false")),
    ]
    # Registration order is kept so the generated help text is unchanged.
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)
    cli.set_defaults(r=4, v=False, ms=False, pr=False)
    return cli
def main(args):
    """
    Main function of the smatch calculation program.

    Reads AMRs pairwise from the two open files in args.f, finds the best
    variable mapping for every pair with smatch.get_fh and accumulates the
    match / test / gold triple counts.

    * Without --ms, the document-level F-score (plus precision and recall
      with --pr) is printed to standard output.
    * With --ms, a header line and the 'tok' metadata of every AMR from the
      second file are written to the output file and the pair number is
      printed to standard output. Per-pair scores are not written in this
      version.

    Reads from args: f, r, v, ms, pr and the output path (``outfile`` from
    the argparse parser, ``o`` from the optparse parser). When no output path
    is given, the per-sentence rows go to standard output instead.
    """
    global verbose
    global iter_num
    global single_score
    global pr_flag
    global match_num_dict

    def _log(*parts):
        # Same text as the former ``print >> sys.stderr, a, b``.
        sys.stderr.write(" ".join(str(part) for part in parts) + "\n")

    def _say(*parts):
        # Same text as the former ``print a, b``.
        sys.stdout.write(" ".join(str(part) for part in parts) + "\n")

    # set the restart number
    iter_num = args.r + 1
    verbose = False
    if args.ms:
        single_score = False
    if args.v:
        verbose = True
    if args.pr:
        pr_flag = True
    total_match_num = 0
    total_test_num = 0
    total_gold_num = 0
    sent_num = 1
    prev_amr1 = ""

    # -o is optional and the two parsers store it under different names;
    # opening a missing path used to fail with a TypeError.
    out_path = getattr(args, 'outfile', None) or getattr(args, 'o', None)
    if out_path:
        outfile = open(out_path, 'w')
    else:
        outfile = sys.stdout
    try:
        if not single_score:
            outfile.write("Sentence\tText")
            if pr_flag:
                outfile.write("\tPrecision\tRecall")
            outfile.write("\tSmatch\n")
        while True:
            cur_amr1 = smatch.get_amr_line(args.f[0])
            (cur_amr2, comments) = amr_metadata.get_amr_line(args.f[1])
            if cur_amr1 == "" and cur_amr2 == "":
                break
            if cur_amr1 == "":
                # GULLY CHANGED THIS.
                # IF WE RUN OUT OF AVAILABLE AMRS FROM FILE 1,
                # REUSE THE LAST AVAILABLE AMR
                cur_amr1 = prev_amr1
                if cur_amr1 == "":
                    # File 1 never produced an AMR, so nothing can be reused.
                    _log("Error: File 1 contains no AMRs")
                    break
            if cur_amr2 == "":
                _log("Error: File 2 has less AMRs than file 1")
                _log("Ignoring remaining AMRs")
                break
            prev_amr1 = cur_amr1
            amr1 = amr.AMR.parse_AMR_line(cur_amr1)
            amr2 = amr.AMR.parse_AMR_line(cur_amr2)
            # We were getting screwy SMATCH scores from
            # using the amr_metadata construct
            # so it is used for the sentence metadata only.
            meta_enabled_amr = amr_metadata.AmrMeta.from_parse(cur_amr2, comments)
            test_label = "a"
            gold_label = "b"
            amr1.rename_node(test_label)
            amr2.rename_node(gold_label)
            (test_inst, test_rel1, test_rel2) = amr1.get_triples2()
            (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples2()
            if verbose:
                _say("AMR pair", sent_num)
                _log("Instance triples of AMR 1:", len(test_inst))
                _log(test_inst)
                _log("Relation triples of AMR 1:", len(test_rel1) + len(test_rel2))
                _log(test_rel1)
                _log(test_rel2)
                _log("Instance triples of AMR 2:", len(gold_inst))
                _log(gold_inst)
                _log("Relation triples of AMR 2:", len(gold_rel1) + len(gold_rel2))
                _log(gold_rel1)
                _log(gold_rel2)
            # The AMR with fewer instances is passed first. iter_num is now
            # forwarded; before, the -r option had no effect on the search.
            if len(test_inst) < len(gold_inst):
                (best_match, best_match_num) = smatch.get_fh(
                    test_inst, test_rel1, test_rel2,
                    gold_inst, gold_rel1, gold_rel2,
                    test_label, gold_label,
                    iter_num=iter_num)
                if verbose:
                    _log("AMR pair ", sent_num)
                    _log("best match number", best_match_num)
                    _log("best match", best_match)
            else:
                (best_match, best_match_num) = smatch.get_fh(
                    gold_inst, gold_rel1, gold_rel2,
                    test_inst, test_rel1, test_rel2,
                    gold_label, test_label,
                    iter_num=iter_num)
                if verbose:
                    _log("Sent ", sent_num)
                    _log("best match number", best_match_num)
                    _log("best match", best_match)
            if not single_score:
                # Only the sentence text is emitted per pair; the per-pair
                # precision / recall / F-score columns are disabled here.
                outfile.write(str(meta_enabled_amr.metadata.get("tok", None)))
                _say(sent_num)
                outfile.write("\n")
            total_match_num += best_match_num
            total_test_num += len(test_rel1) + len(test_rel2) + len(test_inst)
            total_gold_num += len(gold_rel1) + len(gold_rel2) + len(gold_inst)
            match_num_dict.clear()
            sent_num += 1
        if verbose:
            _log("Total match num")
            _log(total_match_num, total_test_num, total_gold_num)
        if single_score:
            (precision, recall, best_f_score) = smatch.compute_f(
                total_match_num, total_test_num, total_gold_num)
            if pr_flag:
                _say("Precision: %.2f" % precision)
                _say("Recall: %.2f" % recall)
            _say("Document F-score: %.2f" % best_f_score)
    finally:
        # Release the handles even when parsing or matching fails midway.
        args.f[0].close()
        args.f[1].close()
        if outfile is not sys.stdout:
            outfile.close()
if __name__ == "__main__":
    # Command-line entry point. Python 2.7 uses argparse; Python 2.5/2.6
    # fall back to optparse and open the two input files by hand. Any other
    # interpreter version is rejected.
    parser = None
    args = None
    if sys.version_info[:2] != (2, 7):
        if sys.version_info[0] != 2 or sys.version_info[1] < 5:
            ERROR_LOG.write("Smatch only supports python 2.5 or later\n")
            sys.exit(1)
        # codecs is needed to open the input files below; it was used here
        # without ever being imported.
        import codecs
        import optparse
        if len(sys.argv) == 1:
            ERROR_LOG.write("No argument given. Please run smatch.py -h to see the argument descriptions.\n")
            sys.exit(1)
        # requires version >=2.3!
        parser = build_arg_parser2()
        # optparse returns (options, positional arguments); the options
        # object is what main() expects as ``args``.
        (args, opts) = parser.parse_args()
        # handling file errors
        file_handle = []
        if args.f is None:
            ERROR_LOG.write("smatch.py requires -f option to indicate two files containing AMR as input. Please run smatch.py -h to see the argument descriptions.\n")
            sys.exit(1)
        # Check and open the two files in order, stopping at the first
        # missing one.
        for path in (args.f[0], args.f[1]):
            if not os.path.exists(path):
                ERROR_LOG.write("Given file %s does not exist\n" % path)
                sys.exit(1)
            file_handle.append(codecs.open(path, encoding='utf8'))
        args.f = tuple(file_handle)
    else:  # version 2.7
        import argparse
        parser = build_arg_parser()
        args = parser.parse_args()
    main(args)
There is also a PDF version of this documentation, smatch_guide.pdf, with the same content, in the same directory.
Smatch Tool Guideline
Shu Cai 03/20/2013
Smatch is a tool to evaluate the semantic overlap between semantic feature structures. It can be used to compute the inter agreements of AMRs, and the agreement between an automatic-generated AMR and a gold AMR. For multiple AMR pairs, the smatch tool can provide a weighted, overall score for all the AMR pairs.
I. Content and web demo pages
The directory contains the Smatch code (mostly Python and some Perl) as well as a guide for Smatch.
Smatch Webpages
Smatch tool webpage: http://amr.isi.edu/eval/smatch/compare.html (A quick tutorial can be found on the page)
- input: two AMRs.
- output: the smatch score and the matching/unmatching triples.
Smatch table tool webpage: http://amr.isi.edu/eval/smatch/table.html
- input: AMR IDs and users.
- output: a table which consists of the smatch scores of every pair of users.
II. Installation
Python (version 2.5 or later) is required to run smatch tool. Python 2.7 is recommended. No compilation is necessary.
If a user wants to run smatch tool outside the current locations, they can just copy the whole directory. Running the latest smatch tools requires the following files: amr.py (a library called by smatch.py), smatch.py, smatch-table.py. Running the old versions of smatch requires Perl installed, and
esem-format-check.pl,smatch-v0.x.py (x<5), smatch-table-v0.x.py (x<3).
III. Usage
The Smatch tool consists of two programs written in Python.
1. smatch.py: for computing the smatch score(s) for multiple AMRs created by two different groups.
Input: two files which contain AMRs. Each file may contain multiple AMRs, and every two AMRs are separated by a blank line. AMRs can be one-per-line or have multiple lines, as long as there is no blank line in one AMR.
Input file format: see test_input1.txt, test_input2.txt in the smatch tool folder. AMRs are separated by one or more blank lines, so no blank lines are allowed inside an AMR. Lines starting with a hash (#) will be ignored.
Output: Smatch score(s) computed
Usage: python smatch.py [-h] -f F F [-r R] [-v] [--ms] [--pr]
arguments:
-h: help
-f: two files which contain multiple AMRs. A blank line is used to separate two AMRs. Required arguments.
-r: restart number of the heuristic search during computation, optional. Default value: 4. This argument must be a positive integer. A large restart number will reduce the chance of search error, but also increase the running time. A small restart number will reduce the running time but increase the chance of search error. The default value is by far the best trade-off. Users can set a large number if the AMRs are long (the search space is large) and very high speed is not needed.
-v: verbose output, optional. Default value: false. The verbose information includes the triples of each AMR, the matching triple number found for each iterations, and the best matching triple number. It is useful when you try to understand how the program works. User will not need this option most of the time.
--ms: multiple score, optional. Adding this option will result in a single smatch score for each AMR pair. Otherwise it will output one single weighted score based on all pairs of AMRs. AMRs are weighted according to their number of triples.
Default value: false
--pr: Output precision and recall as well as the f-score. Default:false
A typical (and most common) example of running smatch.py:
python smatch.py -f test_input1.txt test_input2.txt
The release includes sample files test_input1.txt and test_input2.txt, so you should be able to run the above command as is. The above command should output the following line:
Document F-score: 0.81
2. smatch-table.py: it calls the smatch library to compute the smatch scores for a group of users and multiple AMR IDs, and output a table to show the AMR score between each pair of users.
Input: AMR ID list and User list. AMR ID list can be stored in a file (-fl file) or given by the command line (-f AMR_ID1, AMR_ID2,...). User list are given by the command line (-p user1,user2,..). If no users are given, the program searches for all the users who annotates all AMRs we require. The user number should be at least 2.
Input file format: AMR ID list (see sample_file_list in the smatch tool folder)
Output: A table which shows the overall AMR score between every pair of users.
Usage: python smatch-table.py [-h] [--fl FL] [-f F [F ...]] [-p [P [P ...]]]
[--fd FD] [-r R] [-v]
optional arguments:
-h, --help show this help message and exit
--fl FL AMR ID list file (a file which contains one line of AMR IDs, separated by blank space)
-f F [F ...] AMR IDs (at least one). If we already have valid AMR ID list file, this option will be ignored.
-p [P [P ...]] User list (It can be unspecified. When the list is none, the program searches for all the users who annotates all AMRs we require) It is meaningless to give only one user since smatch-table computes agreement between each pair of users. So the number of P is at least 2.
--fd FD AMR File directory. Default=location on isi file system
-r R Restart number (Default:4), same as the -r option in smatch.py
-v Verbose output (Default:False), same as the -v option in smatch.py
A typical example of running smatch-table.py:
python smatch-table.py --fd $amr_root_dir --fl sample_file_list -p ulf knight
which will compare files
$amr_root_dir/ulf/nw_wsj_0001_1.txt $amr_root_dir/knight/nw_wsj_0001_1.txt
$amr_root_dir/ulf/nw_wsj_0001_2.txt $amr_root_dir/knight/nw_wsj_0001_2.txt
etc.
Note: smatch-table.py computes smatch scores for every pair of users, so it can be slow when the number of users is large or when the -p option is not set (in this case we compute smatch scores for all users who annotate the AMRs we require).
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
from rdflib import Graph, URIRef, Literal
from rdflib.namespace import RDFS, XSD
# Start from a new graph object; nothing is parsed into it or added to it
# in this snippet.
g = Graph()
# DBpedia resource whose label is looked up below.
semweb = URIRef('http://dbpedia.org/resource/Semantic_Web')
# Look up the rdfs:label value of the resource.
# NOTE(review): the name shadows the built-in ``type`` and holds a label, not
# a type; with nothing loaded into the graph the lookup presumably yields
# None -- confirm the intent.
type = g.value(semweb, RDFS.label)
[
{
"root": {
"domain": {
"op1": {
"@id": "g",
"@type": "gene",
"name": {
"@id": "n",
"@type": "name",
"op1": "KRAS"
}
},
"@id": "a",
"@type": "and",
"op2": {
"@id": "g2",
"@type": "gene",
"name": {
"@id": "n2",
"@type": "name",
"op1": "PIK3CA"
}
},
"op3": {
"@id": "g3",
"@type": "gene",
"name": {
"@id": "n3",
"@type": "name",
"op1": "BRAF"
}
}
},
"@id": "o",
"@type": "oncogene",
"location": {
"@id": "c",
"@type": "cancer"
}
},
"has-date": "2014-08-13T14:22:25",
"@context": {
"and": "http://amr.isi.edu/rdf/core-amr#and",
"oncogene": "http://amr.isi.edu/rdf/core-amr#oncogene",
"kill-01": "https://verbs.colorado.edu/propbank#kill-01",
"name": "http://amr.isi.edu/rdf/core-amr#name",
"cancer": "http://amr.isi.edu/rdf/core-amr#cancer",
"most": "http://amr.isi.edu/rdf/core-amr#most",
"@base": "http://amr.isi.edu/amr_data/bio.ras_0001_1#",
"human": "http://amr.isi.edu/rdf/core-amr#human",
"gene": "http://amr.isi.edu/entity-types#gene",
"root": "http://amr.isi.edu/rdf/core-amr#root",
"frequent": "http://amr.isi.edu/rdf/core-amr#frequent",
"mutate-01": "https://verbs.colorado.edu/propbank#mutate-01"
},
"@id": "bio.ras_0001_1",
"@type": "http://amr.isi.edu/rdf/core-amr#AMR",
"has-sentence": "The most frequently mutated oncogenes in the deadliest cancers responsible for human mortality are KRAS , PIK3CA and BRAF ."
}
]
\ No newline at end of file
<http://amr.isi.edu/amr_data/bio.ras_0001_1#k> <http://amr.isi.edu/rdf/core-amr#ARG1> <http://amr.isi.edu/amr_data/bio.ras_0001_1#h> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#root01> <http://amr.isi.edu/rdf/core-amr#has-sentence> "The most frequently mutated oncogenes in the deadliest cancers responsible for human mortality are KRAS , PIK3CA and BRAF ." .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#f> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#frequent> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#n> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#name> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#g2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/entity-types#gene> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#a> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#and> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#o> <http://amr.isi.edu/rdf/core-amr#location> <http://amr.isi.edu/amr_data/bio.ras_0001_1#c> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#root01> <http://amr.isi.edu/rdf/core-amr#has-id> "bio.ras_0001_1" .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#m> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#most> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#f> <http://amr.isi.edu/rdf/core-amr#degree> <http://amr.isi.edu/amr_data/bio.ras_0001_1#m3> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#n2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#name> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#n3> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#name> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#root01> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#AMR> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#a> <http://amr.isi.edu/rdf/core-amr#op3> <http://amr.isi.edu/amr_data/bio.ras_0001_1#g3> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#k> <http://amr.isi.edu/rdf/core-amr#ARG0> <http://amr.isi.edu/amr_data/bio.ras_0001_1#c> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#a> <http://amr.isi.edu/rdf/core-amr#op1> <http://amr.isi.edu/amr_data/bio.ras_0001_1#g> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#n2> <http://amr.isi.edu/rdf/core-amr#op1> "PIK3CA" .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#g> <http://amr.isi.edu/rdf/core-amr#name> <http://amr.isi.edu/amr_data/bio.ras_0001_1#n> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#m3> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#most> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#o> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#oncogene> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#m2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://verbs.colorado.edu/propbank#mutate-01> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#k> <http://amr.isi.edu/rdf/core-amr#degree> <http://amr.isi.edu/amr_data/bio.ras_0001_1#m> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#m2> <http://amr.isi.edu/rdf/core-amr#frequency> <http://amr.isi.edu/amr_data/bio.ras_0001_1#f> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#k> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://verbs.colorado.edu/propbank#kill-01> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#h> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#human> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#g3> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/entity-types#gene> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#root01> <http://amr.isi.edu/rdf/core-amr#root> <http://amr.isi.edu/amr_data/bio.ras_0001_1#o> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#c> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/rdf/core-amr#cancer> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#n3> <http://amr.isi.edu/rdf/core-amr#op1> "BRAF" .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#o> <http://amr.isi.edu/rdf/core-amr#TOP> <http://amr.isi.edu/amr_data/bio.ras_0001_1#o> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#o> <http://amr.isi.edu/rdf/core-amr#domain> <http://amr.isi.edu/amr_data/bio.ras_0001_1#a> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#g2> <http://amr.isi.edu/rdf/core-amr#name> <http://amr.isi.edu/amr_data/bio.ras_0001_1#n2> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#g> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://amr.isi.edu/entity-types#gene> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#n> <http://amr.isi.edu/rdf/core-amr#op1> "KRAS" .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#m2> <http://amr.isi.edu/rdf/core-amr#ARG1> <http://amr.isi.edu/amr_data/bio.ras_0001_1#o> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#g3> <http://amr.isi.edu/rdf/core-amr#name> <http://amr.isi.edu/amr_data/bio.ras_0001_1#n3> .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#root01> <http://amr.isi.edu/rdf/core-amr#has-date> "2014-08-13T14:22:25" .
<http://amr.isi.edu/amr_data/bio.ras_0001_1#a> <http://amr.isi.edu/rdf/core-amr#op2> <http://amr.isi.edu/amr_data/bio.ras_0001_1#g2> .
# ::id bio.ras_0001_1 ::date 2014-08-13T14:22:25
# ::snt The most frequently mutated oncogenes in the deadliest cancers responsible for human mortality are KRAS , PIK3CA and BRAF .
(o / oncogene
:domain (a / and
:op1 (g / gene :name (n / name :op1 "KRAS"))
:op2 (g2 / gene :name (n2 / name :op1 "PIK3CA"))
:op3 (g3 / gene :name (n3 / name :op1 "BRAF")))
:location (c / cancer
:ARG0-of (k / kill-01
:ARG1 (h / human)
:degree (m / most)))
:ARG1-of (m2 / mutate-01
:frequency (f / frequent
:degree (m3 / most))))
@prefix ns1: <http://amr.isi.edu/rdf/core-amr#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#root01> a ns1:AMR ;
ns1:has-date "2015-02-27T00:14:25" ;
ns1:has-id "pmid_1177_7939.32" ;
ns1:has-sentence "In previous studies, we showed that Sos-1, E3b1, and Eps8 could form a trimeric complex in vivo upon concomitant overexpression of the three proteins." ;
ns1:root <http://amr.isi.edu/amr_data/pmid_1177_7939.32#s> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#c> a ns1:concomitant .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#e2> a <http://amr.isi.edu/entity-types#enzyme> ;
ns1:name <http://amr.isi.edu/amr_data/pmid_1177_7939.32#n3> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#f> a <https://verbs.colorado.edu/propbank#form-01> ;
ns1:ARG0 <http://amr.isi.edu/amr_data/pmid_1177_7939.32#a> ;
ns1:ARG1 <http://amr.isi.edu/amr_data/pmid_1177_7939.32#m> ;
ns1:condition <http://amr.isi.edu/amr_data/pmid_1177_7939.32#o> ;
ns1:manner <http://amr.isi.edu/amr_data/pmid_1177_7939.32#i> ;
ns1:mod <http://amr.isi.edu/amr_data/pmid_1177_7939.32#p> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#i> a ns1:in-vivo .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#m> a <http://amr.isi.edu/entity-types#macro-molecular-complex> ;
ns1:mod <http://amr.isi.edu/amr_data/pmid_1177_7939.32#t> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#n> a ns1:name ;
ns1:op1 "Sos-1" .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#n2> a ns1:name ;
ns1:op1 "E3b1" .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#n3> a ns1:name ;
ns1:op1 "Eps8" .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#o> a <https://verbs.colorado.edu/propbank#overexpress-00> ;
ns1:ARG2 <http://amr.isi.edu/amr_data/pmid_1177_7939.32#a> ;
ns1:manner <http://amr.isi.edu/amr_data/pmid_1177_7939.32#c> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#p> a ns1:possible .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#p2> a <http://amr.isi.edu/entity-types#protein> ;
ns1:name <http://amr.isi.edu/amr_data/pmid_1177_7939.32#n> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#p3> a <http://amr.isi.edu/entity-types#protein> ;
ns1:name <http://amr.isi.edu/amr_data/pmid_1177_7939.32#n2> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#p5> a ns1:previous .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#s2> a ns1:study ;
ns1:time <http://amr.isi.edu/amr_data/pmid_1177_7939.32#p5> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#t> a ns1:trimeric .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#w> a ns1:we .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#a> a ns1:and ;
ns1:op1 <http://amr.isi.edu/amr_data/pmid_1177_7939.32#p2> ;
ns1:op2 <http://amr.isi.edu/amr_data/pmid_1177_7939.32#p3> ;
ns1:op3 <http://amr.isi.edu/amr_data/pmid_1177_7939.32#e2> .
<http://amr.isi.edu/amr_data/pmid_1177_7939.32#s> a <https://verbs.colorado.edu/propbank#show-01> ;
ns1:ARG0 <http://amr.isi.edu/amr_data/pmid_1177_7939.32#w> ;
ns1:ARG1 <http://amr.isi.edu/amr_data/pmid_1177_7939.32#f> ;
ns1:TOP <http://amr.isi.edu/amr_data/pmid_1177_7939.32#s> ;
ns1:medium <http://amr.isi.edu/amr_data/pmid_1177_7939.32#s2> .
# ::id pmid_1177_7939.32 ::date 2015-02-27T00:14:25 ::authors mrizea
# ::snt In previous studies, we showed that Sos-1, E3b1, and Eps8 could form a trimeric complex in vivo upon concomitant overexpression of the three proteins.
# ::note Sentence+ loaded by script SntLoaderUlf1.7.pl
# ::save-date Wed Apr 1, 2015 ::user bbadarau ::file pmid_1177_7939_32.txt
(s / show-01
:ARG0 (w / we)
:ARG1 (f / form-01
:ARG0 (a / and
:op1 (p2 / protein :name (n / name :op1 "Sos-1"))
:op2 (p3 / protein :name (n2 / name :op1 "E3b1"))
:op3 (e2 / enzyme :name (n3 / name :op1 "Eps8")))
:ARG1 (m / macro-molecular-complex
:mod (t / trimeric))
:mod (p / possible)
:manner (i / in-vivo)
:condition (o / overexpress-00
:ARG2 a
:manner (c / concomitant)))
:medium (s2 / study
:time (p5 / previous)))
\ No newline at end of file
# ::id test-1
# ::snt The sun is a star.
(s / star
:domain (s2 / sun))
# ::id test-2
# ::snt Earth is a planet.
(p / planet
:domain p
:name (n / name
:op1 "Earth"))
This diff is collapsed.
# ::id test-1
# ::snt The sun is a star.
(s / star
:domain (s2 / sun))
# ::id test-2
# ::snt Earth is a planet.
(p / planet
:domain p
:name (n / name
:op1 "Earth"))
UNIPROT http://www.uniprot.org/uniprot/
PUBCHEM https://pubchem.ncbi.nlm.nih.gov/compound/
GO http://amigo.geneontology.org/amigo/term/GO:
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment