Skip to content
Snippets Groups Projects
Commit d90bb4a4 authored by Malo Revel's avatar Malo Revel
Browse files

Add stats script

parent 05c9704c
Branches
No related tags found
No related merge requests found
stats.py 0 → 100644
# Computes some stats about a corpus
import sys
import os
def check_and_incr(text, token, dic):
    """Increment ``dic[token]`` by one when ``token`` occurs in ``text``.

    The counter goes up by at most one per call, however many times the
    token appears in ``text``, so repeated calls count matching texts rather
    than individual occurrences.

    Args:
        text: String that is searched (plain substring test).
        token: Substring to look for; also the key that is updated.
        dic: Counter dictionary, modified in place. A missing key is created
            on the first match; nothing is added when there is no match.
    """
    if token in text:
        dic[token] = dic.get(token, 0) + 1
class Stats:
    """Computes statistics about a corpus, such as the number of modalities and the number of actions.

    Every ``count_*`` method is meant to be called once per text. Each call
    raises a given counter by at most one, so the totals are the number of
    texts in which a token was found, not the number of occurrences.
    """

    # Tokens searched for by count_modalities().
    modalities = ["odrl:permission", "odrl:obligation", "odrl:prohibition"]
    # Tokens searched for by count_actions().
    actions = ["cc:Distribution", "odrl:play", "odrl:run", "cc:Reproduction",
               "odrl:use", "odrl:display", "odrl:copy", "odrl:sell",
               "odrl:modify", "odrl:derive"]

    def __init__(self):
        """Start with empty per-token counters and zeroed totals."""
        # token -> number of texts containing it; keys appear on first match.
        self.nb_mods = {}
        self.nb_actions = {}
        self.nb_ands = 0
        self.nb_nots = 0

    def count_modalities(self, text):
        """Record which of the known modality tokens occur in ``text``."""
        for mod in self.modalities:
            check_and_incr(text, mod, self.nb_mods)

    def count_actions(self, text):
        """Record which of the known action tokens occur in ``text``."""
        for act in self.actions:
            check_and_incr(text, act, self.nb_actions)

    def count_and(self, text):
        """Add one to ``nb_ands`` when ``text`` contains ``" and "``."""
        if " and " in text:
            self.nb_ands += 1

    def count_not(self, text):
        """Add one to ``nb_nots`` when ``text`` contains ``" not "`` or ``" cannot "``.

        A text containing both markers still counts only once.
        """
        if " not " in text or " cannot " in text:
            self.nb_nots += 1

    def __str__(self):
        """Return a multi-line report of all counters, ending with a newline."""
        lines = ["Number of modalities:"]
        lines.extend(f"\t{uri}: {count}" for uri, count in self.nb_mods.items())
        lines.append("Number of actions:")
        lines.extend(f"\t{uri}: {count}" for uri, count in self.nb_actions.items())
        lines.append(f"Number of and: {self.nb_ands}")
        lines.append(f"Number of not: {self.nb_nots}")
        return "\n".join(lines) + "\n"
def _read_corpus_files(directory, suffix):
    """Yield the content of each file in ``directory`` whose name ends with ``suffix``."""
    for fname in os.listdir(directory):
        if fname.endswith(suffix):
            # os.path.join works whether or not ``directory`` ends with a
            # path separator; plain string concatenation did not.
            with open(os.path.join(directory, fname), "r") as handle:
                content = handle.read()
            yield content


def main_stats(path_sentences, path_odrl):
    """Build a ``Stats`` object from a sentence corpus and an ODRL corpus.

    Args:
        path_sentences: Directory whose ``.txt`` files are scanned for
            "and" / "not" markers.
        path_odrl: Directory whose ``.ttl`` files are scanned for modality
            and action tokens.

    Returns:
        The populated ``Stats`` instance.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be read.
    """
    stats = Stats()
    for odrl_text in _read_corpus_files(path_odrl, ".ttl"):
        stats.count_modalities(odrl_text)
        stats.count_actions(odrl_text)
    for sent_text in _read_corpus_files(path_sentences, ".txt"):
        stats.count_and(sent_text)
        stats.count_not(sent_text)
    return stats
if __name__ == "__main__":
    # Script entry point: both corpus directories are required arguments.
    if len(sys.argv) < 3:
        print(f"Usage: python3 {sys.argv[0]} <path_corpus_sentences> <path_corpus_odrl>")
        # sys.exit rather than the bare exit() helper, which is injected by
        # the site module and is not available when Python runs with -S.
        sys.exit(1)
    path_sentences = sys.argv[1]
    path_odrl = sys.argv[2]
    stats = main_stats(path_sentences, path_odrl)
    print(stats)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment