Skip to content
Snippets Groups Projects
Commit 90130c9b authored by David Rouquet's avatar David Rouquet
Browse files

Merge branch '23-parse-activities' into 'main'

Resolve "Parseur par type d'activité"

Closes #23

See merge request !5
parents 0cbb82c2 b28f7da6
No related branches found
No related tags found
1 merge request!5Resolve "Parseur par type d'activité"
Showing
with 4438 additions and 172 deletions
...@@ -2,17 +2,24 @@ ...@@ -2,17 +2,24 @@
# Default value for version env var # Default value for version env var
if [[ -z "$VERSION" ]]; then if [[ -z "$VERSION" ]]; then
# VERSION="macao_12"
VERSION="full" VERSION="full"
fi fi
# First parameter overrides version if set # First parameter overrides version if set
version_regex='^(macao_12|macao_3|each|full)$'
if [[ "$1" == "12" || "$1" == "3" ]]; then if [[ "$1" == "12" || "$1" == "3" ]]; then
VERSION="macao_$1" VERSION="macao_$1"
shift shift
elif [[ "$1" = "each" || "$1" = "full" ]]; then elif [[ "$1" =~ $version_regex ]]; then
VERSION="$1" VERSION="$1"
shift shift
fi fi
export VERSION export VERSION
if [[ ! "$VERSION" =~ $version_regex ]]; then
echo "Invalid version '$VERSION'"
exit 1
fi
# Special version value "each" is processed by calling the script itself once # Special version value "each" is processed by calling the script itself once
# for each actual version # for each actual version
if [[ "$VERSION" = "each" && "$1" != "shell" && "$1" != "status" && "$1" != "setup" ]]; then if [[ "$VERSION" = "each" && "$1" != "shell" && "$1" != "status" && "$1" != "setup" ]]; then
...@@ -35,8 +42,14 @@ print_usage() { ...@@ -35,8 +42,14 @@ print_usage() {
cat <<EOF cat <<EOF
Usage: $(basename "$0") [version] <command> [args] Usage: $(basename "$0") [version] <command> [args]
COMMANDS VERSION
Specifies which part of the MACAO repository to use: "macao_12", "macao_3"
(or simply "12" and "3"), "each" for both in separate result dirs
and "full" for both merged in a single result dir.
If not specified, uses the value of the "VERSION" environment variable,
or "12" by default. Some commands do not support every version.
COMMANDS
status status
Print useful info about the current environment. Print useful info about the current environment.
shell [-p|--pyenv] shell [-p|--pyenv]
...@@ -50,7 +63,8 @@ index-extensions ...@@ -50,7 +63,8 @@ index-extensions
Index all files by extension Index all files by extension
count-all [-f|--force] count-all [-f|--force]
Count many types of Macao objects. Count many types of Macao objects.
If -f or --force is given, refresh indexes before counting (equivalent to count-streams and index-extensions) If -f or --force is given, refresh indexes before counting (equivalent
to count-streams and index-extensions)
setup setup
Initialize Python environment required by extractors Initialize Python environment required by extractors
setup-debug setup-debug
...@@ -131,6 +145,7 @@ index_extensions() { ...@@ -131,6 +145,7 @@ index_extensions() {
allfiles="$(find . -path '**/.idea' -prune -o -type f -print)" allfiles="$(find . -path '**/.idea' -prune -o -type f -print)"
cd - || exit cd - || exit
# List all extensions, then for each one, filter the index for files with this extension # List all extensions, then for each one, filter the index for files with this extension
# ( Perl expression courtesy of https://stackoverflow.com/a/1842270 )
perl -ne 'print $1 if m/\.([^.\/]+)$/**//' <<<"$allfiles" | sort -u | while read -r ext; do perl -ne 'print $1 if m/\.([^.\/]+)$/**//' <<<"$allfiles" | sort -u | while read -r ext; do
echo "[$ext]" >>"$out_file" echo "[$ext]" >>"$out_file"
grep -E ".*\.$ext\$" <<<"$allfiles" | sort >>"$out_file" grep -E ".*\.$ext\$" <<<"$allfiles" | sort >>"$out_file"
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -242,3 +242,19 @@ ...@@ -242,3 +242,19 @@
:ExerciceTAT :ExerciceTAT
) ; ) ;
]. ].
### Addition from Eliott
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Champ
:Champ rdf:type owl:Class ;
rdfs:subClassOf :MacaoContenu .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/Reponse
:Reponse rdf:type owl:Class ;
rdfs:subClassOf :MacaoContenu .
### http://www.semanticweb.org/eliott/ontologies/2024/4/macao/aSegment
:aSegment rdf:type owl:ObjectProperty ;
rdfs:domain :ExerciceTAT ;
rdfs:range :Segment .
#!/bin/bash
# Replace files under <out_folder> with converted copies of the files found
# in <in_folder>.
#
# For every regular file directly inside <in_folder>, each file located
# anywhere below <out_folder> whose name is "<same base name>.<any extension>"
# is overwritten by running ImageMagick's `magick` on the input file. The
# target keeps its own name, so `magick` picks the output format from the
# target's extension.
#
# Usage: <script> <in_folder> <out_folder>
# Exit status: 0 when every attempted conversion succeeded (input files with
# no match are only reported), 1 on usage errors, missing folders, or when at
# least one conversion failed.

# Check if the correct number of arguments is provided
if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <in_folder> <out_folder>" >&2
    exit 1
fi

IN_FOLDER="$1"
OUT_FOLDER="$2"

# Check if the input and output folders exist
if [ ! -d "$IN_FOLDER" ]; then
    echo "Input folder does not exist: $IN_FOLDER" >&2
    exit 1
fi
if [ ! -d "$OUT_FOLDER" ]; then
    echo "Output folder does not exist: $OUT_FOLDER" >&2
    exit 1
fi

# Becomes 1 as soon as one conversion fails; used as the final exit status
status=0

# Iterate over each file in the input folder
for IN_FILE in "$IN_FOLDER"/*; do
    # Skip sub-directories, and the unexpanded "*" pattern that the glob
    # leaves behind when the input folder is empty
    [ -f "$IN_FILE" ] || continue

    # Base name of the file, without directory and without extension
    BASE_NAME="${IN_FILE##*/}"
    BASE_NAME="${BASE_NAME%.*}"

    # `find -name` takes a glob: escape metacharacters so that a name such as
    # "img[1]" is matched literally instead of being read as a pattern
    NAME_PATTERN=$(printf '%s' "$BASE_NAME" | sed 's/[][*?\\]/\\&/g')

    # Several files may share the base name (other sub-folders, other
    # extensions): handle every match separately. NUL-delimited output keeps
    # paths containing spaces or newlines intact.
    found=0
    while IFS= read -r -d '' OUT_FILE; do
        found=1
        # stdin is redirected so magick cannot consume the list of matches
        if magick "$IN_FILE" "$OUT_FILE" </dev/null; then
            echo "Replaced $OUT_FILE with converted $IN_FILE"
        else
            echo "Failed to convert $IN_FILE to $OUT_FILE" >&2
            status=1
        fi
    done < <(find "$OUT_FOLDER" -type f -name "${NAME_PATTERN}.*" -print0)

    if [ "$found" -eq 0 ]; then
        echo "No matching file found in output folder for $BASE_NAME"
    fi
done

exit "$status"
import filecmp
from lxml import etree from lxml import etree
from rdflib import RDFS, Graph, Literal, URIRef from rdflib import RDFS, Graph, Literal, URIRef
from rdflib.namespace import OWL, RDF from rdflib.namespace import OWL, RDF
...@@ -146,14 +144,6 @@ def is_subsection(id: str): ...@@ -146,14 +144,6 @@ def is_subsection(id: str):
) )
def compare_files(f1: str, f2: str):
log.info(
"Files {} and {} {}.".format(
f1, f2, "are identical" if filecmp.cmp(f1, f2) else "differ"
)
)
def main(): def main():
g = create_graph() g = create_graph()
......
This diff is collapsed.
...@@ -117,7 +117,32 @@ class TestObjectCount(unittest.TestCase): ...@@ -117,7 +117,32 @@ class TestObjectCount(unittest.TestCase):
?subj :correct ?correct . ?subj :correct ?correct .
?subj :html ?html . ?subj :html ?html .
}""", }""",
(180, 106, 180 + 106), # The minus values are to account for missed gaps in TAT activities
# (see warnings when running the extraction), which are caused
# by a known but tricky bug
(258 - 30, 161, 258 - 30 + 161),
)
# Segments TAT
self.assertCount(
"""SELECT * WHERE {
?subj a :Segment .
?subj :index ?index .
?subj :text ?text .
MINUS { ?subj a :Champ }
}""",
(28, 42, 28 + 42),
)
# Champs TAT
self.assertCount(
"""SELECT * WHERE {
?subj a :Champ ;
a :Segment .
?subj :index ?index .
?subj :selection ?selection .
}""",
(16, 18, 16 + 18),
) )
def assertCount( def assertCount(
......
This diff is collapsed.
...@@ -15,11 +15,9 @@ template mt:activite(?act) { ...@@ -15,11 +15,9 @@ template mt:activite(?act) {
?desc st:nl() ?desc st:nl()
'<div class="commentaireInfoGroup">' st:nl() '<div class="commentaireInfoGroup" hidden="True">' st:nl()
group { group {
'<div class="commentaireInfo">' st:nl()
?info_comment ?info_comment
'</div>' st:nl()
} }
'</div>' st:nl() '</div>' st:nl()
......
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment