Select Git revision
GalleryViewThumbnail.js
mcli 9.50 KiB
#!/usr/bin/env bash
# Select which MACAO version(s) this invocation targets.
# Precedence: optional first positional argument, then the VERSION environment
# variable, then the default "each".
VERSION="${VERSION:-each}"

# A leading version argument overrides VERSION and is consumed:
# "12" and "3" are shorthands for "macao_12" / "macao_3", while "each" and
# "full" are taken literally.
case "${1:-}" in
  12 | 3)
    VERSION="macao_$1"
    shift
    ;;
  each | full)
    VERSION="$1"
    shift
    ;;
esac
export VERSION

# The pseudo-version "each" is processed by calling the script itself once for
# each actual version. Commands that are handled by a single run (shell,
# status, setup, the help request, or no command at all) are not fanned out.
if [[ "$VERSION" == "each" ]]; then
  case "${1:-}" in
    "" | help | -h | --help | shell | status | setup) ;;
    *)
      # Run every version even if one fails, but report the failure to the
      # caller through the exit status.
      each_status=0
      for each_version in 12 3; do
        echo "macao_${each_version}:"
        "$0" "$each_version" "$@" || each_status=$?
      done
      exit "$each_status"
      ;;
  esac
fi
# Root of the checkout: taken from the environment when set and non-empty,
# otherwise the directory containing this script (resolved with realpath).
if [[ -z "$MACAO_ROOT" ]]; then
  MACAO_ROOT="$(dirname "$(realpath "$0")")"
fi
export MACAO_ROOT
export SCRIPTS_DIR="${MACAO_ROOT}/tetras_extraction/script"

# Sources live in a per-version subdirectory, except for the pseudo-versions
# "each" and "full" which point at the common parent directory.
case "$VERSION" in
  each | full) SOURCES_DIR="${MACAO_ROOT}/Basilisk/MACAO" ;;
  *) SOURCES_DIR="${MACAO_ROOT}/Basilisk/MACAO/${VERSION}" ;;
esac
export SOURCES_DIR

# Results get a per-version subdirectory for everything but "each"
# ("full" has its own results directory).
if [[ "$VERSION" == "each" ]]; then
  RESULTS_DIR="${MACAO_ROOT}/tetras_extraction/result"
else
  RESULTS_DIR="${MACAO_ROOT}/tetras_extraction/result/${VERSION}"
fi
export RESULTS_DIR
# Print the command-line help to stdout.
# The command list documents the subcommands implemented by this script's
# command dispatcher: the extraction step is named "extract", and the
# individual "transform" and "export" steps are listed next to "convert".
print_usage() {
  cat <<EOF
Usage: $(basename "$0") [version] <command> [args]
COMMANDS
status
Print useful info about the current environment.
shell [-p|--pyenv]
Open a shell with mcli's environment variables set, including PATH.
If -p or --pyenv is specified, also enter the Python virtual env.
list-streams <file>
List audio streams in <file>
count-streams [<file>]
Count audio streams in <file>, or from all SWF files if none is given
index-extensions
Index all files by extension
count-all [-f|--force]
Count many types of Macao objects.
If -f or --force is given, refresh indexes before counting (equivalent to count-streams and index-extensions)
setup
Initialize Python environment required by extractors
setup-debug
(Re)create .env file used by the Python debugger launch config
extract
Run the extractor to generate RDF from text sources
transform
Run only the transform step of the conversion process
export
Run only the export step of the conversion process
extract-mp3 [-y|--yes-overwrite]
Extract audio streams from all Flash SWF files
convert
Run the full conversion process (extract -> transform -> export)
help
Print this help and exit
EOF
}
# Resolve a file argument to an existing regular file.
# Arguments:
#   $1 - path, or bare filename, to look up
#   $2 - fallback directory, searched only when $1 is a bare filename
#        (i.e. contains no slash)
# Outputs:
#   stdout: the resolved path
#   stderr: "No such file '<$1>'" when nothing was found
# Returns:
#   0 when a regular file was found, 1 otherwise
check_file() {
  # Locals: the function may be called directly, so it must not overwrite
  # the caller's "file" / "fallback" variables.
  local file="$1"
  local fallback="$2"

  if [[ -f "$file" ]]; then
    echo "$file"
    return 0
  fi

  # Only bare filenames are searched in the fallback directory; anything
  # that already looks like a path is taken at face value.
  if [[ -d "$fallback" && "$file" != */* && -f "$fallback/$file" ]]; then
    echo "$fallback/$file"
    return 0
  fi

  echo "No such file '$1'" >&2
  return 1
}
# Enter the Python virtual environment located under $SCRIPTS_DIR/venv by
# sourcing its activate script into the current shell.
# Exits the whole script with status 1 (after a hint on stderr) when the
# activate script cannot be sourced.
activate_venv() {
  # shellcheck source=/dev/null
  source "${SCRIPTS_DIR}/venv/bin/activate" || {
    printf '%s\n' "Python venv not found, did you run setup first?" >&2
    exit 1
  }
}
# Print the MP3 audio stream lines that ffprobe reports for the file given
# as $1. ffprobe's stderr is merged into the scanned text; the return status
# is grep's (non-zero when no MP3 stream line is found).
list_streams() {
  grep -E 'Stream.*Audio: mp3' < <(ffprobe -i "$1" 2>&1)
}
# Helper for count_streams: print "<count> <prefix><name>" for every SWF file
# directly inside directory $1. $2 is an optional prefix for the file name.
_count_streams_in_dir() {
  local dir="$1"
  local prefix="$2"
  local swf
  for swf in "$dir/"*.swf; do
    # An unmatched glob stays literal; skip it instead of probing "*.swf".
    [[ -f "$swf" ]] || continue
    echo "$(list_streams "$swf" | wc -l) $prefix$(basename "$swf")"
  done
}

# Count MP3 audio streams in SWF files.
# Arguments:
#   $1 - optional file (path, or bare name looked up in
#        $SOURCES_DIR/contenu/media). When omitted, every SWF file of the
#        selected version is counted ("full" covers macao_12 and macao_3).
# Outputs:
#   stdout: "<count> <name>" lines; for the all-files mode they are sorted by
#           decreasing count and also saved to
#           $RESULTS_DIR/indexes/swf_streams_count.txt
#   stderr: progress messages
# Returns:
#   non-zero when the requested file does not exist or the index directory
#   cannot be created.
count_streams() {
  local file out_file version

  if [[ -n "$1" ]]; then
    # Stop here when the file cannot be resolved (check_file already
    # reported it) instead of emitting a meaningless "0 " line.
    file="$(check_file "$1" "$SOURCES_DIR/contenu/media")" || return 1
    echo "Indexing streams from $file ..." >&2
    echo "$(list_streams "$file" | wc -l) $(basename "$file")"
    return
  fi

  mkdir -p "$RESULTS_DIR/indexes" || return 1
  echo "Indexing streams from all SWFs (this may take some time) ..." >&2
  out_file="$RESULTS_DIR/indexes/swf_streams_count.txt"
  if [[ "$VERSION" == "full" ]]; then
    for version in macao_12 macao_3; do
      _count_streams_in_dir "$SOURCES_DIR/$version/contenu/media" "$version/"
    done
  else
    _count_streams_in_dir "$SOURCES_DIR/contenu/media" ""
  fi | sort -rn >"$out_file"
  cat "$out_file"
}
# Index every file under $SOURCES_DIR by file extension.
# Writes $RESULTS_DIR/indexes/index_per_extension.txt as a sequence of
# sections, one per extension, each made of a "[ext]" header, the sorted
# relative paths ("./...") having that extension, and a blank line.
# Files under ".idea" directories and files without extension are left out.
# Outputs:
#   stdout: the content of the index file
#   stderr: a confirmation message
# Exits the script when $SOURCES_DIR cannot be entered.
index_extensions() {
  local out_file all_files ext

  mkdir -p "$RESULTS_DIR/indexes"
  out_file="$RESULTS_DIR/indexes/index_per_extension.txt"
  : >"$out_file" # Clear out file

  # List all files relative to SOURCES_DIR. The cd happens in a subshell so
  # the caller's working directory is untouched and nothing but the listing
  # reaches stdout. Only a failing cd aborts; a partial listing from find
  # (e.g. an unreadable subdirectory) is still indexed.
  all_files="$(cd "$SOURCES_DIR" &&
    { find . -path '*/.idea' -prune -o -type f -print || true; })" || exit

  # List all extensions (text after the last dot of the file name), then for
  # each one collect the paths ending in ".<ext>". The suffix is compared
  # literally, so extensions containing regex metacharacters are safe.
  sed -nE 's/.*\.([^./]+)$/\1/p' <<<"$all_files" | sort -u |
    while IFS= read -r ext; do
      {
        echo "[$ext]"
        suffix=".$ext" awk '
          BEGIN { suffix = ENVIRON["suffix"]; n = length(suffix) }
          length($0) >= n && substr($0, length($0) - n + 1) == suffix
        ' <<<"$all_files" | sort
        echo ""
      } >>"$out_file"
    done

  cat "$out_file"
  echo "Indexed all files by extension to $out_file" >&2
}
# Count many kinds of Macao objects and print a summary.
# Inputs (must exist beforehand, see index-extensions / count-streams):
#   $RESULTS_DIR/indexes/index_per_extension.txt - file index by extension
#   $RESULTS_DIR/indexes/swf_streams_count.txt   - audio stream count per SWF
# Also reads $SOURCES_DIR/imsmanifest.xml and greps the files under
# $SOURCES_DIR/contenu.
# Outputs:
#   stdout: the summary, also saved to $RESULTS_DIR/indexes/count-all.txt
#   stderr: an error message when one of the index files is missing
# Exits the script with status 1 when an index file is missing.
count_all() {
  local indexes_dir index swf_index content_dir out_file
  local nb_mod nb_subs nb_pages_all nb_pages nb_pages_special
  local nb_courses nb_exo nb_qm nb_qcu nb_qcm nb_tat nb_gd
  local nb_exo_total nb_exo_other nb_act
  local nb_flash nb_flash_0 nb_flash_1 nb_flash_mult
  local nb_png nb_jpg nb_gif nb_img
  local nb_media nb_media_total nb_media_other

  indexes_dir="$RESULTS_DIR/indexes"
  mkdir -p "$indexes_dir"
  index="$indexes_dir/index_per_extension.txt"
  swf_index="$indexes_dir/swf_streams_count.txt"
  content_dir="$SOURCES_DIR/contenu"
  out_file="$indexes_dir/count-all.txt"

  if [[ ! -f "$index" ]]; then
    echo "$index not found, use --force or run index-extensions before" >&2
    exit 1
  fi
  if [[ ! -f "$swf_index" ]]; then
    echo "$swf_index not found, use --force or run count-streams before" >&2
    exit 1
  fi

  # Modules come from the manifest, sub-parts from the file index; the
  # identifiers and file names to look for depend on the version.
  if [[ "$VERSION" == "macao_3" ]]; then
    nb_mod="$(grep -c '<item identifier="seq' "$SOURCES_DIR/imsmanifest.xml")"
    nb_subs="$(grep -Ec 'act[0-9]+\.html' "$index")"
  else
    nb_mod="$(grep -c '<item identifier="MosMod' "$SOURCES_DIR/imsmanifest.xml")"
    nb_subs="$(grep -Ec 'MosEtp[0-9]+\.html' "$index")"
  fi

  # Pages: every .htm/.html file under contenu; "normal" ones are pages/pgN.html.
  nb_pages_all="$(grep -Ec '/contenu/.*\.htm' "$index")"
  nb_pages="$(grep -Ec '/contenu/pages/pg[0-9]+\.html$' "$index")"
  nb_pages_special="$((nb_pages_all - nb_pages))"

  # Activities: lines of the content files that instantiate a course or an
  # exercise (grep -I skips binary files).
  nb_courses="$(grep -rI 'new Cours(' "$content_dir" | wc -l)"
  nb_exo="$(grep -rIE "new Exercice[[:alpha:]]*\(" "$content_dir" | wc -l)"
  nb_qm="$(grep -rI 'new ExerciceQM(' "$content_dir" | wc -l)"
  nb_qcu="$(grep -rIE "new ExerciceQC\(['\"]QCU['\"]" "$content_dir" | wc -l)"
  nb_qcm="$(grep -rIE "new ExerciceQC\(['\"]QCM['\"]" "$content_dir" | wc -l)"
  nb_tat="$(grep -rI 'new ExerciceTAT(' "$content_dir" | wc -l)"
  nb_gd="$(grep -rI 'new ExerciceGD(' "$content_dir" | wc -l)"
  nb_exo_total="$((nb_qm + nb_qcu + nb_qcm + nb_tat + nb_gd))"
  nb_exo_other="$((nb_exo - nb_exo_total))"
  nb_act="$((nb_courses + nb_exo_total))"

  # Flash files, split by the number of audio streams recorded in the SWF index.
  nb_flash="$(grep -Ec '/contenu/media/.*\.swf$' "$index")"
  nb_flash_0="$(grep -Ec '^0 ' "$swf_index")"
  nb_flash_1="$(grep -Ec '^1 ' "$swf_index")"
  nb_flash_mult="$((nb_flash - nb_flash_0 - nb_flash_1))"

  # Images and remaining media files.
  nb_png="$(grep -Ec '/contenu/media/.*\.png$' "$index")"
  nb_jpg="$(grep -Ec '/contenu/media/.*\.jpg$' "$index")"
  nb_gif="$(grep -Ec '/contenu/media/.*\.gif$' "$index")"
  nb_img="$((nb_png + nb_jpg + nb_gif))"
  nb_media="$(find "$content_dir/media/" -maxdepth 1 -type f | wc -l)"
  nb_media_total="$((nb_flash + nb_img))"
  nb_media_other="$((nb_media - nb_media_total))"

  cat >"$out_file" <<EOF
modules: $nb_mod
sous-parties: $nb_subs
pages: $nb_pages_all
normales: $nb_pages
spéciales: $nb_pages_special
activités: $nb_act
cours: $nb_courses
exercices: $nb_exo_total
QCU: $nb_qcu
QCM: $nb_qcm
QM: $nb_qm
TAT: $nb_tat
GD: $nb_gd
other: $nb_exo_other
media: $nb_media_total
images: $nb_img
png: $nb_png
jpg: $nb_jpg
gif: $nb_gif
flash: $nb_flash
no audio: $nb_flash_0
1 audio: $nb_flash_1
2+ audio: $nb_flash_mult
other: $nb_media_other
EOF
  cat "$out_file"
}
# Command dispatch: the first remaining argument selects the subcommand, the
# rest is passed on to it.
action="$1"
shift

case "$action" in
  list-streams)
    if [[ -z "$1" ]]; then
      echo "Usage: list-streams <file>" >&2
      exit 1
    fi
    # check_file reports the missing file itself; just stop here.
    file="$(check_file "$1" "$SOURCES_DIR/contenu/media")" || exit 1
    list_streams "$file"
    ;;
  count-streams)
    count_streams "$@"
    ;;
  index-extensions)
    index_extensions
    ;;
  count-all)
    # With -f/--force both indexes are rebuilt (silently) before counting.
    if [[ "$1" == "-f" || "$1" == "--force" ]]; then
      index_extensions >/dev/null
      count_streams >/dev/null
    fi
    count_all
    ;;
  setup)
    "$SCRIPTS_DIR/setup.sh"
    ;;
  setup-debug)
    # Snapshot the current environment into the .env file.
    envfile="$SCRIPTS_DIR/.env"
    {
      echo "VERSION='$VERSION'"
      echo "MACAO_ROOT='$MACAO_ROOT'"
      echo "SOURCES_DIR='$SOURCES_DIR'"
      echo "SCRIPTS_DIR='$SCRIPTS_DIR'"
      echo "RESULTS_DIR='$RESULTS_DIR'"
    } >"$envfile"
    ;;
  convert)
    activate_venv
    python "$SCRIPTS_DIR/src/main.py"
    ;;
  # "extract-rdf" is the name shown in the usage text; accept both spellings.
  extract | extract-rdf)
    activate_venv
    python "$SCRIPTS_DIR/src/extract.py"
    ;;
  transform)
    activate_venv
    python "$SCRIPTS_DIR/src/transform.py"
    ;;
  export)
    activate_venv
    python "$SCRIPTS_DIR/src/export.py"
    ;;
  extract-mp3)
    for audio_file in "$SOURCES_DIR/contenu/media/"*.swf; do
      # An unmatched glob stays literal; do not hand "*.swf" to the extractor.
      [[ -e "$audio_file" ]] || continue
      "$SCRIPTS_DIR/extract_mp3.sh" "$@" --output-dir "$RESULTS_DIR/audio" "$audio_file"
    done
    ;;
  shell)
    if [[ "$VERSION" == "each" ]]; then
      echo "Subcommand 'shell' not supported for version '$VERSION'" >&2
      exit 1
    fi
    if [[ "$1" == "-p" || "$1" == "--pyenv" ]]; then
      activate_venv
    fi
    export PATH="$PATH:$MACAO_ROOT"
    cd "$MACAO_ROOT" || exit 1
    "${SHELL:-/bin/bash}"
    ;;
  status)
    echo "VERSION=$VERSION"
    echo "MACAO_ROOT=$MACAO_ROOT"
    echo "SOURCES_DIR=$SOURCES_DIR"
    echo "SCRIPTS_DIR=$SCRIPTS_DIR"
    echo "RESULTS_DIR=$RESULTS_DIR"
    echo ""
    echo "Python virtual env: ${VIRTUAL_ENV:-"not set"}"
    echo -n "mcli: "
    if command -v mcli >/dev/null 2>&1; then
      echo "available"
    else
      echo "not in PATH"
    fi
    echo ""
    cd "$MACAO_ROOT" && git status
    ;;
  help | -h | --help)
    print_usage
    ;;
  *)
    # Unknown or missing command: explain on stderr and fail.
    echo "Unknown command '$action'" >&2
    print_usage >&2
    exit 1
    ;;
esac