Select Git revision
CompanionArea.js
mcli 10.82 KiB
#!/usr/bin/env bash
# Fall back to "full" when the VERSION env var is unset or empty
# (previous default, kept for reference: "macao_12")
VERSION="${VERSION:-full}"

# An optional leading argument overrides the version. The shorthands "12" and
# "3" expand to "macao_12" and "macao_3"; the argument is consumed when used.
version_regex='^(macao_12|macao_3|each|full)$'
case "$1" in
  12 | 3)
    VERSION="macao_$1"
    shift
    ;;
  *)
    if [[ "$1" =~ $version_regex ]]; then
      VERSION="$1"
      shift
    fi
    ;;
esac
export VERSION

# Whatever its origin (environment or argument), the version must be known
[[ "$VERSION" =~ $version_regex ]] || {
  echo "Invalid version '$VERSION'"
  exit 1
}
# The pseudo-version "each" re-runs this very script once per concrete version
# and then stops. The shell, status and setup commands are exempt and fall
# through to the normal dispatch below.
if [[ "$VERSION" = "each" ]]; then
  case "$1" in
    shell | status | setup) ;;
    *)
      for short_version in 12 3; do
        echo "macao_$short_version:" && "$0" "$short_version" "$@"
      done
      # Bare exit: the status is the one of the last sub-run
      exit
      ;;
  esac
fi
# Repository root: taken from the environment, else the directory of this script
if [[ -z "$MACAO_ROOT" ]]; then
  MACAO_ROOT="$(dirname "$(realpath "$0")")"
fi
export MACAO_ROOT
export SCRIPTS_DIR="$MACAO_ROOT/tetras_extraction/script"

# Sources and results live in per-version subdirectories, except that
#  - "each" uses the bare directories for both
#  - "full" reads from the bare sources directory but has its own result dir
SOURCES_DIR="$MACAO_ROOT/Basilisk/MACAO"
RESULTS_DIR="$MACAO_ROOT/tetras_extraction/result"
case "$VERSION" in
  each) ;;
  full)
    RESULTS_DIR="$RESULTS_DIR/$VERSION"
    ;;
  *)
    SOURCES_DIR="$SOURCES_DIR/$VERSION"
    RESULTS_DIR="$RESULTS_DIR/$VERSION"
    ;;
esac
export SOURCES_DIR RESULTS_DIR
# Print the command-line help on stdout.
# The program name shown in the first line is derived from $0.
print_usage() {
  cat <<EOF
Usage: $(basename "$0") [version] <command> [args]
VERSION
Specifies which part of the MACAO repository to use: "macao_12", "macao_3"
(or simply "12" and "3"), "each" for both in separate result dirs
and "full" for both merged in a single result dir.
If not specified, uses the value of the "VERSION" environment variable,
or "full" by default. Some commands do not support every version.
COMMANDS
status
Print useful info about the current environment.
shell [-p|--pyenv]
Open a shell with mcli's environment variables set, including PATH.
If -p or --pyenv is specified, also enter the Python virtual env.
list-streams <file>
List audio streams in <file>
count-streams [<file>]
Count audio streams in <file>, or from all SWF files if none is given
index-extensions
Index all files by extension
count-all [-f|--force]
Count many types of Macao objects.
If -f or --force is given, refresh indexes before counting (equivalent
to count-streams and index-extensions)
setup
Initialize Python environment required by extractors
setup-debug
(Re)create .env file used by the Python debugger launch config
extract
Run the extract stage, to generate RDF from text sources
transform
Run the transform stage, to complete and clean-up the RDF data
export
Run the export stage, to generate Macao-Hugo content pages
convert
Run the full conversion process (extract -> transform -> export)
test
Run simple tests on the extracted RDF data
extract-mp3 [-y|--yes-overwrite]
Extract audio streams from all Flash SWF files
copy-images
Copy all GIF, JPG and PNG media files to the "img" result dir
help
Print this help and exit
EOF
}
# Resolve a file argument to an existing regular file.
# Arguments:
#   $1 - path or bare filename to look up
#   $2 - fallback directory, searched only when $1 is a bare filename
#        (i.e. has no directory component)
# Outputs:
#   the resolved path on stdout, or an error message on stderr
# Returns:
#   0 when a regular file was found, 1 otherwise
check_file() {
  # Locals, so that a direct (non-subshell) call cannot clobber a caller's
  # "file" variable
  local file="$1"
  local fallback="$2"

  if [[ -f "$file" ]]; then
    printf '%s\n' "$file"
    return 0
  fi

  if [[ -d "$fallback" && "$(basename -- "$file")" = "$file" ]]; then
    file="$fallback/$file"
    if [[ -f "$file" ]]; then
      printf '%s\n' "$file"
      return 0
    fi
  fi

  # Report the name exactly as the caller gave it
  echo "No such file '$1'" >&2
  return 1
}
# Source the Python virtual env found under $SCRIPTS_DIR/venv into the current
# shell. Exits the script with status 1 when the activation script cannot be
# sourced.
activate_venv() {
  source "$SCRIPTS_DIR/venv/bin/activate" && return
  echo "Python venv not found, did you run setup first?" >&2
  exit 1
}
# Print the MP3 audio stream lines that ffprobe reports for file $1.
# ffprobe's stderr is merged into its stdout before filtering; the return
# status is the one of grep (non-zero when no MP3 stream line is found).
list_streams() {
  grep -E 'Stream.*Audio: mp3' < <(ffprobe -i "$1" 2>&1)
}
# Count the MP3 audio streams of one file, or of every SWF media file.
# Globals:
#   SOURCES_DIR, RESULTS_DIR, VERSION (read)
# Arguments:
#   $1 - optional file (path, or bare name looked up in the media directory)
# Outputs:
#   "<count> <name>" lines on stdout, progress messages on stderr. Without
#   argument the list is sorted by decreasing count and also saved to
#   $RESULTS_DIR/indexes/swf_streams_count.txt
# Returns:
#   1 when the given file cannot be resolved
count_streams() {
  local file out_file version

  if [[ -n "$1" ]]; then
    # Stop here rather than probe an empty path; check_file has already
    # reported the problem on stderr
    file="$(check_file "$1" "$SOURCES_DIR/contenu/media")" || return 1
    echo "Indexing streams from $file ..." >&2
    echo "$(list_streams "$file" | wc -l) $(basename "$file")"
    return
  fi

  mkdir -p "$RESULTS_DIR/indexes"
  echo "Indexing streams from all SWFs (this may take some time) ..." >&2
  out_file="$RESULTS_DIR/indexes/swf_streams_count.txt"
  if [[ "$VERSION" = "full" ]]; then
    # "full" has one media directory per version; prefix names accordingly
    for version in macao_12 macao_3; do
      for file in "$SOURCES_DIR/$version/contenu/media/"*.swf; do
        # An unmatched glob stays literal: skip it instead of counting "*.swf"
        [[ -e "$file" ]] || continue
        echo "$(list_streams "$file" | wc -l) $version/$(basename "$file")"
      done
    done | sort -rn >"$out_file"
  else
    for file in "$SOURCES_DIR/contenu/media/"*.swf; do
      [[ -e "$file" ]] || continue
      echo "$(list_streams "$file" | wc -l) $(basename "$file")"
    done | sort -rn >"$out_file"
  fi
  cat "$out_file"
}
index_extensions() {
mkdir -p "$RESULTS_DIR/indexes"
out_file="$RESULTS_DIR/indexes/index_per_extension.txt"
echo -n "" >"$out_file" # Clear out file
# Index all files, with a cd trick to get relative paths
cd "$SOURCES_DIR" || exit
allfiles="$(find . -path '**/.idea' -prune -o -type f -print)"
cd - || exit
# List all extensions, then for each one, filter the index for files with this extension
# ( Perl expression courtesy of https://stackoverflow.com/a/1842270 )
perl -ne 'print $1 if m/\.([^.\/]+)$/**//' <<<"$allfiles" | sort -u | while read -r ext; do
echo "[$ext]" >>"$out_file"
grep -E ".*\.$ext\$" <<<"$allfiles" | sort >>"$out_file"
echo "" >>"$out_file"
done
cat "$out_file"
echo "Indexed all files by extension to $out_file" >&2
}
# Count many kinds of objects and write a summary report.
# Globals:
#   SOURCES_DIR, RESULTS_DIR, VERSION (read)
# Inputs:
#   $RESULTS_DIR/indexes/index_per_extension.txt and
#   $RESULTS_DIR/indexes/swf_streams_count.txt must already exist, plus
#   $SOURCES_DIR/imsmanifest.xml and the files below $SOURCES_DIR/contenu
# Outputs:
#   The report is written to $RESULTS_DIR/indexes/count-all.txt and echoed on
#   stdout.
# Exits the script with status 1 (message on stderr) when an index is missing.
count_all() {
  # Everything is local so that the many counters do not leak to the caller
  local indexes_dir index swf_index content_dir out_file
  local nb_mod nb_subs nb_pages_all nb_pages nb_pages_special
  local nb_courses nb_exo nb_qm nb_qcu nb_qcm nb_tat nb_gd
  local nb_exo_total nb_exo_other nb_act
  local nb_flash nb_flash_0 nb_flash_1 nb_flash_mult
  local nb_png nb_jpg nb_gif nb_img
  local nb_media nb_media_total nb_media_other

  indexes_dir="$RESULTS_DIR/indexes"
  mkdir -p "$indexes_dir"
  index="$indexes_dir/index_per_extension.txt"
  swf_index="$indexes_dir/swf_streams_count.txt"
  content_dir="$SOURCES_DIR/contenu"
  out_file="$indexes_dir/count-all.txt"

  if [[ ! -f "$index" ]]; then
    echo "$index not found, use --force or run index-extensions before" >&2
    exit 1
  fi
  if [[ ! -f "$swf_index" ]]; then
    echo "$swf_index not found, use --force or run count-streams before" >&2
    exit 1
  fi

  # macao_3 uses other manifest identifiers and sub-part file names than the
  # other versions
  if [[ "$VERSION" = "macao_3" ]]; then
    nb_mod="$(grep -c '<item identifier="seq' "$SOURCES_DIR/imsmanifest.xml")"
    nb_subs="$(grep -Ec 'act[0-9]+.html' "$index")"
  else
    nb_mod="$(grep -c '<item identifier="MosMod' "$SOURCES_DIR/imsmanifest.xml")"
    nb_subs="$(grep -Ec 'MosEtp[0-9]+.html' "$index")"
  fi

  # Pages: "special" ones are all those not named pages/pg<N>.html
  nb_pages_all="$(grep -Ec '/contenu/.*\.htm' "$index")"
  nb_pages="$(grep -Ec '/contenu/pages/pg[0-9]+\.html$' "$index")"
  nb_pages_special="$((nb_pages_all - nb_pages))"

  # Activities: one matching line per constructor call in the content files
  nb_courses="$(grep -rI 'new Cours(' "$content_dir" | wc -l)"
  nb_exo="$(grep -rIE "new Exercice[[:alpha:]]*\(" "$content_dir" | wc -l)"
  nb_qm="$(grep -rI 'new ExerciceQM(' "$content_dir" | wc -l)"
  nb_qcu="$(grep -rIE "new ExerciceQC\(['|\"]QCU['|\"]" "$content_dir" | wc -l)"
  nb_qcm="$(grep -rIE "new ExerciceQC\(['|\"]QCM['|\"]" "$content_dir" | wc -l)"
  nb_tat="$(grep -rI 'new ExerciceTAT(' "$content_dir" | wc -l)"
  nb_gd="$(grep -rI 'new ExerciceGD(' "$content_dir" | wc -l)"
  nb_exo_total="$((nb_qm + nb_qcu + nb_qcm + nb_tat + nb_gd))"
  nb_exo_other="$((nb_exo - nb_exo_total))"
  nb_act="$((nb_courses + nb_exo_total))"

  # Flash files, split by the stream count recorded in the SWF index
  nb_flash="$(grep -Ec '/contenu/media/.*\.swf$' "$index")"
  nb_flash_0="$(grep -Ec '^0 ' "$swf_index")"
  nb_flash_1="$(grep -Ec '^1 ' "$swf_index")"
  nb_flash_mult="$((nb_flash - nb_flash_0 - nb_flash_1))"

  # Images, and whatever else sits directly in the media directory
  nb_png="$(grep -Ec '/contenu/media/.*\.png$' "$index")"
  nb_jpg="$(grep -Ec '/contenu/media/.*\.jpg$' "$index")"
  nb_gif="$(grep -Ec '/contenu/media/.*\.gif$' "$index")"
  nb_img="$((nb_png + nb_jpg + nb_gif))"
  nb_media="$(find "$content_dir/media/" -maxdepth 1 -type f | wc -l)"
  nb_media_total="$((nb_flash + nb_img))"
  nb_media_other="$((nb_media - nb_media_total))"

  cat >"$out_file" <<EOF
modules: $nb_mod
sous-parties: $nb_subs
pages: $nb_pages_all
normales: $nb_pages
spéciales: $nb_pages_special
activités: $nb_act
cours: $nb_courses
exercices: $nb_exo_total
QCU: $nb_qcu
QCM: $nb_qcm
QM: $nb_qm
TAT: $nb_tat
GD: $nb_gd
other: $nb_exo_other
media: $nb_media_total
images: $nb_img
png: $nb_png
jpg: $nb_jpg
gif: $nb_gif
flash: $nb_flash
no audio: $nb_flash_0
1 audio: $nb_flash_1
2+ audio: $nb_flash_mult
other: $nb_media_other
EOF
  cat "$out_file"
}
# Command dispatch: the first remaining argument selects the command, the
# other arguments are handed over to it
action="$1"
shift
case "$action" in
  list-streams)
    if [[ -z "$1" ]]; then
      echo "Usage: list-streams <file>" >&2
      exit 1
    fi
    # Stop when the file cannot be resolved (check_file reports it on stderr)
    # instead of running ffprobe on an empty path
    file="$(check_file "$1" "$SOURCES_DIR/contenu/media")" || exit 1
    list_streams "$file"
    ;;
  count-streams)
    count_streams "$@"
    ;;
  index-extensions)
    index_extensions
    ;;
  count-all)
    # With --force, silently rebuild both indexes that count_all relies on
    if [[ "$1" = "-f" || "$1" = "--force" ]]; then
      index_extensions >/dev/null
      count_streams >/dev/null
    fi
    count_all
    ;;
  setup)
    "$SCRIPTS_DIR/setup.sh"
    ;;
  setup-debug)
    # Dump the resolved environment as single-quoted assignments
    envfile="$SCRIPTS_DIR/.env"
    {
      echo "VERSION='$VERSION'"
      echo "MACAO_ROOT='$MACAO_ROOT'"
      echo "SOURCES_DIR='$SOURCES_DIR'"
      echo "SCRIPTS_DIR='$SCRIPTS_DIR'"
      echo "RESULTS_DIR='$RESULTS_DIR'"
    } >"$envfile"
    ;;
  convert)
    #activate_venv
    python "$SCRIPTS_DIR/src/main.py"
    ;;
  extract)
    #activate_venv
    python "$SCRIPTS_DIR/src/extract.py"
    ;;
  transform)
    #activate_venv
    python "$SCRIPTS_DIR/src/transform.py"
    ;;
  export)
    #activate_venv
    python "$SCRIPTS_DIR/src/export.py"
    ;;
  test)
    #activate_venv
    python "$SCRIPTS_DIR/src/test.py"
    ;;
  extract-mp3)
    # NOTE(review): the per-version subdirectories are looked up below
    # $SOURCES_DIR, so this presumably targets VERSION=full only - confirm
    for version in macao_12 macao_3; do
      for audio_file in "$SOURCES_DIR/$version/contenu/media/"*.swf; do
        # An unmatched glob stays literal: do not hand it to the extractor
        [[ -e "$audio_file" ]] || continue
        "$SCRIPTS_DIR/extract_mp3.sh" "$@" --output-dir "$RESULTS_DIR/audio" "$audio_file"
      done
    done
    ;;
  copy-images)
    # -p: create missing parents and tolerate an already existing directory
    mkdir -p "$RESULTS_DIR/img/" || exit 1
    # NOTE(review): same per-version layout assumption as extract-mp3
    for version in macao_12 macao_3; do
      for type in gif jpg png; do
        images=("$SOURCES_DIR/$version/contenu/media/"*".$type")
        # Nothing of this type: skip rather than let cp fail on the pattern
        [[ -e "${images[0]}" ]] || continue
        cp -- "${images[@]}" "$RESULTS_DIR/img/"
      done
    done
    ;;
  shell)
    if [[ "$VERSION" = "each" ]]; then
      echo "Subcommand 'shell' not supported for version '$VERSION'" >&2
      exit 1
    fi
    if [[ "$1" = "-p" || "$1" = "--pyenv" ]]; then
      activate_venv
    fi
    export PATH="$PATH:$MACAO_ROOT"
    cd "$MACAO_ROOT" || exit 1
    # Interactive child shell that inherits the exported environment
    "${SHELL:-bash}"
    ;;
  status)
    echo "VERSION=$VERSION"
    echo "MACAO_ROOT=$MACAO_ROOT"
    echo "SOURCES_DIR=$SOURCES_DIR"
    echo "SCRIPTS_DIR=$SCRIPTS_DIR"
    echo "RESULTS_DIR=$RESULTS_DIR"
    echo ""
    echo "Python virtual env: ${VIRTUAL_ENV:-"not set"}"
    echo -n "mcli: "
    if command -v mcli >/dev/null; then echo "available"; else echo "not in PATH"; fi
    echo ""
    cd "$MACAO_ROOT" && git status
    ;;
  help)
    print_usage
    ;;
  *)
    echo "Unknown command '$action'" >&2
    print_usage >&2
    exit 1
    ;;
esac