Skip to content
Snippets Groups Projects
Commit dd96ff8a authored by Eliott Sammier's avatar Eliott Sammier
Browse files

Add counting to CLI tool #4

parent d2af6682
No related branches found
No related tags found
No related merge requests found
...@@ -17,6 +17,9 @@ count-streams [<file>] ...@@ -17,6 +17,9 @@ count-streams [<file>]
Count audio streams in <file>, or from all SWF files if none is given Count audio streams in <file>, or from all SWF files if none is given
index-extensions index-extensions
Index all files by extension Index all files by extension
count-all [-f|--force]
Count many types of Macao objects.
If -f or --force is given, refresh indexes before counting (equivalent to count-streams and index-extensions)
setup setup
Initialize Python environment required by extractors Initialize Python environment required by extractors
extract-rdf extract-rdf
...@@ -50,30 +53,22 @@ list_streams() { ...@@ -50,30 +53,22 @@ list_streams() {
ffprobe -i "$1" 2>&1 | grep -E 'Stream.*Audio: mp3' ffprobe -i "$1" 2>&1 | grep -E 'Stream.*Audio: mp3'
} }
action="$1" count_streams() {
shift
case "$action" in
list-streams)
[[ -z "$1" ]] && echo "Usage: list-streams <file>" && exit 1
file="$(check_file "$1" "$SOURCES_DIR/contenu/media")"
list_streams "$file"
;;
count-streams)
if [[ -n "$1" ]]; then if [[ -n "$1" ]]; then
file="$(check_file "$1" "$SOURCES_DIR/contenu/media")" file="$(check_file "$1" "$SOURCES_DIR/contenu/media")"
echo "Indexing streams from $file ..." echo "Indexing streams from $file ..." >&2
echo "$(list_streams "$file" | wc -l) $(basename "$file")" echo "$(list_streams "$file" | wc -l) $(basename "$file")"
else else
echo "Indexing streams from all SWFs (this may take some time) ..." echo "Indexing streams from all SWFs (this may take some time) ..." >&2
out_file="$RESULTS_DIR/indexes/swf_streams_count.txt" out_file="$RESULTS_DIR/indexes/swf_streams_count.txt"
for file in "$SOURCES_DIR/contenu/media/"*.swf; do for file in "$SOURCES_DIR/contenu/media/"*.swf; do
echo "$(list_streams "$file" | wc -l) $(basename "$file")" echo "$(list_streams "$file" | wc -l) $(basename "$file")"
done | sort -rn >"$out_file" done | sort -rn >"$out_file"
cat "$out_file" cat "$out_file"
fi fi
;; }
index-extensions)
index_extensions() {
out_file="$RESULTS_DIR/indexes/index_per_extension.txt" out_file="$RESULTS_DIR/indexes/index_per_extension.txt"
echo -n "" >"$out_file" # Clear out file echo -n "" >"$out_file" # Clear out file
# Index all files, with a cd trick to get relative paths # Index all files, with a cd trick to get relative paths
...@@ -88,6 +83,102 @@ index-extensions) ...@@ -88,6 +83,102 @@ index-extensions)
done done
cat "$out_file" cat "$out_file"
echo "Indexed all files by extension to $out_file" >&2 echo "Indexed all files by extension to $out_file" >&2
}
#######################################
# Count many types of Macao objects and write a summary report.
# Relies on the two index files produced by index-extensions and
# count-streams; it does not regenerate them itself.
# Globals:
#   SOURCES_DIR (read) - must contain imsmanifest.xml and contenu/
#   RESULTS_DIR (read) - must contain indexes/index_per_extension.txt
#                        and indexes/swf_streams_count.txt
# Arguments:
#   None
# Outputs:
#   Writes the report to $RESULTS_DIR/indexes/count-all.txt and prints it
#   on stdout. Diagnostics go to stderr.
# Returns:
#   Exits the shell with status 1 when one of the index files is missing.
#######################################
count_all() {
    # Keep every working variable local: sibling functions also use names
    # such as out_file, and nothing here needs to outlive the call.
    local index swf_index content_dir out_file
    local nb_mod nb_subs nb_pages_all nb_pages nb_pages_special
    local nb_act nb_courses nb_exo nb_qm nb_qcu nb_qcm nb_tat nb_gd
    local nb_exo_total nb_exo_other
    local nb_flash nb_flash_0 nb_flash_1 nb_flash_mult
    local nb_png nb_jpg nb_gif nb_img
    local nb_media nb_media_total nb_media_other

    index="$RESULTS_DIR/indexes/index_per_extension.txt"
    swf_index="$RESULTS_DIR/indexes/swf_streams_count.txt"
    content_dir="$SOURCES_DIR/contenu"
    out_file="$RESULTS_DIR/indexes/count-all.txt"

    # Both indexes are prerequisites; report their absence on stderr so the
    # message never ends up mixed into captured or redirected report output.
    if [[ ! -f "$index" ]]; then
        echo "$index not found, use --force or run index-extensions before" >&2
        exit 1
    fi
    if [[ ! -f "$swf_index" ]]; then
        echo "$swf_index not found, use --force or run count-streams before" >&2
        exit 1
    fi

    # Structure: modules from the manifest, sub-parts and pages from the index.
    nb_mod="$(grep -c '<item identifier="MosMod' "$SOURCES_DIR/imsmanifest.xml")"
    nb_subs="$(grep -Ec 'MosEtp[0-9]+\.html' "$index")"
    nb_pages_all="$(grep -Ec '/contenu/.*\.htm' "$index")"
    nb_pages="$(grep -Ec '/contenu/pages/pg[0-9]+\.html$' "$index")"
    nb_pages_special="$((nb_pages_all - nb_pages))"

    # Activities: one matching line per constructor call found under contenu/.
    # grep -r prints one line per match across all files, hence "| wc -l"
    # rather than -c (which would give per-file counts).
    nb_courses="$(grep -rI 'new Cours(' "$content_dir" | wc -l)"
    nb_exo="$(grep -rIE "new Exercice[[:alpha:]]*\(" "$content_dir" | wc -l)"
    nb_qm="$(grep -rI 'new ExerciceQM(' "$content_dir" | wc -l)"
    # ['\"] accepts either quote style around the exercise kind.
    nb_qcu="$(grep -rIE "new ExerciceQC\(['\"]QCU['\"]" "$content_dir" | wc -l)"
    nb_qcm="$(grep -rIE "new ExerciceQC\(['\"]QCM['\"]" "$content_dir" | wc -l)"
    nb_tat="$(grep -rI 'new ExerciceTAT(' "$content_dir" | wc -l)"
    nb_gd="$(grep -rI 'new ExerciceGD(' "$content_dir" | wc -l)"
    nb_exo_total="$((nb_qm + nb_qcu + nb_qcm + nb_tat + nb_gd))"
    nb_exo_other="$((nb_exo - nb_exo_total))"
    # The report has an "activités" row, so it needs a value: every course
    # plus every exercise constructor found, whether classified or not.
    nb_act="$((nb_courses + nb_exo))"

    # Media: flash files split by the stream count recorded in the SWF index.
    nb_flash="$(grep -Ec '/contenu/media/.*\.swf$' "$index")"
    nb_flash_0="$(grep -Ec '^0 ' "$swf_index")"
    nb_flash_1="$(grep -Ec '^1 ' "$swf_index")"
    nb_flash_mult="$((nb_flash - nb_flash_0 - nb_flash_1))"
    nb_png="$(grep -Ec '/contenu/media/.*\.png$' "$index")"
    nb_jpg="$(grep -Ec '/contenu/media/.*\.jpg$' "$index")"
    nb_gif="$(grep -Ec '/contenu/media/.*\.gif$' "$index")"
    nb_img="$((nb_png + nb_jpg + nb_gif))"
    # Anything directly in media/ that is neither flash nor a known image type
    # is reported as "other".
    nb_media="$(find "$content_dir/media/" -maxdepth 1 -type f | wc -l)"
    nb_media_total="$((nb_flash + nb_img))"
    nb_media_other="$((nb_media - nb_media_total))"

    cat >"$out_file" <<EOF
modules: $nb_mod
sous-parties: $nb_subs
pages: $nb_pages_all
normales: $nb_pages
spéciales: $nb_pages_special
activités: $nb_act
cours: $nb_courses
exercices: $nb_exo_total
QCU: $nb_qcu
QCM: $nb_qcm
QM: $nb_qm
TAT: $nb_tat
GD: $nb_gd
other: $nb_exo_other
media: $nb_media_total
images: $nb_img
png: $nb_png
jpg: $nb_jpg
gif: $nb_gif
flash: $nb_flash
no audio: $nb_flash_0
1 audio: $nb_flash_1
2+ audio: $nb_flash_mult
other: $nb_media_other
EOF
    cat "$out_file"
}
action="$1"
shift
case "$action" in
list-streams)
[[ -z "$1" ]] && echo "Usage: list-streams <file>" && exit 1
file="$(check_file "$1" "$SOURCES_DIR/contenu/media")"
list_streams "$file"
;;
count-streams)
count_streams "$@"
;;
index-extensions)
index_extensions
;;
count-all)
if [[ "$1" = "-f" || "$1" = "--force" ]]; then
index_extensions >/dev/null
count_streams >/dev/null
fi
count_all
;; ;;
setup) setup)
"$SCRIPTS_DIR/setup.sh" "$SCRIPTS_DIR/setup.sh"
......
modules: 8
sous-parties: 18
pages: 151
normales: 132
spéciales: 19
activités:
cours: 59
exercices: 73
QCU: 39
QCM: 9
QM: 8
TAT: 12
GD: 5
other: 0
media: 436
images: 190
png: 153
jpg: 6
gif: 31
flash: 246
no audio: 7
1 audio: 211
2+ audio: 28
other: 1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment.