diff --git a/mcli b/mcli
index db9de2a1fbcc1f6d69a722b8c73807e4f52c72d0..f6a2565aa4291b2ca7094bda6a563e7e735b3420 100755
--- a/mcli
+++ b/mcli
@@ -17,6 +17,9 @@ count-streams [<file>]
     Count audio streams in <file>, or from all SWF files if none is given
 index-extensions
     Index all files by extension
+count-all [-f|--force]
+    Count many types of Macao objects.
+    If -f or --force is given, refresh indexes before counting (equivalent to count-streams and index-extensions)
 setup
     Initialize Python environment required by extractors
 extract-rdf
@@ -50,30 +53,22 @@ list_streams() {
 	ffprobe -i "$1" 2>&1 | grep -E 'Stream.*Audio: mp3'
 }
 
-action="$1"
-shift
-
-case "$action" in
-list-streams)
-	[[ -z "$1" ]] && echo "Usage: list-streams <file>" && exit 1
-	file="$(check_file "$1" "$SOURCES_DIR/contenu/media")"
-	list_streams "$file"
-	;;
-count-streams)
+count_streams() {
 	if [[ -n "$1" ]]; then
 		file="$(check_file "$1" "$SOURCES_DIR/contenu/media")"
-		echo "Indexing streams from $file ..."
+		echo "Indexing streams from $file ..." >&2
 		echo "$(list_streams "$file" | wc -l) $(basename "$file")"
 	else
-		echo "Indexing streams from all SWFs (this may take some time) ..."
+		echo "Indexing streams from all SWFs (this may take some time) ..." >&2
 		out_file="$RESULTS_DIR/indexes/swf_streams_count.txt"
 		for file in "$SOURCES_DIR/contenu/media/"*.swf; do
 			echo "$(list_streams "$file" | wc -l) $(basename "$file")"
 		done | sort -rn >"$out_file"
 		cat "$out_file"
 	fi
-	;;
-index-extensions)
+}
+
+index_extensions() {
 	out_file="$RESULTS_DIR/indexes/index_per_extension.txt"
 	echo -n "" >"$out_file" # Clear out file
 	# Index all files, with a cd trick to get relative paths
@@ -88,6 +83,108 @@ index-extensions)
 	done
 	cat "$out_file"
 	echo "Indexed all files by extension to $out_file" >&2
+}
+
+# Count Macao objects (modules, pages, activities, media) and write the
+# summary to $RESULTS_DIR/indexes/count-all.txt, then print it.
+# Requires both indexes produced by index-extensions and count-streams;
+# exits with status 1 if either is missing.
+count_all() {
+	index="$RESULTS_DIR/indexes/index_per_extension.txt"
+	swf_index="$RESULTS_DIR/indexes/swf_streams_count.txt"
+	content_dir="$SOURCES_DIR/contenu"
+	out_file="$RESULTS_DIR/indexes/count-all.txt"
+
+	if [[ ! -f "$index" ]]; then
+		echo "$index not found, use --force or run index-extensions before" >&2
+		exit 1
+	fi
+	if [[ ! -f "$swf_index" ]]; then
+		echo "$swf_index not found, use --force or run count-streams before" >&2
+		exit 1
+	fi
+
+	nb_mod="$(grep -c '<item identifier="MosMod' "$SOURCES_DIR/imsmanifest.xml")"
+	nb_subs="$(grep -Ec 'MosEtp[0-9]+.html' "$index")"
+	nb_pages_all="$(grep -Ec '/contenu/.*\.htm' "$index")"
+	nb_pages="$(grep -Ec '/contenu/pages/pg[0-9]+\.html$' "$index")"
+	nb_pages_special="$((nb_pages_all - nb_pages))"
+
+	nb_courses="$(grep -rI 'new Cours(' "$content_dir" | wc -l)"
+	nb_exo="$(grep -rIE "new Exercice[[:alpha:]]*\(" "$content_dir" | wc -l)"
+	nb_qm="$(grep -rI 'new ExerciceQM(' "$content_dir" | wc -l)"
+	nb_qcu="$(grep -rIE "new ExerciceQC\(['|\"]QCU['|\"]" "$content_dir" | wc -l)"
+	nb_qcm="$(grep -rIE "new ExerciceQC\(['|\"]QCM['|\"]" "$content_dir" | wc -l)"
+	nb_tat="$(grep -rI 'new ExerciceTAT(' "$content_dir" | wc -l)"
+	nb_gd="$(grep -rI 'new ExerciceGD(' "$content_dir" | wc -l)"
+	nb_exo_total="$((nb_qm + nb_qcu + nb_qcm + nb_tat + nb_gd))"
+	nb_exo_other="$((nb_exo - nb_exo_total))"
+	# Activities are courses plus every exercise, including unrecognized types
+	nb_act="$((nb_courses + nb_exo))"
+
+	nb_flash="$(grep -Ec '/contenu/media/.*\.swf$' "$index")"
+	nb_flash_0="$(grep -Ec '^0 ' "$swf_index")"
+	nb_flash_1="$(grep -Ec '^1 ' "$swf_index")"
+	nb_flash_mult="$((nb_flash - nb_flash_0 - nb_flash_1))"
+
+	nb_png="$(grep -Ec '/contenu/media/.*\.png$' "$index")"
+	nb_jpg="$(grep -Ec '/contenu/media/.*\.jpg$' "$index")"
+	nb_gif="$(grep -Ec '/contenu/media/.*\.gif$' "$index")"
+	nb_img="$((nb_png + nb_jpg + nb_gif))"
+	nb_media="$(find "$content_dir/media/" -maxdepth 1 -type f | wc -l)"
+	nb_media_total="$((nb_flash + nb_img))"
+	nb_media_other="$((nb_media - nb_media_total))"
+
+	cat >"$out_file" <<EOF
+modules: $nb_mod
+sous-parties: $nb_subs
+pages: $nb_pages_all
+    normales: $nb_pages
+    spéciales: $nb_pages_special
+activités: $nb_act
+    cours: $nb_courses
+    exercices: $nb_exo_total
+        QCU: $nb_qcu
+        QCM: $nb_qcm
+        QM: $nb_qm
+        TAT: $nb_tat
+        GD: $nb_gd
+        other: $nb_exo_other
+media: $nb_media_total
+    images: $nb_img
+        png: $nb_png
+        jpg: $nb_jpg
+        gif: $nb_gif
+    flash: $nb_flash
+        no audio: $nb_flash_0
+        1 audio: $nb_flash_1
+        2+ audio: $nb_flash_mult
+    other: $nb_media_other
+EOF
+	cat "$out_file"
+}
+
+action="$1"
+shift
+
+case "$action" in
+list-streams)
+	[[ -z "$1" ]] && echo "Usage: list-streams <file>" && exit 1
+	file="$(check_file "$1" "$SOURCES_DIR/contenu/media")"
+	list_streams "$file"
+	;;
+count-streams)
+	count_streams "$@"
+	;;
+index-extensions)
+	index_extensions
+	;;
+count-all)
+	if [[ "$1" = "-f" || "$1" = "--force" ]]; then
+		index_extensions >/dev/null
+		count_streams >/dev/null
+	fi
+	count_all
 	;;
 setup)
 	"$SCRIPTS_DIR/setup.sh"
diff --git a/tetras_extraction/macao_12/result/indexes/count-all.txt b/tetras_extraction/macao_12/result/indexes/count-all.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b3192d6897dbc4964e7c6e451dab0df9db3c1b5f
--- /dev/null
+++ b/tetras_extraction/macao_12/result/indexes/count-all.txt
@@ -0,0 +1,24 @@
+modules: 8
+sous-parties: 18
+pages: 151
+    normales: 132
+    spéciales: 19
+activités: 132
+    cours: 59
+    exercices: 73
+        QCU: 39
+        QCM: 9
+        QM: 8
+        TAT: 12
+        GD: 5
+        other: 0
+media: 436
+    images: 190
+        png: 153
+        jpg: 6
+        gif: 31
+    flash: 246
+        no audio: 7
+        1 audio: 211
+        2+ audio: 28
+    other: 1