#!/usr/bin/env bash

# Resolve which part of the MACAO repository to work on.
# Precedence: first CLI argument > VERSION environment variable > "full".
if [[ -z "$VERSION" ]]; then
    # VERSION="macao_12"
    VERSION="full"
fi
# The first parameter overrides VERSION when it names a version; "12" and "3"
# are accepted as shorthands for "macao_12" and "macao_3". It is consumed so
# that "$1" is the command name afterwards.
version_regex='^(macao_12|macao_3|each|full)$'
if [[ "$1" == "12" || "$1" == "3" ]]; then
    VERSION="macao_$1"
    shift
elif [[ "$1" =~ $version_regex ]]; then
    VERSION="$1"
    shift
fi
export VERSION
# Reached only with a bad value coming from the environment, since arguments
# that do not match are left in place as the command name.
if [[ ! "$VERSION" =~ $version_regex ]]; then
    echo "Invalid version '$VERSION'" >&2
    exit 1
fi

# Special version value "each" is processed by calling the script itself once
# for each actual version (except for shell, status and setup, which run once).
# A failure of any run is reported through the exit status instead of being
# masked by the run that follows it.
if [[ "$VERSION" = "each" && "$1" != "shell" && "$1" != "status" && "$1" != "setup" ]]; then
    each_status=0
    for each_version in 12 3; do
        echo "macao_$each_version:"
        "$0" "$each_version" "$@" || each_status=$?
    done
    exit "$each_status"
fi

# MACAO_ROOT defaults to the directory containing this script
export MACAO_ROOT="${MACAO_ROOT:-$(dirname "$(realpath "$0")")}"
export SCRIPTS_DIR="$MACAO_ROOT/tetras_extraction/script"

# Sources: a single version lives in its own subdirectory, while "each" and
# "full" point at the parent directory of both versions
SOURCES_DIR="$MACAO_ROOT/Basilisk/MACAO"
if [[ "$VERSION" != "each" && "$VERSION" != "full" ]]; then
    SOURCES_DIR="$SOURCES_DIR/$VERSION"
fi
export SOURCES_DIR

# Results: one subdirectory per version value, including "full"
RESULTS_DIR="$MACAO_ROOT/tetras_extraction/result"
if [[ "$VERSION" != "each" ]]; then
    RESULTS_DIR="$RESULTS_DIR/$VERSION"
fi
export RESULTS_DIR

# Print the command-line help to stdout.
# The program name shown is the basename of "$0".
print_usage() {
    cat <<EOF
Usage: $(basename "$0") [version] <command> [args]

VERSION
    Specifies which part of the MACAO repository to use: "macao_12", "macao_3"
    (or simply "12" and "3"), "each" for both in separate result dirs
    and "full" for both merged in a single result dir.
    If not specified, uses the value of the "VERSION" environment variable,
    or "full" by default. Some commands do not support every version.

COMMANDS
    status
        Print useful info about the current environment.
    shell [-p|--pyenv]
        Open a shell with mcli's environment variables set, including PATH.
        If -p or --pyenv is specified, also enter the Python virtual env.
    list-streams <file>
        List audio streams in <file>
    count-streams [<file>]
        Count audio streams in <file>, or from all SWF files if none is given
    index-extensions
        Index all files by extension
    count-all [-f|--force]
        Count many types of Macao objects.
        If -f or --force is given, refresh indexes before counting (equivalent
        to count-streams and index-extensions)
    setup
        Initialize Python environment required by extractors
    setup-debug
        (Re)create .env file used by the Python debugger launch config
    extract
        Run the extract stage, to generate RDF from text sources
    transform
        Run the transform stage, to complete and clean-up the RDF data
    export
        Run the export stage, to generate Macao-Hugo content pages
    convert
        Run the full conversion process (extract -> transform -> export)
    test
        Run simple tests on the extracted RDF data
    extract-mp3 [-y|--yes-overwrite]
        Extract audio streams from all Flash SWF files
    copy-images
        Copy all GIF, JPG and PNG media files to the "img" result directory
    help
        Print this help and exit
EOF
}

# Resolve a file argument to an existing regular file.
# Arguments:
#   $1 - path to check; used as-is when it is a regular file
#   $2 - fallback directory, searched only when $1 is a bare filename
#        (contains no "/")
# Outputs: the resolved path on stdout, or an error message on stderr
# Returns: 0 when a file was found, 1 otherwise
check_file() {
    # Locals, so that callers' own "file" / "fallback" variables are untouched
    local file="$1"
    local fallback="$2"

    if [[ -f "$file" ]]; then
        printf '%s\n' "$file"
        return 0
    fi
    if [[ -d "$fallback" && "$file" != */* ]]; then
        file="$fallback/$file"
        if [[ -f "$file" ]]; then
            printf '%s\n' "$file"
            return 0
        fi
    fi
    echo "No such file '$1'" >&2
    return 1
}

# Source the Python virtual env activation script found under SCRIPTS_DIR
# into the current shell. Exits the script with status 1 if sourcing fails.
activate_venv() {
    # shellcheck disable=SC1091
    source "$SCRIPTS_DIR/venv/bin/activate" && return
    echo "Python venv not found, did you run setup first?" >&2
    exit 1
}

# Print the lines of ffprobe's report (stdout and stderr combined) for file $1
# that describe an MP3 audio stream. The exit status is grep's.
list_streams() {
    local mp3_stream_pattern='Stream.*Audio: mp3'
    ffprobe -i "$1" 2>&1 | grep -E "$mp3_stream_pattern"
}

# Count MP3 audio streams, printing "<count> <name>" lines.
# Arguments:
#   $1 - optional file; a bare filename is also looked up in the media
#        directory of SOURCES_DIR. When omitted, every *.swf of the media
#        directory is indexed (of both macao_12 and macao_3 when VERSION is
#        "full"), sorted by decreasing count, and the result is saved to
#        RESULTS_DIR/indexes/swf_streams_count.txt before being printed.
# Globals: SOURCES_DIR, RESULTS_DIR, VERSION (read)
# Returns: 1 when the given file cannot be found
count_streams() {
    local file out_file version

    if [[ -n "$1" ]]; then
        # Stop here on failure instead of probing an empty path
        file="$(check_file "$1" "$SOURCES_DIR/contenu/media")" || return 1
        echo "Indexing streams from $file ..." >&2
        echo "$(list_streams "$file" | wc -l) $(basename "$file")"
        return
    fi

    mkdir -p "$RESULTS_DIR/indexes"
    echo "Indexing streams from all SWFs (this may take some time) ..." >&2
    out_file="$RESULTS_DIR/indexes/swf_streams_count.txt"
    if [[ "$VERSION" = "full" ]]; then
        for version in macao_12 macao_3; do
            for file in "$SOURCES_DIR/$version/contenu/media/"*.swf; do
                # An unmatched glob stays literal: do not index it as a file
                [[ -e "$file" ]] || continue
                echo "$(list_streams "$file" | wc -l) $version/$(basename "$file")"
            done
        done | sort -rn >"$out_file"
    else
        for file in "$SOURCES_DIR/contenu/media/"*.swf; do
            # An unmatched glob stays literal: do not index it as a file
            [[ -e "$file" ]] || continue
            echo "$(list_streams "$file" | wc -l) $(basename "$file")"
        done | sort -rn >"$out_file"
    fi
    cat "$out_file"
}

# Index every file under SOURCES_DIR by extension.
# Writes RESULTS_DIR/indexes/index_per_extension.txt as a sequence of sections,
# each made of a "[ext]" header, the sorted relative paths of the files with
# that extension, and a blank line. The index is then printed to stdout.
# Files without extension and anything under a ".idea" directory are skipped.
# Globals: SOURCES_DIR, RESULTS_DIR (read)
# Exits the script if SOURCES_DIR cannot be entered.
index_extensions() {
    local out_file all_files ext

    mkdir -p "$RESULTS_DIR/indexes"
    out_file="$RESULTS_DIR/indexes/index_per_extension.txt"
    : >"$out_file" # Clear out file

    # Index all files, with a cd trick to get relative paths.
    # "cd -" prints the directory it returns to: keep that out of our stdout.
    cd "$SOURCES_DIR" || exit
    all_files="$(find . -path '**/.idea' -prune -o -type f -print)"
    cd - >/dev/null || exit

    # List all extensions (text after the last dot of the file name), then for
    # each one, filter the index for files with this extension.
    # The suffix is compared literally, so that extensions containing regex
    # metacharacters (e.g. "c++") only match their own files.
    # LC_ALL=C makes sed and awk work on bytes, whatever the file name encoding.
    LC_ALL=C sed -n 's/.*\.\([^./]\{1,\}\)$/\1/p' <<<"$all_files" | sort -u | while IFS= read -r ext; do
        echo "[$ext]" >>"$out_file"
        SUFFIX=".$ext" LC_ALL=C awk '
            BEGIN { suffix = ENVIRON["SUFFIX"]; n = length(suffix) }
            length($0) >= n && substr($0, length($0) - n + 1) == suffix
        ' <<<"$all_files" | sort >>"$out_file"
        echo "" >>"$out_file"
    done
    cat "$out_file"
    echo "Indexed all files by extension to $out_file" >&2
}

# Count many kinds of Macao objects and print a summary.
# Reads the two indexes generated by index_extensions and count_streams from
# RESULTS_DIR/indexes, greps imsmanifest.xml and the "contenu" directory of
# SOURCES_DIR, then writes the summary to RESULTS_DIR/indexes/count-all.txt
# and prints it to stdout.
# Globals: SOURCES_DIR, RESULTS_DIR, VERSION (read)
# Exits the script with status 1 if one of the indexes is missing.
# NOTE(review): paths assume SOURCES_DIR directly contains imsmanifest.xml and
# contenu/, which looks true for a single version only - confirm whether
# VERSION=full is meant to be supported here.
count_all() {
    local indexes_dir index swf_index content_dir out_file
    local nb_mod nb_subs nb_pages_all nb_pages nb_pages_special
    local nb_courses nb_exo nb_qm nb_qcu nb_qcm nb_tat nb_gd
    local nb_exo_total nb_exo_other nb_act
    local nb_flash nb_flash_0 nb_flash_1 nb_flash_mult
    local nb_png nb_jpg nb_gif nb_img nb_media nb_media_total nb_media_other

    indexes_dir="$RESULTS_DIR/indexes"
    mkdir -p "$indexes_dir"
    index="$indexes_dir/index_per_extension.txt"
    swf_index="$indexes_dir/swf_streams_count.txt"
    content_dir="$SOURCES_DIR/contenu"
    out_file="$indexes_dir/count-all.txt"

    # Diagnostics go to stderr so that stdout only ever carries the summary
    if [[ ! -f "$index" ]]; then
        echo "$index not found, use --force or run index-extensions before" >&2
        exit 1
    fi
    if [[ ! -f "$swf_index" ]]; then
        echo "$swf_index not found, use --force or run count-streams before" >&2
        exit 1
    fi

    # Modules and sub-parts are named differently in macao_3
    if [[ "$VERSION" = "macao_3" ]]; then
        nb_mod="$(grep -c '<item identifier="seq' "$SOURCES_DIR/imsmanifest.xml")"
        nb_subs="$(grep -Ec 'act[0-9]+\.html' "$index")"
    else
        nb_mod="$(grep -c '<item identifier="MosMod' "$SOURCES_DIR/imsmanifest.xml")"
        nb_subs="$(grep -Ec 'MosEtp[0-9]+\.html' "$index")"
    fi
    nb_pages_all="$(grep -Ec '/contenu/.*\.htm' "$index")"
    nb_pages="$(grep -Ec '/contenu/pages/pg[0-9]+\.html$' "$index")"
    nb_pages_special="$((nb_pages_all - nb_pages))"

    # Activities are counted from constructor calls found in the content files
    nb_courses="$(grep -rI 'new Cours(' "$content_dir" | wc -l)"
    nb_exo="$(grep -rIE "new Exercice[[:alpha:]]*\(" "$content_dir" | wc -l)"
    nb_qm="$(grep -rI 'new ExerciceQM(' "$content_dir" | wc -l)"
    # First argument is the QCU/QCM kind, in single or double quotes
    nb_qcu="$(grep -rIE "new ExerciceQC\(['\"]QCU['\"]" "$content_dir" | wc -l)"
    nb_qcm="$(grep -rIE "new ExerciceQC\(['\"]QCM['\"]" "$content_dir" | wc -l)"
    nb_tat="$(grep -rI 'new ExerciceTAT(' "$content_dir" | wc -l)"
    nb_gd="$(grep -rI 'new ExerciceGD(' "$content_dir" | wc -l)"
    nb_exo_total="$((nb_qm + nb_qcu + nb_qcm + nb_tat + nb_gd))"
    nb_exo_other="$((nb_exo - nb_exo_total))"
    nb_act="$((nb_courses + nb_exo_total))"

    # Flash files, split by the stream count at the start of each index line
    nb_flash="$(grep -Ec '/contenu/media/.*\.swf$' "$index")"
    nb_flash_0="$(grep -Ec '^0 ' "$swf_index")"
    nb_flash_1="$(grep -Ec '^1 ' "$swf_index")"
    nb_flash_mult="$((nb_flash - nb_flash_0 - nb_flash_1))"

    nb_png="$(grep -Ec '/contenu/media/.*\.png$' "$index")"
    nb_jpg="$(grep -Ec '/contenu/media/.*\.jpg$' "$index")"
    nb_gif="$(grep -Ec '/contenu/media/.*\.gif$' "$index")"
    nb_img="$((nb_png + nb_jpg + nb_gif))"
    # "other" media = files of the media directory that are neither of the above
    nb_media="$(find "$content_dir/media/" -maxdepth 1 -type f | wc -l)"
    nb_media_total="$((nb_flash + nb_img))"
    nb_media_other="$((nb_media - nb_media_total))"

    cat >"$out_file" <<EOF
modules:        $nb_mod
sous-parties:   $nb_subs
pages:          $nb_pages_all
    normales:       $nb_pages
    spéciales:      $nb_pages_special
activités:      $nb_act
    cours:          $nb_courses
    exercices:      $nb_exo_total
        QCU:            $nb_qcu
        QCM:            $nb_qcm
        QM:             $nb_qm
        TAT:            $nb_tat
        GD:             $nb_gd
        other:          $nb_exo_other
media:          $nb_media_total
    images:         $nb_img
        png:            $nb_png
        jpg:            $nb_jpg
        gif:            $nb_gif
    flash:          $nb_flash
        no audio:       $nb_flash_0
        1 audio:        $nb_flash_1
        2+ audio:       $nb_flash_mult
    other:          $nb_media_other
EOF
    cat "$out_file"
}

# Command dispatch: the first remaining argument is the command name, the
# following ones are passed to the command.
action="$1"
shift

case "$action" in
list-streams)
    if [[ -z "$1" ]]; then
        echo "Usage: list-streams <file>" >&2
        exit 1
    fi
    # Stop here when the file is not found, instead of probing an empty path
    file="$(check_file "$1" "$SOURCES_DIR/contenu/media")" || exit 1
    list_streams "$file"
    ;;
count-streams)
    count_streams "$@"
    ;;
index-extensions)
    index_extensions
    ;;
count-all)
    # With --force, silently regenerate both indexes before counting
    if [[ "$1" = "-f" || "$1" = "--force" ]]; then
        index_extensions >/dev/null
        count_streams >/dev/null
    fi
    count_all
    ;;
setup)
    "$SCRIPTS_DIR/setup.sh"
    ;;
setup-debug)
    # Dump the resolved environment as single-quoted VAR='value' lines
    envfile="$SCRIPTS_DIR/.env"
    {
        echo "VERSION='$VERSION'"
        echo "MACAO_ROOT='$MACAO_ROOT'"
        echo "SOURCES_DIR='$SOURCES_DIR'"
        echo "SCRIPTS_DIR='$SCRIPTS_DIR'"
        echo "RESULTS_DIR='$RESULTS_DIR'"
    } >"$envfile"
    ;;
convert)
    #activate_venv
    python "$SCRIPTS_DIR/src/main.py"
    ;;
extract | transform | export | test)
    # Each of these stages is implemented by the Python script of the same name
    #activate_venv
    python "$SCRIPTS_DIR/src/$action.py"
    ;;
extract-mp3)
    # NOTE(review): expects SOURCES_DIR to contain macao_12/ and macao_3/,
    # which looks true with VERSION=full only - confirm.
    for version in macao_12 macao_3; do
        for audio_file in "$SOURCES_DIR/$version/contenu/media/"*.swf; do
            # An unmatched glob stays literal: skip it
            [[ -e "$audio_file" ]] || continue
            "$SCRIPTS_DIR/extract_mp3.sh" "$@" --output-dir "$RESULTS_DIR/audio" "$audio_file"
        done
    done
    ;;
copy-images)
    # NOTE(review): expects SOURCES_DIR to contain macao_12/ and macao_3/,
    # which looks true with VERSION=full only - confirm.
    # -p: creates missing parents and makes re-running the command harmless
    mkdir -p "$RESULTS_DIR/img/"
    for version in macao_12 macao_3; do
        for type in gif jpg png; do
            images=("$SOURCES_DIR/$version/contenu/media/"*".$type")
            # An unmatched glob stays literal: nothing to copy for this type
            [[ -e "${images[0]}" ]] || continue
            cp -- "${images[@]}" "$RESULTS_DIR/img/"
        done
    done
    ;;
shell)
    if [[ "$VERSION" = "each" ]]; then
        echo "Subcommand 'shell' not supported for version '$VERSION'" >&2
        exit 1
    fi
    if [[ "$1" = "-p" || "$1" = "--pyenv" ]]; then
        activate_venv
    fi
    # The sub-shell inherits the exported variables and finds this script in
    # its PATH
    export PATH="$PATH:$MACAO_ROOT"
    cd "$MACAO_ROOT" || exit
    "${SHELL:-bash}"
    ;;
status)
    echo "VERSION=$VERSION"
    echo "MACAO_ROOT=$MACAO_ROOT"
    echo "SOURCES_DIR=$SOURCES_DIR"
    echo "SCRIPTS_DIR=$SCRIPTS_DIR"
    echo "RESULTS_DIR=$RESULTS_DIR"
    echo ""
    echo "Python virtual env: ${VIRTUAL_ENV:-"not set"}"
    echo -n "mcli: "
    if command -v mcli >/dev/null; then echo "available"; else echo "not in PATH"; fi
    echo ""
    cd "$MACAO_ROOT" && git status
    ;;
help)
    print_usage
    ;;
*)
    echo "Unknown command '$action'" >&2
    print_usage >&2
    exit 1
    ;;
esac