#!/bin/bash

#
# Copyright (C) 2019 by the University of Southern California
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
#

# Print the command-line help text (NAME/DESCRIPTION/Usage/Options/Args)
# to stdout.
function usage()
{
    echo "
NAME: 
    verfploeter_plotter - Plot verfploeter outputs in world map
DESCRIPTION:
    This tool generates anycast mapping from Verfploeter (or anything similar) 
        outputs.  It takes as input pcap files or tab-delimited (fsdb) text files.
    Each anycast site should have a pcap or fsdb file.
    The default base directory is at current working directory.

    You can specify input directory or the tool will assume BASE/Input/ directory 
        for the input files.  Input files should be in YYYY-MM-DD-SITE.pcap or 
        YYYY-MM-DD-SITE.fsdb format, e.g. 2019-06-07-LAX.pcap or 2019-06-07-LAX.fsdb.

    You can specify the output directory or the outputs will be generated in the 
        BASE/Output/ directory.
    This tool needs a file which includes all the target hitlist.  
        Default is BASE/ip_list.txt.
        It also uses a geolocation database. A sample database is given at the base 
        directory.
    The tool generates the mapping in png and pdf formats along with some other 
        data files in BASE/Output/ directory.
    Sample inputs are in /usr/share/doc/verfploer_plotter/Input/ directory and 
        sample outputs are in the /usr/share/doc/verfploer_plotter/Output/ directory.
    
Usage: 
    verfploeter_plotter --year=YYYY --month=MM --day=DD --n=N --s=LAX,ARI,MIA --id=ID [options]

Options:
    --help             Prints help information
    --targets          Target hitlist, default is ip_list.txt
    --i                Input directory, default is Input
    --o                Output directory, default is Output
    --scale            Scale of the mapping, default is 1500
    --format           Format of the input files, default is pcap
    --database         Database file
    --dir              Base directory
    --root             Root name, default is GENERAL

Args:
    --year <YYYY>      Year of the pcap files
    --month <MM>       Month of the pcap files
    --day <DD>         Date of the pcap files
    --n <N>            No. of anycast sites
    --s <LIST>         List of the anycast sites separated by comma
    --id <ID>          ID of the ICMP replies
"
}

# Default settings; most can be overridden on the command line below.
BASE_DIR=.
BINDIR=$(dirname "$0")   # quoted: the script path may contain spaces
ROOT="GENERAL"

SCALE=1500
LOAD_SCALE=1500
RADIUS_MAX=170000
FORMAT="pcap"


# Parse --key=value style arguments.
# The value is everything after the FIRST '=', so values may themselves
# contain '=' (the old `awk -F=` split silently dropped anything after a
# second '=').  Arguments without '=' get an empty VALUE, as before.
while [ "$1" != "" ]; do
    PARAM=${1%%=*}
    if [[ "$1" == *=* ]]; then
        VALUE=${1#*=}
    else
        VALUE=''
    fi
    case $PARAM in
        -h | --help)
            usage
            exit
            ;;
        --year)
            YEAR=$VALUE
            ;;
        --month)
            MONTH=$VALUE
            ;;
        --day)
            DATE=$VALUE
            ;;
        --n)
            N_SITES=$VALUE
            ;;
        --s)
            SITES=$VALUE
            ;;
        --format)
            FORMAT=$VALUE
            ;;
        --database)
            DATABASE=$VALUE
            ;;
        --dir)
            BASE_DIR=$VALUE/
            ;;
        --i)
            DATA_SOURCE_DIR=$VALUE/
            ;;
        --o)
            DATA_DEST_DIR=$VALUE
            ;;
        --targets)
            TARGETS=$VALUE
            ;;
        --scale)
            SCALE=$VALUE
            ;;
        --id)
            ID=$VALUE
            ;;
        --root)
            ROOT=$VALUE
            ;;
        *)
            echo "ERROR: unknown parameter \"$PARAM\""
            usage
            exit 1
            ;;
    esac
    shift
done

# Every mandatory argument must be present; otherwise show help and stop.
for REQUIRED_VALUE in "$YEAR" "$MONTH" "$DATE" "$N_SITES" "$SITES"; do
    if [ -z "$REQUIRED_VALUE" ]; then
        usage
        exit 1
    fi
done

# Processing pcap input requires the external 'pingextract' tool.
if [ "$FORMAT" == "pcap" ]; then
    if ! hash pingextract >& /dev/null; then
        {
            echo "verfploeter/pingextract is required for processing pcap files"
            echo "available from: https://ant.isi.edu/software/verfploeter/pingextract/index.html"
        } >&2
        exit 1
    fi
fi

# Resolve paths, honoring any values already supplied on the command line.
# BUG FIX: these used to be assigned unconditionally, which silently
# clobbered the --targets/--i/--o/--database options parsed above.
TARGETS=${TARGETS:-${BASE_DIR}/ip_list.txt}
DATA_SOURCE_DIR=${DATA_SOURCE_DIR:-${BASE_DIR}/Input/}
DATA_DEST_DIR=${DATA_DEST_DIR:-${BASE_DIR}/Output}
DATABASE=${DATABASE:-${BASE_DIR}/geolocation.fsdb.bz2}
GEOBIN_TO_WORLDMAP=${BINDIR}/geobin_to_worldmap

# Split the comma-separated site list; unlike the old unquoted
# ${SITES//,/ } expansion this does not glob-expand the site names.
IFS=',' read -r -a SITE_LIST <<< "$SITES"

FILE_NAME=$YEAR-$MONTH-$DATE
DATE=$YEAR-$MONTH-$DATE   # from here on DATE holds the full date string

DATA_DEST_DIR=${DATA_DEST_DIR}/$YEAR/$DATE-$SITES/$ID/

# Check whether we have all the directories/files correctly.
# The base directory is verified first, since the defaults above derive
# from it; diagnostics go to stderr.
if [ ! -d "${BASE_DIR}" ]; then
    echo "Base directory not found." >&2
    exit 1
fi

[ -d "$DATA_DEST_DIR" ] || mkdir -p "$DATA_DEST_DIR"

if [ ! -f "${TARGETS}" ]; then
    echo "Target not found." >&2
    exit 1
fi

if [ ! -f "${DATABASE}" ]; then
    echo "Geolocation database not found." >&2
    exit 1
fi

# FILE_EXIST NAME
# Succeeds (status 0) and prints a notice when NAME already exists under
# DATA_DEST_DIR; fails (status 1) when it does not, i.e. when the caller
# should (re)generate the file.
FILE_EXIST() {
    local candidate="${DATA_DEST_DIR}$1"
    [ -f "$candidate" ] || return 1
    echo "$1 exists, not over-writing"
    return 0
}

# Normalize the target hitlist into a block-sorted fsdb table (cached).
# BUG FIX: the old code passed the full path to FILE_EXIST, which
# prepends DATA_DEST_DIR itself; the doubled path never existed, so the
# table was rebuilt on every run.
if ! FILE_EXIST "targets.fsdb"; then
    dbcoldefine target < "$TARGETS" |
        dbcolcreate block |
        dbroweval '@p = split(/\./, _target); _block = sprintf("%02x%02x%02x00", $p[0], $p[1], $p[2]);' |
        dbsort block > "${DATA_DEST_DIR}targets.fsdb"
fi

# Convert each site's capture into an fsdb catchment table, then fold the
# sites together one PART file at a time: PART1 joins the target list with
# the first site; each later PART<k> merges in site k, marking blocks that
# answered at two or more sites as "multiple".
COUNTER=1
for ((index = 0; index < N_SITES; index++)); do
    CURRENT_SITE=${SITE_LIST[$index]}

    if [ "$FORMAT" == "pcap" ]; then
        FILE_EXIST "${FILE_NAME}.${CURRENT_SITE}.dat" || {
            if [ ! -f "${DATA_SOURCE_DIR}${FILE_NAME}-${CURRENT_SITE}.pcap" ]; then
                echo "Input pcap file does not exist or not in right naming format."
                exit 1
            fi
            pingextract --identifier "$ID" "${DATA_SOURCE_DIR}${FILE_NAME}-${CURRENT_SITE}.pcap" \
                > "${DATA_DEST_DIR}${FILE_NAME}.${CURRENT_SITE}.dat"
        }

        # BUG FIX: check the dash-separated name that the pipeline below
        # actually writes; the old dot-separated check never matched, so
        # this step was recomputed on every run.
        FILE_EXIST "${FILE_NAME}-${CURRENT_SITE}.fsdb" || {
            sed "s/not-specified/${CURRENT_SITE}/" "${DATA_DEST_DIR}${FILE_NAME}.${CURRENT_SITE}.dat" |
                sed 's/|/ /g' |
                dbcoldefine site time target run count |
                dbcolcreate block |
                dbroweval '@p = split(/\./, _target); _block = sprintf("%02x%02x%02x00", $p[0], $p[1], $p[2]);' |
                dbcol block target site > "${DATA_DEST_DIR}${FILE_NAME}-${CURRENT_SITE}.fsdb"
        }
    fi

    if [ "$index" -eq 0 ]; then
        FILE_EXIST "${FILE_NAME}.PART${COUNTER}.fsdb" || {
            SITE_FSDB=${DATA_DEST_DIR}${FILE_NAME}-${CURRENT_SITE}.fsdb
            if [ "$FORMAT" == "fsdb" ]; then
                SITE_FSDB=${DATA_SOURCE_DIR}${FILE_NAME}-${CURRENT_SITE}.fsdb
                if [ ! -f "${SITE_FSDB}" ]; then
                    echo "Input fsdb file does not exist or not in right naming format."
                    exit 1
                fi
            fi

            # dbcol reads its input from -i, so no cat is needed.
            dbcol -i "${SITE_FSDB}" block site |
                dbsort block |
                dbjoin -a -S -i "${DATA_DEST_DIR}targets.fsdb" -i - block \
                    > "${DATA_DEST_DIR}${FILE_NAME}.PART${COUNTER}.fsdb"
        }
    else
        LAST_INDEX=$(( COUNTER - 1 ))
        FILE_EXIST "${FILE_NAME}.PART${COUNTER}.fsdb" || {
            SITE_FSDB=${DATA_DEST_DIR}${FILE_NAME}-${CURRENT_SITE}.fsdb
            if [ "$FORMAT" == "fsdb" ]; then
                SITE_FSDB=${DATA_SOURCE_DIR}${FILE_NAME}-${CURRENT_SITE}.fsdb
            fi
            dbcol -i "${SITE_FSDB}" block site |
                dbcolrename site site2 |
                dbsort block |
                dbjoin -a -S -i "${DATA_DEST_DIR}${FILE_NAME}.PART${LAST_INDEX}.fsdb" -i - block |
                dbroweval 'if (_site eq "-" && _site2 ne "-") {_site = _site2;} 
                           elsif (_site ne "-" && _site2 ne "-") {_site = "multiple";}' |
                dbcol -v site2 > "${DATA_DEST_DIR}${FILE_NAME}.PART${COUNTER}.fsdb"
        }
    fi
    COUNTER=$(( COUNTER + 1 ))
done

# Deduplicate the final PART file into the complete per-block catchment map.
FILE_EXIST "${FILE_NAME}.all.fsdb" || {
    dbsort block < "${DATA_DEST_DIR}${FILE_NAME}.PART${N_SITES}.fsdb" |
        dbrowuniq block > "${DATA_DEST_DIR}${FILE_NAME}.all.fsdb"
}


# Join each block with its geolocation.  The database's hexip column is
# masked to its /24 (low byte zeroed) so it lines up with our block keys.
FILE_EXIST "${FILE_NAME}.all.geoloc.fsdb" || {
    bzcat "$DATABASE" |
        dbcol hexip latitude longitude |
        dbroweval '_hexip =~ s/..$/00/;' |
        dbcolrename hexip block latitude lat longitude long |
        dbfilealter -F D |
        dbjoin -t inner -S -i "${DATA_DEST_DIR}${FILE_NAME}.all.fsdb" -i - block \
            > "${DATA_DEST_DIR}${FILE_NAME}.all.geoloc.fsdb"
}

# Bin each block's lat/long into 2x2-degree cells keyed by "lat,long".
# (A stray backslash that had leaked into the quoted Perl code after the
# round_ll(_long,...) line was removed; Perl evaluated it as a discarded
# reference, so results are unchanged.)
FILE_EXIST "${FILE_NAME}.all.geobin.fsdb" || {
    dbcolcreate rounded_lat rounded_long rounded_lat_long \
        < "${DATA_DEST_DIR}${FILE_NAME}.all.geoloc.fsdb" |
        dbroweval -b 'sub round_ll { my($ll,$min,$prec) = @_; return int(($ll-$min)/$prec)*$prec+$min+$prec/2.0; }' \
           '_rounded_lat = round_ll(_lat,-90.0,2.0);
            _rounded_long = round_ll(_long,-180.0,2.0);
            _rounded_lat_long = _rounded_lat . "," . _rounded_long;' \
        > "${DATA_DEST_DIR}${FILE_NAME}.all.geobin.fsdb"
}

# Count targets per (cell, site) pair.
FILE_EXIST "${FILE_NAME}.geobin.counts.fsdb" || {
    dbcol rounded_lat_long site block \
        < "${DATA_DEST_DIR}${FILE_NAME}.all.geobin.fsdb" |
        dbsort rounded_lat_long site |
        dbrowuniq -c rounded_lat_long site \
            > "${DATA_DEST_DIR}${FILE_NAME}.geobin.counts.fsdb"
}

# Build the per-site pivot-column lists used by the dbroweval below:
# SITE_ADD  = "_site_A+_site_B+..." (summed for the total count)
# SITE_JOIN = "_site_A,_site_B,..." (joined for the per-site counts)
# Using bare N_SITES inside (( )) also avoids an arithmetic syntax error
# when the variable is unset (the old "$N_SITES" expanded to nothing).
SITE_ADD=''
SITE_JOIN=''
for ((index = 0; index < N_SITES; index++)); do
    SITE_ADD+="_site_${SITE_LIST[$index]}"
    SITE_JOIN+="_site_${SITE_LIST[$index]}"
    if (( index < N_SITES - 1 )); then
        SITE_ADD+='+'
        SITE_JOIN+=','
    fi
done

# Pivot per-site counts into one row per cell with a column per site.
# SITE_ADD/SITE_JOIN are deliberately unquoted: they interpolate column
# names into the Perl snippet handed to dbroweval.
FILE_EXIST "geobin.counts.pivoted_to_col.fsdb" || {
    dbroweval '_site = "none" if (_site eq "-");' \
        < "${DATA_DEST_DIR}${FILE_NAME}.geobin.counts.fsdb" |
        dbfilepivot -k rounded_lat_long -p site -v count |
        dbcolcreate all_site_count site_counts |
        # To include IPs with multiple catchments, swap the commented
        # dbroweval line below in for the active one.
        #dbroweval '_all_site_count ='${SITE_ADD}'+ _site_multiple; _site_counts = join(",",'${SITE_JOIN}',_site_multiple); _site_counts =~ s/-/0/g; ' |
        dbroweval '_all_site_count ='${SITE_ADD}'; _site_counts = join(",",'${SITE_JOIN}'); _site_counts =~ s/-/0/g; ' |
        dbcol rounded_lat_long site_counts all_site_count |
        dbfilealter -F t > "${DATA_DEST_DIR}geobin.counts.pivoted_to_col.fsdb"
}


# If you want to see the IPs with multiple catchments, uncomment the following two lines.
# SITES=${SITES},MUL
# (( N_SITES = $N_SITES + 1 ))

# Render the catchment map as a PNG.
# BUG FIX: check for the file name that is actually produced
# ("...-id-$ID-${SITES}.png"); the old check looked for a name that is
# never written, so the map was regenerated on every run.
FILE_EXIST "${FILE_NAME}-id-${ID}-${SITES}.png" || {
    ${GEOBIN_TO_WORLDMAP} --theme=lighter,wedges --value-col=site_counts \
        --radius-col=all_site_count --radius-scale="${SCALE}" --radius-max="${RADIUS_MAX}" \
        --url='' --dataset-name="DATE: $DATE, ID: $ID" --value-legend="site:${SITES}" \
        --value-max="${N_SITES}" -o "${DATA_DEST_DIR}${FILE_NAME}-id-${ID}-${SITES}.png" \
        < "${DATA_DEST_DIR}geobin.counts.pivoted_to_col.fsdb"
}

# Render the same map as a PDF.
# BUG FIX: check for the file name that is actually produced
# ("...-id-$ID-${SITES}.pdf"); the old check ("...all.pdf") never
# matched, so the map was regenerated on every run.
FILE_EXIST "${FILE_NAME}-id-${ID}-${SITES}.pdf" || {
    ${GEOBIN_TO_WORLDMAP} --theme=lighter,wedges --value-col=site_counts \
        --radius-col=all_site_count --radius-scale="${SCALE}" --radius-max="${RADIUS_MAX}" \
        --url='' --dataset-name="DATE: $DATE, ID: $ID" --value-legend="site:${SITES}" \
        --value-max="${N_SITES}" --format=pdf -o "${DATA_DEST_DIR}${FILE_NAME}-id-${ID}-${SITES}.pdf" \
        < "${DATA_DEST_DIR}geobin.counts.pivoted_to_col.fsdb"
}

# Uncomment the lines below to delete the intermediate generated files.

#for ((index=0;index<$N_SITES;index++));
#do
#    CURRENT_SITE=${SITE_LIST[$index]}
#    rm ${DATA_DEST_DIR}${FILE_NAME}.${CURRENT_SITE}.*
#done

#rm ${DATA_DEST_DIR}${FILE_NAME}.PART*
#rm ${DATA_DEST_DIR}${FILE_NAME}.all.geoloc.fsdb
#rm ${DATA_DEST_DIR}${FILE_NAME}.geobin.counts.fsdb

#rm ${DATA_DEST_DIR}targets.fsdb
