#!/bin/bash

function print_help_and_exit
{
cat >&2 << EOF

'voronota-contacts' script provides a way for calculating and querying interatomic contacts
with just one command (without the need to construct a pipeline from 'voronota' calls).

Basic options:
    --input | -i                   string   *  input structure file in PDB or mmCIF format
    --input-filter-query           string      input atoms filtering query parameters
    --contacts-query               string      contacts query parameters
    --contacts-query-additional    string      additional, preceeding query parameters, default is '--match-min-seq-sep 1'
    --cache-dir                    string      path to cache directory
    --sum-at-end                               flag to print sum of areas as the last line in output
    --tsv-output                               flag to output table in tab-separated values format with header
    --help | -h                                flag to display help message and exit

Advanced options:
    --output-drawing               string      output file with drawing script for PyMol
    --drawing-parameters           string      drawing parameters
    --wireframe-drawing                        flag to draw wireframe representation of contacts
    --multiple-models                          flag to handle multiple models in PDB file
    --use-hbplus                               flag to run 'hbplus' to tag H-bonds

Standard output (multiple lines):
    {contacting atom} {contacting atom} {contact area} {distance between atoms centers} {tags} {adjunct values}

EOF
exit 1
}

readonly ZEROARG=$0

if [[ $ZEROARG == *"/"* ]]
then
	cd "$(dirname ${ZEROARG})"
	export PATH="$(pwd):${PATH}"
	cd - &> /dev/null
fi

export LC_ALL=C

command -v voronota &> /dev/null || { echo >&2 "Error: 'voronota' executable not in binaries path"; exit 1; }
command -v voronota-resources &> /dev/null || { echo >&2 "Error: 'voronota-resources' executable not in binaries path"; exit 1; }

INFILE=""
INPUT_FILTER_QUERY_PARAMETERS=""
CONTACTS_QUERY_FIRST_PARAMETERS="--match-min-seq-sep 1"
CONTACTS_QUERY_SECOND_PARAMETERS=""
DRAWING_OUTPUT=""
DRAWING_PARAMETERS=""
CONTACTS_CACHE_DIRECTORY=""
MULTIPLE_MODELS_CHAINS_OPTION=""
SUM_AT_END=false
TSV_OUTPUT=false
WIREFRAME_DRAWING=false
USE_HBPLUS=false
HELP_MODE=false

while [[ $# > 0 ]]
do
	OPTION="$1"
	OPTARG="$2"
	shift
	case $OPTION in
	-i|--input)
		INFILE="$OPTARG"
		shift
		;;
	--input-filter-query)
		INPUT_FILTER_QUERY_PARAMETERS="$OPTARG"
		shift
		;;
	--contacts-query-additional)
		CONTACTS_QUERY_FIRST_PARAMETERS="$OPTARG"
		shift
		;;
	--contacts-query)
		CONTACTS_QUERY_SECOND_PARAMETERS="$OPTARG"
		shift
		;;
	--output-drawing)
		DRAWING_OUTPUT="$OPTARG"
		shift
		;;
	--drawing-parameters)
		DRAWING_PARAMETERS="$OPTARG"
		shift
		;;
	--cache-dir)
		CONTACTS_CACHE_DIRECTORY="$OPTARG"
		shift
		;;
	--multiple-models)
		MULTIPLE_MODELS_CHAINS_OPTION="--multimodel-chains"
		;;
	--sum-at-end)
		SUM_AT_END=true
		;;
	--tsv-output)
		TSV_OUTPUT=true
		;;
	--wireframe-drawing)
		WIREFRAME_DRAWING=true
		;;
	--use-hbplus)
		USE_HBPLUS=true
		;;
	-h|--help)
		HELP_MODE=true
		;;
	*)
		echo >&2 "Error: invalid command line option '$OPTION'"
		exit 1
		;;
	esac
done

if [ -z "$INFILE" ] || $HELP_MODE
then
	print_help_and_exit
fi

if $USE_HBPLUS
then
	command -v hbplus &> /dev/null || { echo >&2 "Warning: 'hbplus' executable not in binaries path, thus no 'hb' tags will be assigned"; USE_HBPLUS=false; }
fi

MD5SUM_COMMAND="md5sum"
if command -v md5sum &> /dev/null
then
	MD5SUM_COMMAND="md5sum"
else
	MD5SUM_COMMAND="md5"
fi
command -v $MD5SUM_COMMAND &> /dev/null || { echo >&2 "Error: 'md5sum' or 'md5' executable not in binaries path"; exit 1; }

if [ ! -s "$INFILE" ]
then
	echo >&2 "Error: input file does not exist"
	exit 1
fi

readonly TMPLDIR=$(mktemp -d)
trap "rm -r $TMPLDIR" EXIT

voronota-resources radii > "$TMPLDIR/radii"

if [ ! -s "$TMPLDIR/radii" ]
then
	echo >&2 "Error: failed to get the predefined atomic radii"
	exit 1
fi

{
	if [[ "$INFILE" == *".gz" ]]
	then
		zcat "$INFILE"
	else
		cat "$INFILE"
	fi
} \
| voronota get-balls-from-atoms-file \
  --input-format detect \
  --annotated $MULTIPLE_MODELS_CHAINS_OPTION \
  --radii-file $TMPLDIR/radii \
  --include-heteroatoms \
| voronota query-balls \
  --drop-altloc-indicators \
| voronota query-balls $INPUT_FILTER_QUERY_PARAMETERS \
> $TMPLDIR/balls

if [ ! -s "$TMPLDIR/balls" ]
then
	echo >&2 "Error: no atoms in input file"
	exit 1
fi

{
echo "Unfiltered contacts with tags version 4"
if $USE_HBPLUS
then
	echo "Using 'hbplus' output"
fi
} > $TMPLDIR/hashsalt

BALLS_MD5=""
if [ -n "$CONTACTS_CACHE_DIRECTORY" ]
then
	BALLS_MD5=$(cat $TMPLDIR/hashsalt $TMPLDIR/balls | $MD5SUM_COMMAND | awk '{print $1}')
	if [ -n "$BALLS_MD5" ]
	then
		BALLS_MD5="${BALLS_MD5}.voronota.contacts"
		if [ -s "$CONTACTS_CACHE_DIRECTORY/$BALLS_MD5" ]
		then
			cp $CONTACTS_CACHE_DIRECTORY/$BALLS_MD5 $TMPLDIR/all_contacts
		fi
	fi
fi

if [ -n "$DRAWING_OUTPUT" ] && [ -s "$TMPLDIR/all_contacts" ]
then
	GRAPHICS_TOKEN=$(cat $TMPLDIR/all_contacts | head -1 | awk '{print $8}')
	if [ -z "$GRAPHICS_TOKEN" ]
	then
		rm $TMPLDIR/all_contacts
	fi
fi

if [ ! -s "$TMPLDIR/all_contacts" ]
then
	OPTIONAL_DRAW_FLAG=""
	if [ -n "$DRAWING_OUTPUT" ]
	then
		OPTIONAL_DRAW_FLAG="--draw"
	fi
	
	OPTIONAL_HBPLUS_TAGS_SETTING=""
	if $USE_HBPLUS
	then
		mkdir -p "$TMPLDIR/hbplus_workdir"
		cat "$TMPLDIR/balls" | voronota write-balls-to-atoms-file --pdb-output "$TMPLDIR/hbplus_workdir/struct.pdb" > /dev/null
		( cd "$TMPLDIR/hbplus_workdir" ; hbplus ./struct.pdb &> /dev/null ; cd - &> /dev/null )
		if [ ! -s "$TMPLDIR/hbplus_workdir/struct.hb2" ]
		then
			echo >&2 "Error: failed to run hbplus"
			exit 1
		fi
		OPTIONAL_HBPLUS_TAGS_SETTING="--set-hbplus-tags $TMPLDIR/hbplus_workdir/struct.hb2"
	fi
	
	cat $TMPLDIR/balls \
	| voronota calculate-contacts --annotated $OPTIONAL_DRAW_FLAG \
	| voronota query-contacts --preserve-graphics $OPTIONAL_HBPLUS_TAGS_SETTING \
	| voronota query-contacts --preserve-graphics \
	  --set-tags 'sb' --match-first 'R<ASP,GLU>&A<OD1,OD2,OE1,OE2,OXT>' --match-second 'R<ARG,HIS,LYS>&A<NH1,NH2,ND1,NE2,NZ>' --match-max-dist 4.0 \
	| voronota query-contacts --preserve-graphics \
	  --set-tags 'ds' --match-first 'R<CYS>&A<SG>' --match-second 'R<CYS>&A<SG>' --match-max-dist 3.0 \
	> $TMPLDIR/all_contacts
	
	if [ ! -s "$TMPLDIR/all_contacts" ]
	then
		echo >&2 "Error: failed to calculate contacts"
		exit 1
	fi
	
	if [ -n "$CONTACTS_CACHE_DIRECTORY" ] && [ -n "$BALLS_MD5" ]
	then
		mkdir -p $CONTACTS_CACHE_DIRECTORY
		cp $TMPLDIR/all_contacts $CONTACTS_CACHE_DIRECTORY/$BALLS_MD5
	fi
fi

{
if [ -n "$DRAWING_OUTPUT" ]
then
	cat $TMPLDIR/all_contacts \
	| voronota query-contacts $CONTACTS_QUERY_FIRST_PARAMETERS \
	  --preserve-graphics \
	| voronota query-contacts $CONTACTS_QUERY_SECOND_PARAMETERS \
	  --preserve-graphics \
	> $TMPLDIR/all_contacts_for_drawing
	
	if $WIREFRAME_DRAWING
	then
		cat $TMPLDIR/all_contacts_for_drawing \
		| sed 's|_tfanc \S\+ \S\+ \S\+ \S\+ \S\+ \S\+|_lloop|g' \
		| sed 's|_tstrip \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+ \S\+||g' \
		> $TMPLDIR/all_contacts_for_drawing_wireframe
		mv $TMPLDIR/all_contacts_for_drawing_wireframe $TMPLDIR/all_contacts_for_drawing
	fi
	
	cat $TMPLDIR/all_contacts_for_drawing \
	| voronota draw-contacts $DRAWING_PARAMETERS \
	  --drawing-for-pymol $DRAWING_OUTPUT \
	> /dev/null
	
	cat $TMPLDIR/all_contacts_for_drawing \
	| sed 's/^\(\S\+\s\+\S\+\s\+\S\+\s\+\S\+\s\+\S\+\s\+\S\+\).*$/\1/'
else
	cat $TMPLDIR/all_contacts \
	| voronota query-contacts $CONTACTS_QUERY_FIRST_PARAMETERS \
	| voronota query-contacts $CONTACTS_QUERY_SECOND_PARAMETERS
fi
} > $TMPLDIR/result_contacts

if $SUM_AT_END
then
	cat $TMPLDIR/result_contacts \
	| voronota query-contacts --summarize \
	> $TMPLDIR/result_contacts_summary
	
	cat $TMPLDIR/result_contacts $TMPLDIR/result_contacts_summary > $TMPLDIR/result_contacts_final
else
	cat $TMPLDIR/result_contacts > $TMPLDIR/result_contacts_final
fi

if $TSV_OUTPUT
then
	{
		echo "chain1 rnum1 ins1 anum1 alt1 rname1 aname1 chain2 rnum2 ins2 anum2 alt2 rname2 aname2 area dist tags adjuncts"
		cat $TMPLDIR/result_contacts_final | voronota expand-descriptors
	} | sed 's/\s\+/\t/g'
else
	cat $TMPLDIR/result_contacts_final | column -t
fi

