#!/bin/bash

# Base URL of the UniProt "taxonomic_divisions" flat-file directory.
# (Original note: this host now redirects to EBI UK.)
URL="ftp://ftp.ebi.edu.au/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/"

# Directory holding this script. The formatted databases are written relative
# to it, under ../db/ (i.e. /path/to/prokka/db/).
# "$0" is quoted so an install path containing spaces is not word-split.
ROOTDIR=$(dirname -- "$0")

# Warn that the existing Prokka BLAST databases are about to be replaced,
# then pause for 5 seconds so the user has a chance to abort with CTRL-C.
printf '%s\n' \
  "*** NOTICE ***" \
  "This script is about to download data from the Internet and overwrite your existing Prokka BLAST databases." \
  "(press CTRL-C within 5 seconds to abort!)"
sleep 5

# Make a pipeline report failure when ANY stage fails (not only the last one),
# so a broken download or conversion cannot silently leave behind an empty or
# truncated database.
set -o pipefail

# For each kingdom: download the Swiss-Prot division, convert it to FASTA,
# build a protein BLAST database, and dump that database back out as FASTA.
# Reads URL and ROOTDIR set earlier in this script; exits with status 1 on
# the first step that fails.
for K in Viruses Archaea Bacteria ; do

  # lowercase name for download
  KL=$(printf '%s' "$K" | tr '[:upper:]' '[:lower:]')

  # ${URL%/} drops a trailing slash (if any) so the joined URL has no "//"
  DAT="${URL%/}/uniprot_sprot_$KL.dat.gz"

  DIR="$ROOTDIR/../db/kingdom/$K"
  if ! mkdir -pv "$DIR" ; then
    echo "ERROR: could not create directory $DIR" >&2
    exit 1
  fi
  echo "Downloading $DAT and installing in $DIR"

  # pipe: download, gunzip, convert to fasta, format into database
  # no intermediate files!
  if ! wget -q -O - "$DAT" \
      | gunzip \
      | "$ROOTDIR/prokka-uniprot_to_fasta_db" \
      | makeblastdb -in - -dbtype prot -title "Prokka $K" -hash_index -out "$DIR/sprot" ; then
    echo "ERROR: failed to download or format $DAT into $DIR/sprot" >&2
    exit 1
  fi

  # If you want a copy of the sprot file too (we do for git distribution).
  # This plain FASTA file sits alongside the database's own index files,
  # which share the "sprot" prefix but carry extensions.
  if ! blastdbcmd -db "$DIR/sprot" -entry all > "$DIR/sprot" ; then
    echo "ERROR: failed to export FASTA from BLAST database $DIR/sprot" >&2
    exit 1
  fi

  # If you want to use old BLAST:
  # formatdb -i stdin -p T -V T -t "Prokka $K" -n "$DIR/sprot"

done

