#!/bin/bash

usage="$(basename "$0") [-h] -r <reference> -i <fastq>

Align fastq/a formatted reads to a genome using minimap2.

    -h  show this help text.
    -r  reference, should be a fasta file. If correspondng bwa indices
        do not exist they will be created. (required).
    -i  fastq/a input reads (required).
    -I  split index every ~NUM input bases (default: 16G, this is larger
        than the usual minimap2 default).
    -a  aggresively extend gaps (sets -A1 -B2 -O2 -E1 for minimap2).
    -P  filter to only primary alignments (i.e. run samtools view -F 2308)
    -n  sort bam by read name.
    -c  chunk size. Input reads/contigs will be broken into chunks
        prior to alignment.
    -t  alignment threads (default: 1).
    -p  output file prefix (default: reads).
    -m  fill MD tag."

PREFIX="reads"
ALIGN_OPTS="-x map-ont"
INDEX_SIZE="16G"
THREADS=1
FILTER=""
SORT=""
CHUNK=""
rflag=false
iflag=false
while getopts ':hr:i:p:aPmnc:t:' option; do
  case "$option" in
    h  ) echo "$usage" >&2; exit;;
    r  ) rflag=true; REFERENCE=$OPTARG;;
    i  ) iflag=true; INPUT=$OPTARG;;
    I  ) INDEX_SIZE=$OPTARG;;
    P  ) FILTER="-F 2308";;
    m  ) ALIGN_OPTS="${ALIGN_OPTS} --MD";;
    n  ) SORT="-n";;
    p  ) PREFIX=$OPTARG;;
    a  ) ALIGN_OPTS="${ALIGN_OPTS} -A1 -B2 -O2 -E1";;
    c  ) CHUNK=$OPTARG;;
    t  ) THREADS=$OPTARG;;
    \? ) echo "Invalid option: -${OPTARG}." >&2; exit 1;;
    :  ) echo "Option -$OPTARG requires an argument." >&2; exit 1;;
  esac
done
shift $(($OPTIND - 1))

if ! $iflag || ! $rflag; then
  echo "$usage" >&2;
  echo "-i and -r must be specified." >&2;
  exit 1;
fi

minimap_exts=('.mmi')
num_minimap_exts=${#minimap_exts[@]}
missing=0
for ext in "${minimap_exts[@]}"; do
  minimap_index=${REFERENCE}${ext}
  if [[ ! -e ${minimap_index} ]]; then
    ((missing+=1))
  fi
done;

if [ "$missing" -eq 0 ]; then
  echo "Found minimap files." >&2
elif [ "$missing" -eq "$num_minimap_exts" ]; then
  echo "Constructing minimap index." >&2
  minimap2 -I ${INDEX_SIZE} ${ALIGN_OPTS} -d ${REFERENCE}.mmi ${REFERENCE}
else
  echo "Missing ${missing} index files. Clean up any files named
${REFERENCE}<EXT> where <EXT> is one of ${minimap_exts[*]}." >&2
  exit 1;
fi

if [ "$CHUNK" != "" ]; then
  echo "Splitting input into ${CHUNK} chunks." >&2
  split_fastx ${INPUT} ${INPUT}.chunks ${CHUNK}
  INPUT=${INPUT}.chunks
fi

minimap2 ${ALIGN_OPTS} -t ${THREADS} -a ${REFERENCE}.mmi ${INPUT} |
  samtools view -@ ${THREADS} -T ${REFERENCE} ${FILTER} -bS - |
  samtools sort -@ ${THREADS} ${SORT} -l 9 -o ${PREFIX}.bam -
samtools index ${PREFIX}.bam ${PREFIX}.bam.bai
