#!/usr/bin/env python

# ----------------------------------------------------------------------------
# Copyright (c) 2015, The Deblur Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import click
from os import makedirs, mkdir
from os.path import join, isfile, exists, isdir
from shutil import rmtree
import logging
import sys
from glob import glob

from skbio.util import remove_files

from deblur.deblurring import (deblur, get_default_error_profile)
from deblur.parallel_deblur import parallel_deblur
from deblur.workflow import (launch_workflow, trim_seqs, dereplicate_seqs,
                             remove_artifacts_seqs,
                             multiple_sequence_alignment,
                             remove_chimeras_denovo_from_seqs,
                             split_sequence_file_on_sample_ids_to_files,
                             build_index_sortmerna,
                             get_files_for_table,
                             create_otu_table,
                             start_log, sequence_generator,
                             remove_artifacts_from_biom_table)

from deblur.support_files import pos_db, neg_db

__version__ = "0.9.2.1"


def error_dist_from_str(ctx, param, value):
    """Validate the error profile is either None or a list of floats
    and return it as a list of floats
    """
    if value is None:
        return value
    if not isinstance(value, str):
        return value
    try:
        error_dist = list(map(float, value.split(',')))
        return error_dist
    except ValueError:
        raise click.BadParameter('Error distribution must be a comma '
                                 'separated list of maximal error '
                                 'probability per hamming distance')


@click.group()
def deblur_cmds():
    # DEBLUR SEQUENCES COMMAND
    pass


@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.option('--mean-error', '-m', required=False, type=float, default=0.005,
              show_default=True,
              help="The mean per nucleotide error, used for original "
              "sequence estimate. If "
              "not passed typical illumina error rate is used")
@click.option('--error-dist', '-d', required=False, type=str,
              default=get_default_error_profile(),
              show_default=True,
              callback=error_dist_from_str,
              help="A comma separated list of error probabilities for each "
                   "hamming distance. The length of the list determines the "
                   "number of hamming distances taken into account.")
@click.option('--indel-prob', '-i', required=False, type=float, default=0.01,
              help='Insertion/deletion (indel) probability '
                   '(same for N indels)')
@click.option('--indel-max', required=False, type=int, default=3,
              help="Maximal indel number")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True,
                              readable=True, exists=False,
                              dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def deblur_seqs(seqs_fp, mean_error, error_dist, indel_prob,
                indel_max, log_level, log_file):
    """Clean read errors from Illumina reads"""
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.info('deblur deblur_seqs started on file %s' % seqs_fp)

    # set the error distribution
    if error_dist is None:
        logger.info('using default error profile')
        # set to the default illumina read error profile
        error_dist = get_default_error_profile()

    logger.info('error_dist is : %s' % error_dist)

    seqs = deblur(sequence_generator(seqs_fp), mean_error, error_dist,
                  indel_prob, indel_max)

    output_path = "%s.clean" % seqs_fp
    with open(output_path, 'w') as f:
        for s in seqs:
            f.write(s.to_fasta())


# TRIM LENGTH COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--trim-length', '-t', required=False, type=int, default=100,
              help="Sequence trim length")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def trim(seqs_fp, output_fp, trim_length, log_level, log_file):
    """Trim FASTA sequences"""
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.info('deblur deblur_seqs started on file %s' % seqs_fp)
    with open(output_fp, 'w') as out_f:
        for label, seq in trim_seqs(sequence_generator(seqs_fp), trim_length):
            out_f.write(">%s\n%s\n" % (label, seq))


# SEQUENCE DEREPLICATION/SINGLETON REMOVAL COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--min-size', required=False, type=int, default=2,
              show_default=True, help="Discard sequences with an abundance "
              "value smaller than min-size")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def dereplicate(seqs_fp, output_fp, min_size, log_level, log_file):
    """
    Dereplicate FASTA sequences.

    Dereplicate input FASTA sequences and remove clusters
    with fewer than minimum number of occurrences (set by --min-size).
    """
    start_log(level=log_level * 10, filename=log_file)
    dereplicate_seqs(seqs_fp=seqs_fp,
                     output_fp=output_fp,
                     min_size=min_size)


# ARTIFACT REMOVAL COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_dir', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--ref-fp', required=False, multiple=True,
              default=[], show_default=False,
              type=click.Path(resolve_path=True, readable=True, exists=True,
                              file_okay=True),
              help="Keep all sequences aligning to this FASTA database "
                   "(for multiple databases, use "
                   "--ref-fp db1.fa --ref-fp db2.fa ..)\n"
                   "default for positive filtering is: %s\n"
                   "default for negative filtering is: %s" %
                   (pos_db, neg_db))
@click.option('--ref-db-fp', required=False, multiple=True,
              type=click.Path(resolve_path=True, readable=True, exists=False,
                              file_okay=True),
              help="Keep all sequences aligning to this indexed "
                   "database. For multiple databases, the order "
                   "must follow that of --ref-fp, for example, "
                   "--ref-db-fp db1.idx --ref-db-fp db2.idx ..")
@click.option('--negate', '-n', required=False, default=False, is_flag=True,
              show_default=True, type=bool,
              help="Discard all sequences aligning to the database "
                   "passed via --ref-fp option")
@click.option('--threads-per-sample', '-t', required=False, type=int,
              default=1, show_default=True,
              help="Number of threads to use per sample (0 to use all)")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def remove_artifacts(seqs_fp, output_dir, ref_fp, ref_db_fp,
                     negate, threads_per_sample, log_level, log_file):
    """
    Filter artifacts.

    Filter artifacts from input sequences based on database(s)
    provided with the --ref-fp (raw FASTA file) or
    --ref-db-fp (indexed database) options.
    """
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.info('---------------------------------')
    logger.info('remove_artifacts')
    logger.info('number of database files %d' % len(ref_fp))
    # get the default values for the ref_fp if empty
    if len(ref_fp) == 0:
        if negate:
            ref_fp = [neg_db]
            logger.debug('Using default negative filtering file %s' % ref_fp)
        else:
            ref_fp = [pos_db]
            logger.debug('Using default positive filtering file %s' % ref_fp)

    if ref_db_fp:
        if len(ref_fp) != len(ref_db_fp):
            logger.critical("The number of FASTA reference databases "
                            "does not match the number of indexed "
                            "reference databases")
            raise ValueError("The number of FASTA reference databases "
                             "does not match the number of indexed "
                             "reference databases")
    else:
        logger.warn('sortmerna database does not exist, '
                    'creating it into dir %s' % output_dir)
        ref_db_fp = build_index_sortmerna(
            ref_fp=ref_fp,
            working_dir=output_dir)

    remove_artifacts_seqs(seqs_fp=seqs_fp,
                          ref_fp=ref_fp,
                          working_dir=output_dir,
                          ref_db_fp=ref_db_fp,
                          negate=negate,
                          threads=threads_per_sample)


# INDEX ARTIFACT DB COMMAND
@deblur_cmds.command()
@click.argument('ref_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.argument('output_dir', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def build_db_index(ref_fp, output_dir, log_level, log_file):
    """
    Preapare the artifacts database

    Input:
    ref_fp - the fasta sequence database for artifact removal
    output_dir - the directory to where to write the indexed database
    """
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.info('---------------------------------')
    logger.info('build_db_index')
    logger.info('for database file %s, into dir %s' % (ref_fp, output_dir))
    build_index_sortmerna(ref_fp=[ref_fp], working_dir=output_dir)
    logger.info('done build_db_index')


# MULTIPLE SEQUENCE ALIGNMENT COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--threads-per-sample', '-a', required=False, type=int,
              default=1, show_default=True,
              help="Number of threads to use per sample (0 to use all)")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def multiple_seq_alignment(seqs_fp, output_fp, threads_per_sample,
                           log_level, log_file):
    """Multiple sequence alignment"""
    start_log(level=log_level * 10, filename=log_file)
    alignment = multiple_sequence_alignment(seqs_fp,
                                            threads=threads_per_sample)

    with open(output_fp, 'w') as f:
        f.write(alignment.to_fasta())


# DE NOVO CHIMERA REMOVAL COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_fp', required=True,
                type=click.Path(resolve_path=True, readable=True,
                                exists=False, file_okay=True))
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def remove_chimeras_denovo(seqs_fp, output_fp, log_level, log_file):
    """Remove chimeras de novo using UCHIME (VSEARCH implementation)"""
    start_log(level=log_level * 10, filename=log_file)
    remove_chimeras_denovo_from_seqs(seqs_fp, output_fp)


# GENERATE BIOM TABLE COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_biom_fp', required=True,
                type=click.Path(resolve_path=True, readable=True,
                                exists=False, file_okay=True))
@click.option('--min-reads', required=False, type=int, default=0,
              show_default=True, help="In final biom table - keep only "
              "sequences appearing at least min-reads in all samples "
              "combined.")
@click.option('--file_type', required=False, type=str,
              default='.fasta.trim.derep.no_artifacts.msa.deblur.no_chimeras',
              show_default=True,
              help='ending of files to be added to the biom table')
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def build_biom_table(seqs_fp, output_biom_fp, min_reads, file_type, log_level,
                     log_file):
    """Generate a BIOM table from a directory of chimera removed fasta files
    Parameters
    ----------
    seqs_fp : str
      the path to the directory containing the chimera removed fasta files
    output_biom_fp : str
      the path where to save the output biom table file ('final.biom')
    file_type : str
      the files type to add to the table
      (default='.trim.derep.no_artifacts.msa.deblur.no_chimeras',
      can be '.fasta' or '.fa' if needed)
    """
    start_log(level=log_level * 10, filename=log_file)
    output_filename = 'final.biom'
    output_fp = join(output_biom_fp, output_filename)
    outputfasta_filename = 'final.seq.fa'
    outputfasta_fp = join(output_biom_fp, outputfasta_filename)

    samples = get_files_for_table(seqs_fp, file_type)
    create_otu_table(output_fp, samples,
                     outputfasta_fp=outputfasta_fp, minreads=min_reads)


# LAUNCH FULL DEBLUR PIPELINE COMMAND
@deblur_cmds.command()
@click.option('--seqs-fp', required=True,
              type=click.Path(resolve_path=True, readable=True, exists=True,
                              file_okay=True, dir_okay=True),
              help="Demultiplexed FASTA file including all samples, "
              "or a directory of .fasta files")
@click.option('--output-dir', required=True,
              type=click.Path(resolve_path=True, readable=True, exists=False,
                              dir_okay=True),
              help="Directory path to store output including BIOM table")
@click.option('--pos-ref-fp', required=False, multiple=True,
              default=[], show_default=False,
              type=click.Path(resolve_path=True, readable=True, exists=True,
                              file_okay=True),
              help="Positive (16S) filtering database. "
                   "Keep all sequences aligning to this FASTA file "
                   "(for multiple databases, use "
                   "--ref-fp db1.fa --ref-fp db2.fa ..)\n"
                   "default is: %s\n" % pos_db)
@click.option('--pos-ref-db-fp', required=False, multiple=True,
              type=click.Path(resolve_path=True, readable=True, exists=False,
                              file_okay=True),
              help="The indexed  version of the positive filtering database. "
                   "If not supplied, deblur will index the database."
                   "For multiple databases, the order "
                   "must follow that of --ref-fp, for example, "
                   "--ref-db-fp db1.idx --ref-db-fp db2.idx ..")
@click.option('--neg-ref-fp', required=False, multiple=True,
              default=[], show_default=False,
              type=click.Path(resolve_path=True, readable=True, exists=True,
                              file_okay=True),
              help="Negative (artifacts) filtering database. "
                   "Keep all sequences aligning to this FASTA file "
                   "(for multiple databases, use "
                   "--ref-fp db1.fa --ref-fp db2.fa ..)\n"
                   "default is: %s\n" % neg_db)
@click.option('--neg-ref-db-fp', required=False, multiple=True,
              type=click.Path(resolve_path=True, readable=True, exists=False,
                              file_okay=True),
              help="The indexed  version of the negative filtering database. "
                   "If not supplied, deblur will index the database."
                   "For multiple databases, the order "
                   "must follow that of --ref-fp, for example, "
                   "--ref-db-fp db1.idx --ref-db-fp db2.idx ..")
@click.option('--overwrite', '-w', required=False, type=bool, default=False,
              is_flag=True,
              show_default=True, help="Overwrite output directory if exists.")
@click.option('--mean-error', '-m', required=False, type=float, default=0.005,
              show_default=True,
              help="The mean per nucleotide error, used for original "
              "sequence estimate. If "
              "not passed typical illumina error rate is used")
@click.option('--error-dist', '-d', required=False, type=str,
              default=get_default_error_profile(),
              show_default=True,
              callback=error_dist_from_str,
              help="A comma separated list of error probabilities for each "
                   "hamming distance. The length of the list determines the "
                   "number of hamming distances taken into account.")
@click.option('--indel-prob', '-i', required=False, type=float, default=0.01,
              show_default=True,
              help='Insertion/deletion (indel) probability '
                   '(same for N indels)')
@click.option('--indel-max', required=False, type=int, default=3,
              show_default=True,
              help="Maximal indel number")
@click.option('--trim-length', '-t', required=False, type=int, default=100,
              show_default=True, help="Sequence trim length")
@click.option('--min-reads', required=False, type=int, default=0,
              show_default=True, help="In final biom table - keep only "
              "sequences appearing at least min-reads in all samples "
              "combined.")
@click.option('--min-size', required=False, type=int, default=2,
              show_default=True, help="Per sample - discard sequences with"
              " an abundance value smaller than min-size")
@click.option('--negate', '-n', required=False, default=False, is_flag=True,
              show_default=True, type=bool,
              help="Discard all sequences aligning to the database "
                   "passed via --ref-fp option")
@click.option('--threads-per-sample', '-a', required=False, type=int,
              default=1, show_default=True,
              help="Number of threads to use per sample (0 to use all)")
@click.option('--keep-tmp-files', required=False, type=bool, default=False,
              is_flag=True,
              show_default=True,
              help="Keep temporary files (useful for debugging)")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
@click.option('--jobs-to-start', '-O', required=False, type=int, default=1,
              show_default=True,
              help="Number of jobs to start (if to run in parallel)")
@click.option('--is-worker-thread', required=False, type=bool, default=False,
              is_flag=True,
              show_default=True,
              help="indicates this is not the main deblur process "
              "(used by the parallel deblur - do not use manually)")
@click.option('attack_mode', '-A', required=False, type=bool, default=False,
              help="ATTACK MODE (http://xkcd.com/1692)")
def workflow(seqs_fp, output_dir, pos_ref_fp, pos_ref_db_fp,
             neg_ref_fp, neg_ref_db_fp, overwrite,
             mean_error, error_dist, indel_prob, indel_max,
             trim_length, min_reads, min_size, negate, threads_per_sample,
             keep_tmp_files, log_level, log_file, jobs_to_start,
             is_worker_thread, attack_mode):
    """Launch deblur workflow"""
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.warn('deblur version %s workflow started on %s' %
                (__version__, seqs_fp))
    logger.warn('parameters: %s' % locals())

    # set the error distribution
    if error_dist is None:
        logger.info('using default error profile')
        # set to the default illumina read error profile
        error_dist = get_default_error_profile()

    logger.info('error_dist is : %s' % error_dist)

    # get the default values for the rep_fp if empty
    if len(pos_ref_fp) == 0:
        pos_ref_fp = [pos_db]
        logger.debug('Using default positive filtering file %s' % pos_ref_fp)
    if len(neg_ref_fp) == 0:
        neg_ref_fp = [pos_db]
        logger.debug('Using default negative filtering file %s' % neg_ref_fp)

    # Create output directory
    if not is_worker_thread:
        logger.info('deblur main program started')
        if exists(output_dir):
            if overwrite:
                logger.debug('overwrite is on - deleting directory %s' %
                             output_dir)
                rmtree(output_dir)
            else:
                logger.critical('output directory %s already exists' %
                                output_dir)
                raise OSError("Output directory already exists. Choose a "
                              "different directory or use option "
                              "--overwrite (-w)")
        working_dir = join(output_dir, "deblur_working_dir")
        makedirs(working_dir)
    else:
        logger.info('deblur worker thread started')
        working_dir = join(output_dir, "deblur_working_dir")

    # if it's a worker thread, the seqs_fp parameter
    # is the single fasta file name
    if is_worker_thread:
        input_file_list = [seqs_fp]
    else:
        # if seqs_fp is a dir, use the fasta files in it,
        # otherwise, assume its a post split-libraries fasta file
        # and split it to fasta file per sample
        if isdir(seqs_fp):
            logger.info('processing directory %s' % seqs_fp)
            out_dir_split = seqs_fp
        else:
            # Split demultiplexed FASTA on sample IDs
            out_dir_split = join(output_dir, "split")
            logger.info('splitting file %s to per sample fasta. '
                        'output %s' % (seqs_fp, out_dir_split))
            mkdir(out_dir_split)
            with open(seqs_fp, 'U') as seqs_f:
                split_sequence_file_on_sample_ids_to_files(
                    seqs_f,
                    out_dir_split)
        input_file_list = glob('%s/*.fast[aq]' % out_dir_split)

    files_to_remove = []
    # Build SortMeRNA indexes
    if negate:
        if not neg_ref_db_fp:
            logger.info('building negative db sortmerna index files')
            neg_ref_db_fp = build_index_sortmerna(ref_fp=neg_ref_fp,
                                                  working_dir=working_dir)
    if not pos_ref_db_fp:
        logger.info('building positive db sortmerna index files')
        pos_ref_db_fp = build_index_sortmerna(ref_fp=pos_ref_fp,
                                              working_dir=working_dir)

    if negate:
        ref_db_fp = neg_ref_db_fp
        ref_fp = neg_ref_fp
    else:
        ref_db_fp = pos_ref_db_fp
        ref_fp = pos_ref_fp

    # Deblur each sample fasta file
    if jobs_to_start < 2:
        # run in serial mode (no parallelization)
        logger.info('processing per sample fasta files')
        for input_fp in input_file_list:
            if isfile(input_fp):
                deblurred_file_name = launch_workflow(
                    seqs_fp=input_fp, working_dir=working_dir,
                    mean_error=mean_error,
                    error_dist=error_dist, indel_prob=indel_prob,
                    indel_max=indel_max, trim_length=trim_length,
                    min_size=min_size, ref_fp=ref_fp, ref_db_fp=ref_db_fp,
                    negate=negate, threads_per_sample=threads_per_sample)
            if deblurred_file_name is None:
                logger.warn('deblurring failed for file %s' % input_fp)
        logger.info('finished processing per sample fasta files')
    else:
        # we're want to start parallel mode - so need to create
        # the thread shell commands
        logger.info('parallel processing per sample fasta files')
        parallel_deblur(input_file_list, sys.argv, pos_ref_db_fp,
                        neg_ref_db_fp, jobs_to_start)

    if is_worker_thread:
        logger.info('worker thread for %s finished' % seqs_fp)
        return

    # Merge chimera cleaned fasta files to a single biom table
    output_filename = 'final.biom'
    output_fp = join(output_dir, output_filename)
    outputfasta_filename = 'final.seqs.fa'
    outputfasta_fp = join(output_dir, outputfasta_filename)

    samples = get_files_for_table(working_dir)
    create_otu_table(output_fp, samples,
                     outputfasta_fp=outputfasta_fp, minreads=min_reads)

    # if negative mode, also create a biom table with
    # only 16s sequences and with only non-16s sequences
    if negate:
        remove_artifacts_from_biom_table(output_fp, outputfasta_fp,
                                         pos_ref_fp, output_dir,
                                         pos_ref_db_fp,
                                         threads=threads_per_sample)

    # Clean up
    if not keep_tmp_files:
        logger.info('Cleaning up temp files')
        remove_files(files_to_remove, error_on_missing=False)
        rmtree(working_dir)
    else:
        logger.info('Keeping temp files')
    logger.info('deblur workflow finished')
    logger.info('output saved to %s' % output_fp)
    logger.info('------------------')


if __name__ == '__main__':
    deblur_cmds()
