#!/usr/bin/env python

# ----------------------------------------------------------------------------
# Copyright (c) 2015, The Deblur Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import click
from os import makedirs, mkdir, remove
from os.path import join, isfile, exists, isdir
from shutil import rmtree
import logging
import sys
from glob import glob

from deblur.deblurring import (deblur, get_default_error_profile)
from deblur.parallel_deblur import parallel_deblur
from deblur.workflow import (launch_workflow, trim_seqs, dereplicate_seqs,
                             remove_artifacts_seqs,
                             multiple_sequence_alignment,
                             remove_chimeras_denovo_from_seqs,
                             split_sequence_file_on_sample_ids_to_files,
                             build_index_sortmerna,
                             get_files_for_table,
                             create_otu_table,
                             start_log, sequence_generator,
                             remove_artifacts_from_biom_table)

from deblur.support_files import pos_db, neg_db
import deblur

__version__ = deblur.__version__


def error_dist_from_str(ctx, param, value):
    """Validate the error profile is either None or a list of floats
    and return it as a list of floats
    """
    if value is None:
        return value
    if not isinstance(value, str):
        return value
    try:
        error_dist = list(map(float, value.split(',')))
        return error_dist
    except ValueError:
        raise click.BadParameter('Error distribution must be a comma '
                                 'separated list of maximal error '
                                 'probability per hamming distance')


@click.group()
def deblur_cmds():
    # DEBLUR SEQUENCES COMMAND
    pass


@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.option('--mean-error', '-m', required=False, type=float, default=0.005,
              show_default=True,
              help="The mean per nucleotide error, used for original "
              "sequence estimate. If "
              "not passed typical illumina error rate is used")
@click.option('--error-dist', '-d', required=False, type=str,
              default=get_default_error_profile(),
              show_default=True,
              callback=error_dist_from_str,
              help="A comma separated list of error probabilities for each "
                   "hamming distance. The length of the list determines the "
                   "number of hamming distances taken into account.")
@click.option('--indel-prob', '-i', required=False, type=float, default=0.01,
              help='Insertion/deletion (indel) probability '
                   '(same for N indels)')
@click.option('--indel-max', required=False, type=int, default=3,
              help="Maximal indel number")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True,
                              readable=True, exists=False,
                              dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def deblur_seqs(seqs_fp, mean_error, error_dist, indel_prob,
                indel_max, log_level, log_file):
    """Clean read errors from Illumina reads"""
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.info('deblur deblur_seqs started on file %s' % seqs_fp)

    # set the error distribution
    if error_dist is None:
        logger.info('using default error profile')
        # set to the default illumina read error profile
        error_dist = get_default_error_profile()

    logger.info('error_dist is : %s' % error_dist)

    seqs = deblur(sequence_generator(seqs_fp), mean_error, error_dist,
                  indel_prob, indel_max)

    output_path = "%s.clean" % seqs_fp
    with open(output_path, 'w') as f:
        for s in seqs:
            f.write(s.to_fasta())


# TRIM LENGTH COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--trim-length', '-t', required=False, type=int, default=100,
              help="Sequence trim length")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def trim(seqs_fp, output_fp, trim_length, log_level, log_file):
    """Trim FASTA sequences"""
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.info('deblur deblur_seqs started on file %s' % seqs_fp)
    with open(output_fp, 'w') as out_f:
        for label, seq in trim_seqs(sequence_generator(seqs_fp), trim_length):
            out_f.write(">%s\n%s\n" % (label, seq))


# SEQUENCE DEREPLICATION/SINGLETON REMOVAL COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--min-size', required=False, type=int, default=2,
              show_default=True, help="Discard sequences with an abundance "
              "value smaller than min-size")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def dereplicate(seqs_fp, output_fp, min_size, log_level, log_file):
    """
    Dereplicate FASTA sequences.

    Dereplicate input FASTA sequences and remove clusters
    with fewer than minimum number of occurrences (set by --min-size).
    """
    start_log(level=log_level * 10, filename=log_file)
    dereplicate_seqs(seqs_fp=seqs_fp,
                     output_fp=output_fp,
                     min_size=min_size)


# ARTIFACT REMOVAL COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_dir', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--ref-fp', required=False, multiple=True,
              default=[], show_default=False,
              type=click.Path(resolve_path=True, readable=True, exists=True,
                              file_okay=True),
              help="Keep all sequences aligning to this FASTA database "
                   "(for multiple databases, use "
                   "--ref-fp db1.fa --ref-fp db2.fa ..)\n"
                   "default for positive filtering is: %s\n"
                   "default for negative filtering is: %s" %
                   (pos_db, neg_db))
@click.option('--ref-db-fp', required=False, multiple=True,
              type=click.Path(resolve_path=True, readable=True, exists=False,
                              file_okay=True),
              help="Keep all sequences aligning to this indexed "
                   "database. For multiple databases, the order "
                   "must follow that of --ref-fp, for example, "
                   "--ref-db-fp db1.idx --ref-db-fp db2.idx ..")
@click.option('--negate', '-n', required=False, default=False, is_flag=True,
              show_default=True, type=bool,
              help="Discard all sequences aligning to the database "
                   "passed via --ref-fp option")
@click.option('--threads-per-sample', '-t', required=False, type=int,
              default=1, show_default=True,
              help="Number of threads to use per sample (0 to use all)")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def remove_artifacts(seqs_fp, output_dir, ref_fp, ref_db_fp,
                     negate, threads_per_sample, log_level, log_file):
    """
    Filter artifacts.

    Filter artifacts from input sequences based on database(s)
    provided with the --ref-fp (raw FASTA file) or
    --ref-db-fp (indexed database) options.
    """
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.info('---------------------------------')
    logger.info('remove_artifacts')
    logger.info('number of database files %d' % len(ref_fp))
    # get the default values for the ref_fp if empty
    if len(ref_fp) == 0:
        if negate:
            ref_fp = [neg_db]
            logger.debug('Using default negative filtering file %s' % ref_fp)
        else:
            ref_fp = [pos_db]
            logger.debug('Using default positive filtering file %s' % ref_fp)

    if ref_db_fp:
        if len(ref_fp) != len(ref_db_fp):
            logger.critical("The number of FASTA reference databases "
                            "does not match the number of indexed "
                            "reference databases")
            raise ValueError("The number of FASTA reference databases "
                             "does not match the number of indexed "
                             "reference databases")
    else:
        logger.warn('sortmerna database does not exist, '
                    'creating it into dir %s' % output_dir)
        ref_db_fp = build_index_sortmerna(
            ref_fp=ref_fp,
            working_dir=output_dir)

    remove_artifacts_seqs(seqs_fp=seqs_fp,
                          ref_fp=ref_fp,
                          working_dir=output_dir,
                          ref_db_fp=ref_db_fp,
                          negate=negate,
                          threads=threads_per_sample)


# INDEX ARTIFACT DB COMMAND
@deblur_cmds.command()
@click.argument('ref_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.argument('output_dir', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def build_db_index(ref_fp, output_dir, log_level, log_file):
    """
    Preapare the artifacts database

    Input:
    ref_fp - the fasta sequence database for artifact removal
    output_dir - the directory to where to write the indexed database
    """
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.info('---------------------------------')
    logger.info('build_db_index')
    logger.info('for database file %s, into dir %s' % (ref_fp, output_dir))
    build_index_sortmerna(ref_fp=[ref_fp], working_dir=output_dir)
    logger.info('done build_db_index')


# MULTIPLE SEQUENCE ALIGNMENT COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=False,
                                file_okay=True))
@click.option('--threads-per-sample', '-a', required=False, type=int,
              default=1, show_default=True,
              help="Number of threads to use per sample (0 to use all)")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def multiple_seq_alignment(seqs_fp, output_fp, threads_per_sample,
                           log_level, log_file):
    """Multiple sequence alignment"""
    start_log(level=log_level * 10, filename=log_file)
    alignment = multiple_sequence_alignment(seqs_fp,
                                            threads=threads_per_sample)

    with open(output_fp, 'w') as f:
        f.write(alignment.to_fasta())


# DE NOVO CHIMERA REMOVAL COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_fp', required=True,
                type=click.Path(resolve_path=True, readable=True,
                                exists=False, file_okay=True))
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def remove_chimeras_denovo(seqs_fp, output_fp, log_level, log_file):
    """Remove chimeras de novo using UCHIME (VSEARCH implementation)"""
    start_log(level=log_level * 10, filename=log_file)
    remove_chimeras_denovo_from_seqs(seqs_fp, output_fp)


# GENERATE BIOM TABLE COMMAND
@deblur_cmds.command()
@click.argument('seqs_fp', required=True,
                type=click.Path(resolve_path=True, readable=True, exists=True,
                                file_okay=True))
@click.argument('output_biom_fp', required=True,
                type=click.Path(resolve_path=True, readable=True,
                                exists=False, file_okay=True))
@click.option('--min-reads', required=False, type=int, default=0,
              show_default=True, help="In final biom table - keep only "
              "sequences appearing at least min-reads in all samples "
              "combined.")
@click.option('--file_type', required=False, type=str,
              default='.fasta.trim.derep.no_artifacts.msa.deblur.no_chimeras',
              show_default=True,
              help='ending of files to be added to the biom table')
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
def build_biom_table(seqs_fp, output_biom_fp, min_reads, file_type, log_level,
                     log_file):
    """Generate a BIOM table from a directory of chimera removed fasta files
    Parameters
    ----------
    seqs_fp : str
      the path to the directory containing the chimera removed fasta files
    output_biom_fp : str
      the path where to save the output biom table file ('final.biom')
    file_type : str
      the files type to add to the table
      (default='.trim.derep.no_artifacts.msa.deblur.no_chimeras',
      can be '.fasta' or '.fa' if needed)
    """
    start_log(level=log_level * 10, filename=log_file)
    output_filename = 'final.biom'
    output_fp = join(output_biom_fp, output_filename)
    outputfasta_filename = 'final.seq.fa'
    outputfasta_fp = join(output_biom_fp, outputfasta_filename)

    samples = get_files_for_table(seqs_fp, file_type)
    create_otu_table(output_fp, samples,
                     outputfasta_fp=outputfasta_fp, minreads=min_reads)


# LAUNCH FULL DEBLUR PIPELINE COMMAND
@deblur_cmds.command()
@click.option('--seqs-fp', required=True,
              type=click.Path(resolve_path=True, readable=True, exists=True,
                              file_okay=True, dir_okay=True),
              help="Demultiplexed FASTA file including all samples, "
              "or a directory of .fasta files")
@click.option('--output-dir', required=True,
              type=click.Path(resolve_path=True, readable=True, exists=False,
                              dir_okay=True),
              help="Directory path to store output including BIOM table")
@click.option('--pos-ref-fp', required=False, multiple=True,
              default=[], show_default=False,
              type=click.Path(resolve_path=True, readable=True, exists=True,
                              file_okay=True),
              help="Positive (16S) filtering database. "
                   "Keep all sequences aligning to this FASTA file "
                   "(for multiple databases, use "
                   "--ref-fp db1.fa --ref-fp db2.fa ..)\n"
                   "default is: %s\n" % pos_db)
@click.option('--pos-ref-db-fp', required=False, multiple=True,
              type=click.Path(resolve_path=True, readable=True, exists=False,
                              file_okay=True),
              help="The indexed  version of the positive filtering database. "
                   "If not supplied, deblur will index the database."
                   "For multiple databases, the order "
                   "must follow that of --ref-fp, for example, "
                   "--ref-db-fp db1.idx --ref-db-fp db2.idx ..")
@click.option('--neg-ref-fp', required=False, multiple=True,
              default=[], show_default=False,
              type=click.Path(resolve_path=True, readable=True, exists=True,
                              file_okay=True),
              help="Negative (artifacts) filtering database. "
                   "Keep all sequences aligning to this FASTA file "
                   "(for multiple databases, use "
                   "--ref-fp db1.fa --ref-fp db2.fa ..)\n"
                   "default is: %s\n" % neg_db)
@click.option('--neg-ref-db-fp', required=False, multiple=True,
              type=click.Path(resolve_path=True, readable=True, exists=False,
                              file_okay=True),
              help="The indexed  version of the negative filtering database. "
                   "If not supplied, deblur will index the database."
                   "For multiple databases, the order "
                   "must follow that of --ref-fp, for example, "
                   "--ref-db-fp db1.idx --ref-db-fp db2.idx ..")
@click.option('--overwrite', '-w', required=False, type=bool, default=False,
              is_flag=True,
              show_default=True, help="Overwrite output directory if exists.")
@click.option('--mean-error', '-m', required=False, type=float, default=0.005,
              show_default=True,
              help="The mean per nucleotide error, used for original "
              "sequence estimate. If "
              "not passed typical illumina error rate is used")
@click.option('--error-dist', '-d', required=False, type=str,
              default=get_default_error_profile(),
              show_default=True,
              callback=error_dist_from_str,
              help="A comma separated list of error probabilities for each "
                   "hamming distance. The length of the list determines the "
                   "number of hamming distances taken into account.")
@click.option('--indel-prob', '-i', required=False, type=float, default=0.01,
              show_default=True,
              help='Insertion/deletion (indel) probability '
                   '(same for N indels)')
@click.option('--indel-max', required=False, type=int, default=3,
              show_default=True,
              help="Maximal indel number")
@click.option('--trim-length', '-t', required=False, type=int, default=100,
              show_default=True, help="Sequence trim length")
@click.option('--min-reads', required=False, type=int, default=0,
              show_default=True, help="In final biom table - keep only "
              "sequences appearing at least min-reads in all samples "
              "combined.")
@click.option('--min-size', required=False, type=int, default=2,
              show_default=True, help="Per sample - discard sequences with"
              " an abundance value smaller than min-size")
@click.option('--negate', '-n', required=False, default=False, is_flag=True,
              show_default=True, type=bool,
              help="Discard all sequences aligning to the database "
                   "passed via --ref-fp option")
@click.option('--threads-per-sample', '-a', required=False, type=int,
              default=1, show_default=True,
              help="Number of threads to use per sample (0 to use all)")
@click.option('--keep-tmp-files', required=False, type=bool, default=False,
              is_flag=True,
              show_default=True,
              help="Keep temporary files (useful for debugging)")
@click.option('--log-level', required=False,
              type=click.IntRange(1, 5, clamp=True), default=2,
              show_default=True, help="Level of messages for log file"
              "(range 1-debug to 5-critical")
@click.option('--log-file', required=False,
              type=click.Path(resolve_path=True, readable=True,
                              exists=False, dir_okay=True),
              default='deblur.log',
              show_default=True, help="log file name")
@click.option('--jobs-to-start', '-O', required=False, type=int, default=1,
              show_default=True,
              help="Number of jobs to start (if to run in parallel)")
@click.option('--is-worker-thread', required=False, type=bool, default=False,
              is_flag=True,
              show_default=True,
              help="indicates this is not the main deblur process "
              "(used by the parallel deblur - do not use manually)")
@click.option('attack_mode', '-A', required=False, type=bool, default=False,
              help="ATTACK MODE (http://xkcd.com/1692)")
def workflow(seqs_fp, output_dir, pos_ref_fp, pos_ref_db_fp,
             neg_ref_fp, neg_ref_db_fp, overwrite,
             mean_error, error_dist, indel_prob, indel_max,
             trim_length, min_reads, min_size, negate, threads_per_sample,
             keep_tmp_files, log_level, log_file, jobs_to_start,
             is_worker_thread, attack_mode):
    """Launch deblur workflow"""
    start_log(level=log_level * 10, filename=log_file)
    logger = logging.getLogger(__name__)
    logger.warn('deblur version %s workflow started on %s' %
                (__version__, seqs_fp))
    logger.warn('parameters: %s' % locals())

    # set the error distribution
    if error_dist is None:
        logger.info('using default error profile')
        # set to the default illumina read error profile
        error_dist = get_default_error_profile()

    logger.info('error_dist is : %s' % error_dist)

    # get the default values for the rep_fp if empty
    if len(pos_ref_fp) == 0:
        pos_ref_fp = [pos_db]
        logger.debug('Using default positive filtering file %s' % pos_ref_fp)
    if len(neg_ref_fp) == 0:
        neg_ref_fp = [neg_db]
        logger.debug('Using default negative filtering file %s' % neg_ref_fp)

    # Create output directory
    if not is_worker_thread:
        logger.info('deblur main program started')
        if exists(output_dir):
            if overwrite:
                logger.debug('overwrite is on - deleting directory %s' %
                             output_dir)
                rmtree(output_dir)
            else:
                logger.critical('output directory %s already exists' %
                                output_dir)
                raise OSError("Output directory already exists. Choose a "
                              "different directory or use option "
                              "--overwrite (-w)")
        working_dir = join(output_dir, "deblur_working_dir")
        makedirs(working_dir)
    else:
        logger.info('deblur worker thread started')
        working_dir = join(output_dir, "deblur_working_dir")

    # if it's a worker thread, the seqs_fp parameter
    # is the single fasta file name
    if is_worker_thread:
        input_file_list = [seqs_fp]
    else:
        # if seqs_fp is a dir, use the fasta files in it,
        # otherwise, assume its a post split-libraries fasta file
        # and split it to fasta file per sample
        if isdir(seqs_fp):
            logger.info('processing directory %s' % seqs_fp)
            out_dir_split = seqs_fp
        else:
            # Split demultiplexed FASTA on sample IDs
            out_dir_split = join(output_dir, "split")
            logger.info('splitting file %s to per sample fasta. '
                        'output %s' % (seqs_fp, out_dir_split))
            mkdir(out_dir_split)
            with open(seqs_fp, 'U') as seqs_f:
                split_sequence_file_on_sample_ids_to_files(
                    seqs_f,
                    out_dir_split)

        patterns = ['*.fast[aq]', '*.fast[aq].gz', '*.fna', '*.fq', '*.fna.gz',
                    '*.fq.gz']
        input_file_list = []
        for pattern in patterns:
            input_file_list.extend(glob('%s/%s' % (out_dir_split, pattern)))

    files_to_remove = []
    # Build SortMeRNA indexes
    if negate:
        if not neg_ref_db_fp:
            logger.info('building negative db sortmerna index files')
            neg_ref_db_fp = build_index_sortmerna(ref_fp=neg_ref_fp,
                                                  working_dir=working_dir)
    if not pos_ref_db_fp:
        logger.info('building positive db sortmerna index files')
        pos_ref_db_fp = build_index_sortmerna(ref_fp=pos_ref_fp,
                                              working_dir=working_dir)

    if negate:
        ref_db_fp = neg_ref_db_fp
        ref_fp = neg_ref_fp
    else:
        ref_db_fp = pos_ref_db_fp
        ref_fp = pos_ref_fp

    # Deblur each sample fasta file
    if jobs_to_start < 2:
        # run in serial mode (no parallelization)
        logger.info('processing per sample fasta files')
        for input_fp in input_file_list:
            if isfile(input_fp):
                deblurred_file_name = launch_workflow(
                    seqs_fp=input_fp, working_dir=working_dir,
                    mean_error=mean_error,
                    error_dist=error_dist, indel_prob=indel_prob,
                    indel_max=indel_max, trim_length=trim_length,
                    min_size=min_size, ref_fp=ref_fp, ref_db_fp=ref_db_fp,
                    negate=negate, threads_per_sample=threads_per_sample)
            if deblurred_file_name is None:
                logger.warn('deblurring failed for file %s' % input_fp)
        logger.info('finished processing per sample fasta files')
    else:
        # we're want to start parallel mode - so need to create
        # the thread shell commands
        logger.info('parallel processing per sample fasta files')
        parallel_deblur(input_file_list, sys.argv, pos_ref_db_fp,
                        neg_ref_db_fp, jobs_to_start)

    if is_worker_thread:
        logger.info('worker thread for %s finished' % seqs_fp)
        return

    # Merge chimera cleaned fasta files to a single biom table
    output_filename = 'final.biom'
    output_fp = join(output_dir, output_filename)
    outputfasta_filename = 'final.seqs.fa'
    outputfasta_fp = join(output_dir, outputfasta_filename)

    samples = get_files_for_table(working_dir)
    create_otu_table(output_fp, samples,
                     outputfasta_fp=outputfasta_fp, minreads=min_reads)

    # if negative mode, also create a biom table with
    # only 16s sequences and with only non-16s sequences
    if negate:
        remove_artifacts_from_biom_table(output_fp, outputfasta_fp,
                                         pos_ref_fp, output_dir,
                                         pos_ref_db_fp,
                                         threads=threads_per_sample)

    # Clean up
    if not keep_tmp_files:
        logger.info('Cleaning up temp files')
        for f in files_to_remove:
            remove(f)
        rmtree(working_dir)
    else:
        logger.info('Keeping temp files')
    logger.info('deblur workflow finished')
    logger.info('output saved to %s' % output_fp)
    logger.info('------------------')


if __name__ == '__main__':
    deblur_cmds()
