import os
import glob
from pathlib import Path
from snakemake.logging import logger
from pprint import pprint as pp
from ikiss import IKISS, dico_tool

ikiss_obj = IKISS(dico_tool, workflow=workflow, config=config)
tools_config = ikiss_obj.tools_config
cluster_config = ikiss_obj.cluster_config
# print(ikiss_obj.export_use_yaml)
# print for debug:
#pp(ikiss_obj)
# #exit()
#print(tools_config)
# exit()

###############################################################################


# Getting paths on usefully variables
output_dir = config['DATA']['OUTPUT']
fastq_dir = config['DATA']['FASTQ']
reference_file = config['PARAMS']['MAPPING_KMERS']['REF']
gff_file = config['PARAMS']['INTERSECT']['GFF']
feature = config['PARAMS']['INTERSECT']['FEATURE']
samples_file = config['PARAMS']['PCADAPT']['SAMPLES']
basename_reference = Path(reference_file).stem

reference_assembly = config['PARAMS']['ASSEMBLY_KMERS']['REF']
basename_reference2 = Path(reference_assembly).stem

FASTQ, = glob_wildcards(f"{config['DATA']['FASTQ']}{{fastq}}{ikiss_obj.fastq_files_ext}")
SAMPLE, = glob_wildcards(f"{config['DATA']['FASTQ']}{{sample}}_R1{ikiss_obj.fastq_files_ext}")


def get_threads(rule, default):
    """
    retrieve threads value from cluster_config file avail for SGE and SLURM
    If local get the --core value
    if fail return default value define on each rules

    Examples:
        rule rule_graph:
            threads: get_threads('rule_graph', 1)
    """
    # if cluster mode
    if cluster_config:
        if rule in cluster_config:
            if 'threads' in cluster_config[rule]:
                return int(cluster_config[rule]['threads'])
            elif 'cpus-per-task' in cluster_config[rule]:
                return int(cluster_config[rule]['cpus-per-task'])
        elif '__default__' in cluster_config:
            if 'cpus-per-task' in cluster_config['__default__']:
                return int(cluster_config['__default__']['cpus-per-task'])
            elif 'threads' in cluster_config['__default__']:
                return int(cluster_config['__default__']['threads'])
        # if local
    elif workflow.global_resources["_cores"]:
        if default == 1:            # for rule not able threading
            return default
        else:
            return workflow.global_resources["_cores"]
    # if cluster not rule and not default or local not _cores return value from call
    return default

# # run_get_versions was adapted from culebront project by orjuela, compte, et al 2022. 10.24072/pcjournal.153
# rule run_get_versions:
#     """
#     recovery soft versions
#     """
#     threads: get_threads('run_get_versions', 1)
#     input:
#         assemblers = expand(f"{output_dir}{{fastq}}/ASSEMBLERS/{{assemblers}}/ASSEMBLER/{{assemblers}}-version.txt", fastq = FASTQ[0], assemblers=culebront.assembly_tools_activated),
#         polishers = expand(rules.run_racon_version.output.version, fastq=FASTQ),
#         correction = expand(f"{output_dir}{{fastq}}/ASSEMBLERS/{{assemblers}}/CORRECTION/{{correction}}/{{correction}}-version.txt", fastq=FASTQ[0], assemblers=culebront.assembly_tools_activated, correction=culebront.correction_tools_activated),
#         circular = expand(f"{output_dir}{{fastq}}/ASSEMBLERS/{{assemblers}}/ASSEMBLER/CIRCLATOR-version.txt", fastq=FASTQ[0], assemblers=[ass for ass in culebront.assembly_tools_activated if ass in ["CANU","SMARTDENOVO"] and bool(culebront.config['CIRCULAR'])]),
#         quality = expand(f"{output_dir}{{fastq}}/ASSEMBLERS/{{assemblers}}/QUALITY/{{quality_step}}/{{quality}}/{{quality}}-version.txt", fastq=FASTQ[0], assemblers=culebront.assembly_tools_activated[0], quality_step=culebront.last_steps_list, quality=[qual for qual in culebront.quality_tools_activated if qual not in ["QUAST", "MAUVE", "BUSCO"]]),
#         mauve = expand(f"{output_dir}/versions/{{quality}}-version.txt", quality=[qual for qual in culebront.quality_tools_activated if qual in ["MAUVE"]])
#     params:
#         dir =f'{output_dir}versions/'
#     output:
#         csv = f"{output_dir}versions.csv"
#     message:
#         """
#         picking software versions used by iKISS
#         """
#     log:
#         output = f'{output_dir}versions/LOGS/GET-VERSIONS.o',
#         error = f'{output_dir}versions/LOGS/GET-VERSIONS.e'
#     script:
#         f"{ikiss_obj.snakemake_scripts}/get_versions.py"


def output_final(wildcards):
    dico_final = {
        "fastq_table": rules.fastq_stats.output.fastq_table,
        "kmer_module": f"{output_dir}3.TABLE2BED/",     
    }
    if 'PCADAPT' in ikiss_obj.method or 'LFMM' in ikiss_obj.method:
        dico_final.update({
        "method_kmers": expand(rules.merge_method.output.outliers_combined, method=ikiss_obj.method),
        })
        if 'MAPPING_KMERS' in ikiss_obj.tools_activated:
            dico_final.update({
                "outliers_and_mapping" : expand(rules.outliers_position.output.outliers_and_mapping, method=ikiss_obj.method),
                "stats" : expand(rules.outliers_position.output.stats, method=ikiss_obj.method),
            })
            if 'INTERSECT' in ikiss_obj.tools_activated:
                dico_final.update({
                    "stats_outliers" : expand(rules.intersect_and_outliers.output.stats_outliers, method=ikiss_obj.method),
                })
        if 'ASSEMBLY_KMERS' in ikiss_obj.tools_activated:
            dico_final.update({
                "outliers_assembly": expand(rules.mergetags.output.assembled_outliers, method=ikiss_obj.method),
                "outliers_csv": expand(rules.mergetags.output.assembled_csv, method=ikiss_obj.method),
            }) 
            if config['PARAMS']['ASSEMBLY_KMERS']['MAPPING_CONTIGS']:
                dico_final.update({
                    "sortedbam" : expand(rules.mapping_contigs.output.sortedbam, method=ikiss_obj.method),
                })
                if 'INTERSECT' in ikiss_obj.tools_activated:
                    dico_final.update({
                        "stats_contigs" : expand(rules.intersect_and_contigs.output.stats_contigs, method=ikiss_obj.method),
                })
    return dico_final

rule final:
    input:
        f"{output_dir}REPORT/iKISS_report.html"

###################### rules
rule kmers_gwas_per_sample:
    """
    run kmers_gwas_per_sample
    """
    threads: get_threads('kmers_gwas_per_sample', 1)
    input:
        forw = f"{fastq_dir}{{sample}}_R1{ikiss_obj.fastq_files_ext}"
    params:
        rev = f"{fastq_dir}{{sample}}_R2{ikiss_obj.fastq_files_ext}",
        name = f"{{sample}}",
        kmer_size = config['PARAMS']['KMERS_MODULE']['KMER_SIZE'],
        dir = f"{output_dir}1.KMERS_MODULE/{{sample}}"
    output:
        kmers_file = f"{output_dir}1.KMERS_MODULE/{{sample}}/{{sample}}_kmers_with_strand"
    log:
        output = f"{output_dir}LOGS/1.KMERS_MODULE/{{sample}}/{{sample}}_KMERS_MODULE.o",
        error = f"{output_dir}LOGS/1.KMERS_MODULE/{{sample}}/{{sample}}_KMERS_MODULE.e"
    benchmark:
        f"{output_dir}BENCHMARK/{{sample}}_KMERS_MODULE.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            forward : {input.forw}
        params:
            reverse : {params.rev}
        output:
            kmers_file: {output.kmers_file}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["KMERS_GWAS"]
    shell:
        """
        cd {params.dir}
        # creating txt files path
        realpath {input.forw} {params.rev} > {params.name}_files.txt
        # calculate canonical and not canonical by each sample
        kmc_v3 -t{threads} -k{params.kmer_size} -ci2 @{params.name}_files.txt {params.name}_kmc3_canon ./ 1> {log.error} 2> {log.output}
        kmc_v3 -t{threads} -k{params.kmer_size} -ci0 -b @{params.name}_files.txt {params.name}_kmc3_all ./ 1>> {log.error} 2>> {log.output}
        #combine 2 runs
        kmers_add_strand_information -c {params.name}_kmc3_canon -n {params.name}_kmc3_all -k {params.kmer_size} -o {params.name}_kmers_with_strand 1>> {log.error} 2>> {log.output}
        """


rule kmers_to_use:
    """
    run kmers_to_use
    """
    threads: get_threads('kmers_to_use', 1)
    input:
        samples_list = expand({rules.kmers_gwas_per_sample.output.kmers_file}, sample=SAMPLE)
    params:
        dir = f"{output_dir}2.KMERS_TABLE/",
        kmer_size = config['PARAMS']['KMERS_MODULE']['KMER_SIZE'],
        mac = config['PARAMS']['KMERS_MODULE']['MAC'],
        p = config['PARAMS']['KMERS_MODULE']['P'],
        kmers_list_path = f"{output_dir}2.KMERS_TABLE/kmers_list_paths.txt"
    output:
        #pheno = f"{output_dir}2.KMERS_TABLE/phenotype.pheno",
        kmers_to_use = f"{output_dir}2.KMERS_TABLE/kmers_to_use"
    log:
        output = f"{output_dir}LOGS/2.KMERS_TABLE/KMERS_TO_USE.o",
        error = f"{output_dir}LOGS/2.KMERS_TABLE/KMERS_TO_USE.e"
    benchmark:
        f"{output_dir}BENCHMARK/KMERS_TO_USE.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            samples_list : {input.samples_list}
        output:
            kmers_to_use: {output.kmers_to_use}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        cd {params.dir}
        # create file with paths to kmers_with_strand before merging
        realpath {input.samples_list} > samplesTMP.txt;
        awk -F '/' '{{print $_"\t"$NF"TMP" }}' samplesTMP.txt | sed 's/_kmers_with_strandTMP//g' - > {params.kmers_list_path}
        rm samplesTMP.txt
        # calculate kmers to use
        list_kmers_found_in_multiple_samples -l {params.kmers_list_path} -k {params.kmer_size} --mac {params.mac} -p {params.p} -o {output.kmers_to_use} 1> {log.error} 2> {log.output}
        """

rule kmers_table:
    """
    run create_kmers_table
    """
    threads: get_threads('create_kmers_table', 1)
    input:
        kmers_to_use = rules.kmers_to_use.output.kmers_to_use
    params:
        dir = f"{output_dir}2.KMERS_TABLE/",
        kmer_size = config['PARAMS']['KMERS_MODULE']['KMER_SIZE'],
        kmers_list_path = f"{output_dir}2.KMERS_TABLE/kmers_list_paths.txt",
        kmers_table_name = f"kmers_table"
    output:
        kmers_table = f"{output_dir}2.KMERS_TABLE/kmers_table.table"
    log:
        output = f"{output_dir}LOGS/2.KMERS_TABLE/KMERS_TABLE.o",
        error = f"{output_dir}LOGS/2.KMERS_TABLE/KMERS_TABLE.e"
    benchmark:
        f"{output_dir}BENCHMARK/KMERS_TABLE.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            kmers_to_use : {input.kmers_to_use}
        output:
            kmers_table: {output.kmers_table}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["KMERS_GWAS"],
    shell:
        """
        cd {params.dir}
        # create the kmer table
        build_kmers_table -l {params.kmers_list_path} -k {params.kmer_size} -a {input.kmers_to_use} -o {params.kmers_table_name} 1> {log.error} 2> {log.output}
        """

##### BED WC
checkpoint kmers_table_to_bed:
    """
    run kmers_table_to_bed
    """
    threads: get_threads('kmers_table_to_bed', 1)
    input:
        kmers_table = rules.kmers_table.output.kmers_table,
    params:
        dir = f"{output_dir}3.TABLE2BED/",
        kmers_table_name = f"{output_dir}2.KMERS_TABLE/kmers_table",
        #pheno = rules.kmers_to_use.output.pheno,
        pheno = samples_file,
        kmer_size = config['PARAMS']['KMERS_MODULE']['KMER_SIZE'],
        mac = config['PARAMS']['KMERS_MODULE']['MAC'],
        maf = config['PARAMS']['KMERS_MODULE']['MAF'],
        nb_kmers_in_bed = config['PARAMS']['KMERS_MODULE']['B'],
        bed_name = f"output_file",
    output:
        bed = directory(f"{output_dir}3.TABLE2BED/"),
    log:
        #error detected by rule if {output_dir}/LOGS/3.TABLE2BED/ instead f"{output_dir}/3.TABLE2BED/log/
        output = f"{output_dir}3.TABLE2BED/log/TABLE2BED.o",
        error = f"{output_dir}3.TABLE2BED/log/TABLE2BED.e",
    benchmark:
        f"{output_dir}BENCHMARK/TABLE2BED.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            kmers_table : {input.kmers_table}
        output:
            kmers_table: {output.bed}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["KMERS_GWAS"],
    shell:
        """
        cd {params.dir}
        # obtain the plink binary files (bed, fam, bim) 
        kmers_table_to_bed -t {params.kmers_table_name} -k {params.kmer_size} --maf {params.maf} --mac {params.mac} -b {params.nb_kmers_in_bed} -o {params.bed_name} --phentype_file {params.pheno} 1>> {log.error} 2>> {log.output}
        """

rule extract_kmers_from_bed:
    """
    extract_kmers_from_bed
    """
    threads: get_threads('extract_kmers_from_bed', 1)
    input:
        bed = f"{output_dir}3.TABLE2BED/{{bed}}.bed"
    params:
        dir = f"{output_dir}4.EXTRACT_FASTA/",
        tmp = f"{output_dir}3.TABLE2BED/{{bed}}.fasta",
    output:
        fasta = f"{output_dir}4.EXTRACT_FASTA/{{bed}}.fasta.gz",
    log:
        output = f"{output_dir}LOGS/4.EXTRACT_FASTA/{{bed}}_EXTRACT_FASTA.o",
        error = f"{output_dir}LOGS/4.EXTRACT_FASTA/{{bed}}_EXTRACT_FASTA.e",
    benchmark:
        f"{output_dir}BENCHMARK/{{bed}}_EXTRACT_FASTA.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            bed : {input.bed}
        output:
            fasta: {output.fasta}
        log:
            output: {log.output}
            error: {log.error} 
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        (cd {params.dir}
        python3 {ikiss_obj.snakemake_scripts}/kmer-bed2fasta.py -b {input.bed} 
        gzip {params.tmp} 
        mv {params.tmp}.gz {output.fasta} ) 1> {log.error} 2> {log.output}
        """

rule index_ref:
    """
    index_ref for bwa-mem2 or bwa 
    """
    threads: get_threads('index_ref', 4)
    input:
        ref = reference_file
    params:
        ref_dir = f"{output_dir}REF",
        index_options = config['PARAMS']['MAPPING_KMERS']['INDEX_OPTIONS'],
        index_type = f"bwa-mem2 " if config['PARAMS']['MAPPING_KMERS']['MODE'] == "bwa-mem2" else "bwa",
    output:
        # TODO: manage fasta extensions
        new_ref = f"{output_dir}REF/{basename_reference}.fasta",
        index_tag = f"{output_dir}REF/{basename_reference}.fasta.bwt.2bit.64" if config['PARAMS']['MAPPING_KMERS']['MODE'] == "bwa-mem2" else f"{output_dir}REF/{basename_reference}.fasta.sa"
    log:
        output = f"{output_dir}LOGS/{basename_reference}_INDEXING.o",
        error = f"{output_dir}LOGS/{basename_reference}_INDEXING.e",
    benchmark:
        f"{output_dir}BENCHMARK/{basename_reference}_INDEXING.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            ref: {input.ref}
        output:
            new_ref : {output.new_ref}
            index_tag : {output.index_tag}
        log:
            output: {log.output}
            error: {log.error} 
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["BWAMEM2"],
        tools_config["ENVMODULE"]["BWA"],
    shell:
        """
        (mkdir -p {params.ref_dir}
        cd {params.ref_dir}
        cp {reference_file} {output.new_ref} 
        {params.index_type} index {params.index_options} {output.new_ref} ) 1> {log.error} 2> {log.output}
        """

rule index_ref_to_assembly:
    """
    index_ref_to_assembly for bwa-mem2
    """
    threads: get_threads('index_ref', 4)
    input:
        ref = reference_assembly
    params:
        ref_dir = f"{output_dir}REF_ASSEMBLY",
    output:
        new_ref = f"{output_dir}REF_ASSEMBLY/{basename_reference2}.fasta",
        index_tag = f"{output_dir}REF_ASSEMBLY/{basename_reference2}.fasta.bwt.2bit.64"
    log:
        output = f"{output_dir}LOGS/{basename_reference2}_ASSEMBLY_INDEXING.o",
        error = f"{output_dir}LOGS/{basename_reference2}_ASSEMBLY_INDEXING.e",
    benchmark:
        f"{output_dir}BENCHMARK/{basename_reference2}_ASSEMBLY_INDEXING.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            ref: {input.ref}
        output:
            new_ref : {output.new_ref}
            index_tag : {output.index_tag}
        log:
            output: {log.output}
            error: {log.error} 
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["BWAMEM2"],
        tools_config["ENVMODULE"]["BWA"],
    shell:
        """
        (mkdir -p {params.ref_dir}
        cd {params.ref_dir}
        cp {input.ref} {output.new_ref} 
        bwa-mem2 index {output.new_ref}) 1> {log.error} 2> {log.output}
        """

        
rule mapping_kmers:
    """
    mapping_kmers
    """
    threads: get_threads('mapping_kmers', 4)
    input:
        fasta = f"{output_dir}4.EXTRACT_FASTA/{{bed}}.fasta.gz",
        ref = rules.index_ref.output.new_ref,
        index_tag = rules.index_ref.output.index_tag
    params:
        dir = f"{output_dir}8.MAPPING_KMERS/",
        sai = f"{output_dir}8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}.sai",
        bam = f"{output_dir}8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}.bam",
        options = config['PARAMS']['MAPPING_KMERS']['OPTIONS'],
        mode = config['PARAMS']['MAPPING_KMERS']['MODE'],
    output:
        sortedbam = f"{output_dir}8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_sorted.bam",
    log:
        output = f"{output_dir}LOGS/8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_MAPPING.o",
        error = f"{output_dir}LOGS/8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_MAPPING.e",
    benchmark:
        f"{output_dir}BENCHMARK/{{bed}}_vs_{basename_reference}_MAPPING.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            fasta: {input.fasta}
        output:
            bam : {output.sortedbam}
        log:
            output: {log.output}
            error: {log.error} 
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["BWAMEM2"],
        tools_config["ENVMODULE"]["BWA"],
        tools_config["ENVMODULE"]["SAMTOOLS"]
    shell:
        """
        (cd {params.dir} 
        if [[ {params.mode} == bwa-mem2 ]]; then 
             bwa-mem2 mem {params.options} -t {threads} {input.ref} {input.fasta} > {params.bam};
        fi
        if [[ {params.mode} == bwa-aln ]]; then
             bwa aln {params.options} -t {threads} {input.ref} {input.fasta} > {params.sai};
             bwa samse -f {params.bam} {input.ref} {params.sai} {input.fasta};
        fi
        samtools sort {params.bam} -o {output.sortedbam}       
        samtools index {output.sortedbam}
        samtools idxstats {output.sortedbam} > {output.sortedbam}.idxstats 
        samtools stats {output.sortedbam} > {output.sortedbam}.stats) 1> {log.error} 2> {log.output}
        """


rule filter_bam:
    """
    filter_bam
    """
    threads: get_threads('filter_bam', 4)
    input:
        sortedbam = f"{output_dir}8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_sorted.bam",
    params:
        flag = config['PARAMS']['MAPPING_KMERS']['FILTER_FLAG'],
        qual = config['PARAMS']['MAPPING_KMERS']['FILTER_QUAL'],
        dir = f"{output_dir}8.MAPPING_KMERS/",
        unmapped = temp(f"{output_dir}8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_F.bam"),
    output:
        filterbam = f"{output_dir}8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_FMQ.bam",
    log:
        output = f"{output_dir}LOGS/8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_FMQ.o",
        error = f"{output_dir}LOGS/8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_FMQ.e",
    benchmark:
        f"{output_dir}BENCHMARK/{{bed}}_vs_{basename_reference}_FMQ.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            sortedbam: {input.sortedbam}
        output:
            filterbam : {output.filterbam}
        log:
            output: {log.output}
            error: {log.error} 
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["SAMTOOLS"]
    shell:
        """
        (cd {params.dir}
        samtools view -bh -F {params.flag} {input.sortedbam} > {params.unmapped}
        samtools view -bh -q {params.qual} {params.unmapped} > {output.filterbam}
        rm {params.unmapped}
        ) 1> {log.error} 2> {log.output}
        """

rule kmer_position_from_bam:
    """
    kmer_position_from_bam
    """
    threads: get_threads('kmer_position_from_bam', 4)
    input:
        filterbam = f"{output_dir}8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_FMQ.bam",
    params:
        kmer_size = config['PARAMS']['KMERS_MODULE']['KMER_SIZE'],
        dir = f"{output_dir}8.MAPPING_KMERS/",
        name=f"{{bed}}",
        tmpposition = temp(f"{output_dir}9.KMERPOSITION/{{bed}}_vs_{basename_reference}_TMPKMERPOSITION.txt"),
    output:
        position = f"{output_dir}9.KMERPOSITION/{{bed}}_vs_{basename_reference}_KMERPOSITION.txt",
    log:
        output = f"{output_dir}LOGS/9.KMERPOSITION/{{bed}}_vs_{basename_reference}_KMERPOSITION.o",
        error = f"{output_dir}LOGS/9.KMERPOSITION/{{bed}}_vs_{basename_reference}_KMERPOSITION.e",
    benchmark:
        f"{output_dir}BENCHMARK/{{bed}}_vs_{basename_reference}_FMQ.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            filterbam: {input.filterbam}
        output:
            position : {output.position}
        log:
            output: {log.output}
            error: {log.error} 
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["SAMTOOLS"]
    shell:
        """
        (cd {params.dir}
        echo {params.name}
        name={params.name}
        samtools view {input.filterbam} | cut -f1-4,10 - > {params.tmpposition}
        awk '{{if ($2=="16") print "{params.name}\t"$1"\t"$5"\t"$3"\t"$4-{params.kmer_size}-1; else print "{params.name}\t"$1"\t"$5"\t"$3"\t"$4}}' {params.tmpposition} > {output.position}
        sort -n -k2,2 {output.position} > {params.tmpposition}
        mv {params.tmpposition} {output.position}
        ) 1> {log.error} 2> {log.output}   
        """

###################################### BED WC

def aggregate_bam(wildcards):
    checkpoint_output = checkpoints.kmers_table_to_bed.get(**wildcards).output[0]
    bed=glob_wildcards(os.path.join(checkpoint_output,"{bed}.bed")).bed
    return expand(f"{output_dir}9.KMERPOSITION/{{bed}}_vs_{basename_reference}_KMERPOSITION.txt", bed=bed)

# merge bed wc
rule merge_kmer_position:
    """
    list kmers positions files
    """
    threads: get_threads('merge_kmer_position', 1)
    input:
        list = aggregate_bam,
    params:
        dir = f"{output_dir}9.KMERPOSITION/"
    output:
        combined = f"{output_dir}10.MERGE_KMERPOSITION/kmer_position_merged.txt",
    log:
        output = f"{output_dir}LOGS/10.MERGE_KMERPOSITION/KMERPOSITION_MERGING.o",
        error = f"{output_dir}LOGS/10.MERGE_KMERPOSITION/KMERPOSITION_MERGING.e",
    benchmark:
        f"{output_dir}BENCHMARK/KMERPOSITION_MERGING.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            list : {input.list}
        output:
            combined: {output.combined}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        (cd {params.dir} 
        echo -e 'bedname\tkmer_number\tsequence\tREF\tposition' > tmp.txt
        cat *KMERPOSITION.txt >> tmp.txt
        mv tmp.txt {output.combined} ) 1> {log.error} 2> {log.output}
        """

def aggregate_bam_to_samtools_merge(wildcards):
    checkpoint_output = checkpoints.kmers_table_to_bed.get(**wildcards).output[0]
    bed=glob_wildcards(os.path.join(checkpoint_output,"{bed}.bed")).bed
    return expand(f"{output_dir}8.MAPPING_KMERS/{{bed}}_vs_{basename_reference}_FMQ.bam", bed=bed)
 
# merge bam wc
rule samtools_merge:
    """
    list kmers positions files
    """
    threads: get_threads('samtools_merge', 1)
    input:
        list = aggregate_bam_to_samtools_merge,
    params:
        dir = f"{output_dir}8.MAPPING_KMERS/"
    output:
        combined = f"{output_dir}10.MERGE_KMERPOSITION/kmer_position_samtools_merge.bam",
    log:
        output = f"{output_dir}LOGS/10.MERGE_KMERPOSITION/KMERPOSITION_SAMTOOLS_MERGE.o",
        error = f"{output_dir}LOGS/10.MERGE_KMERPOSITION/KMERPOSITION_SAMTOOLS_MERGE.e",
    benchmark:
        f"{output_dir}BENCHMARK/KMERPOSITION_SAMTOOLS_MERGE.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            list : {input.list}
        output:
            combined: {output.combined}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        (cd {params.dir} 
        echo {input.list} > bam_files.txt 
        sed -i 's/ /\\n/g' bam_files.txt
        samtools merge -@ {threads} -b bam_files.txt {output.combined}) 1> {log.error} 2> {log.output}
        """
###################################################################################"
###################################################################################"

##### SEGMENT WC
checkpoint split_bed:
    """
    from a bed, random kmers in several list before to PCA
    """
    threads: get_threads('split_bed', 1)
    input:
        bed = f"{output_dir}3.TABLE2BED/{{bed}}.bed",
        fasta = f"{output_dir}4.EXTRACT_FASTA/{{bed}}.fasta.gz",
        #fastq = rules.extract_kmers_from_bed.output.fastq,
    params:
        nb_kmers_in_bed = config['PARAMS']['KMERS_MODULE']['SPLIT_LIST_SIZE'],
        name = f"{{bed}}",
        min_lenght =  config['PARAMS']['KMERS_MODULE']['MIN_LIST_SIZE'],
    output:
        dir = directory(f"{output_dir}5.RANGES/{{bed}}")
    log:
        output = f"{output_dir}LOGS/5.RANGES/{{bed}}_RANGES.o",
        error = f"{output_dir}LOGS/5.RANGES/{{bed}}_RANGES.e",
    benchmark:
        f"{output_dir}BENCHMARK/{{bed}}_RANGES.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            bed : {input.bed}
            fasta: {input.fasta}
        params:
            name : {params.name} 
        output:
            dir : {output.dir}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        VAR=`zcat {input.fasta} | wc -l - | cut -d' ' -f1 `;
        FILE_LENGHT=$((${{VAR}}/2));
        python3 {ikiss_obj.snakemake_scripts}/split_bed.py --list_length {params.nb_kmers_in_bed} --file_length $FILE_LENGHT --min_length {params.min_lenght} --output-name {params.name} --output-dir {output.dir} 1> {log.error} 2> {log.output}
        """

rule pcadapt:
    """
    pca using a segment
    """
    threads: get_threads('pcadapt', 1)
    input:
        kmer_list_file = f"{output_dir}5.RANGES/{{bed}}/{{segment}}.txt"
    params:
        k = config['PARAMS']['PCADAPT']['K'],
        correction = config['PARAMS']['PCADAPT']['CORRECTION'],
        alpha = config['PARAMS']['PCADAPT']['ALPHA'],
        bed = f"{output_dir}3.TABLE2BED/{{bed}}.bed",
        bim = f"{output_dir}3.TABLE2BED/{{bed}}.bim",
        fam = f"{output_dir}3.TABLE2BED/{{bed}}.fam",
        samples = samples_file,
        segment_file = f"{{bed}}_{{segment}}"
    output:
        outliers = f"{output_dir}6.PCADAPT/{{bed}}_{{segment}}_PCADAPT_outliers.csv",
        pvalues = f"{output_dir}6.PCADAPT/{{bed}}_{{segment}}_PCADAPT_pvalues.csv",
    log:
        output = f"{output_dir}LOGS/6.PCADAPT/{{bed}}_{{segment}}_PCADAPT.o",
        error = f"{output_dir}LOGS/6.PCADAPT/{{bed}}_{{segment}}_PCADAPT.e",
    benchmark:
        f"{output_dir}BENCHMARK/{{bed}}_{{segment}}_PCADAPT.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            kmer_list_file : {input.kmer_list_file}
        params:
            bed : {params.bed}
            k :  {params.k}
        output:
            outliers: {output.outliers}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["R"]
    shell:
        """
        Rscript {ikiss_obj.snakemake_scripts}/pcadapt.R -x {params.k} -e {params.bed} -i {params.bim} -a {params.fam} -o {output.outliers} -k {input.kmer_list_file} -s {params.samples} 1>{log.output} 2>{log.error}
        sed -i '1d' {output.outliers}
        sed -i '1d' {output.pvalues}
        sed -i s'/_0//' {output.outliers}
        sed -i s'/_0//' {output.pvalues}
        """


# # nested wc https://stackoverflow.com/questions/58823881/how-are-nested-checkpoints-resolved-in-snakemake/58859794#58859794
def aggregate_segments(wildcards):
    outputs_bed = glob.glob(f"{checkpoints.kmers_table_to_bed.get().output}/*.bed")
    outputs_bed = [output.split('/')[-1].split('.bed')[0] for output in outputs_bed]
    split_files = []
    for bed in outputs_bed:
        outputs_segment = glob.glob(f"{checkpoints.split_bed.get(bed=bed).output}/*.txt")
        outputs_segment = [output.split('/')[-1].split('.txt')[0] for output in outputs_segment]
        for segment in outputs_segment:
            split_files.append(f"{output_dir}6.{{method}}/{bed}_{segment}_{{method}}_outliers.csv")
    return split_files


rule merge_method: 
    """
    merging under selection kmers detected by pcadapt
    """
    threads: get_threads('merge_method', 1)
    input:
        agg_segments = aggregate_segments
        #agg_segments = aggregate_segments_lfmm if 'LFMM' in ikiss_obj.method else aggregate_segments_pcadapt,
    params:
        method = f"{{method}}",
        dir = f"{output_dir}6.{{method}}/"
    output:
        pvalues_combined = f"{output_dir}7.MERGED_{{method}}/merged_{{method}}_pvalues.csv",
        outliers_combined = f"{output_dir}7.MERGED_{{method}}/merged_{{method}}_outliers.csv"
    log:
        output = f"{output_dir}LOGS/7.MERGED_{{method}}/MERGED_{{method}}.o",
        error = f"{output_dir}LOGS/7.MERGED_{{method}}/MERGED_{{method}}.e"
    benchmark:
        f"{output_dir}BENCHMARK/MERGED_{{method}}.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        output:
            pvalues_combined: {output.pvalues_combined}
            outliers_combined : {output.outliers_combined}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        (if [[ {params.method} == LFMM ]]; then
            mkdir -p {params.dir} ; cd {params.dir} 
            echo -e 'sequence\tbedname\tsegment_nb\tpvalue' > tmp.csv
            cat *outliers.csv >> tmp.csv
            mv tmp.csv {output.outliers_combined}
            cat *pvalues.csv > {output.pvalues_combined}
        fi

        if [[ {params.method} == PCADAPT ]]; then
            mkdir -p {params.dir} ; cd {params.dir}
            echo -e '\tbedname\tsegment_nb\tkmer_number\tsequence\tpvalue.X1\tpvalue.X2' > tmp.csv
            cat *pvalues.csv >> tmp.csv
            mv tmp.csv {output.pvalues_combined} 
            echo -e '\tsequence\tbedname\tsegment_nb\tkmer_number\tpvalue.X1\tpvalue.X2\tkmer_number_index_get.pc\tPC' > tmp.csv
            cat *outliers.csv >> tmp.csv
            mv tmp.csv {output.outliers_combined} 
        fi
        ) 2>{log.error}
        """

rule outliers_position:
    """
    merging outliers and mapping
    """
    threads: get_threads('outliers_position', 1)
    input:
        outliers = rules.merge_method.output.outliers_combined,
        positions = rules.merge_kmer_position.output.combined,
        bam = rules.samtools_merge.output.combined
    params:
        dir = f"{output_dir}11.OUTLIERS_{{method}}_POSITION"
    output:
        outliers_and_mapping = f"{output_dir}11.OUTLIERS_{{method}}_POSITION/outliers_with_position.csv",
        stats = f"{output_dir}11.OUTLIERS_{{method}}_POSITION/stats.txt"
    log:
        output = f"{output_dir}LOGS/11.OUTLIERS_{{method}}_POSITION/OUTLIERS_POSITION.o",
        error = f"{output_dir}LOGS/11.OUTLIERS_{{method}}_POSITION/OUTLIERS_POSITION.e"
    benchmark:
        f"{output_dir}BENCHMARK/OUTLIERS_{{method}}_POSITION.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            outliers : {input.outliers}
            positions : {input.positions}
        output:
            outliers_and_mapping : {output.outliers_and_mapping}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        python3 {ikiss_obj.snakemake_scripts}/outliers_and_position.py --outliers {input.outliers} --kmers_position {input.positions} --output {output.outliers_and_mapping} 1> {log.error} 2> {log.output}
        """

rule extracting_features_from_gff:
    """
    extracting_features_from_gff
    """
    threads: get_threads('extracting_features_from_gff', 1)
    input:
        gff = gff_file,
    params:    
        feature = feature
    output:
        gff_feature = f"{output_dir}13.GFF_FEATURES/extracted.gff",
    log:
        output = f"{output_dir}LOGS/13.GFF_FEATURES/GFF_FEATURES.o",
        error = f"{output_dir}LOGS/13.GFF_FEATURES/GFF_FEATURES.e"
    benchmark:
        f"{output_dir}BENCHMARK/GFF_FEATURES.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            gff : {input.gff}
        params:
            feature : {params.feature}
        output:
            gff_feature : {output.gff_feature}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        grep -e "{params.feature}\s" {input.gff} > {output.gff_feature}
        """

rule kmers_bedtools_intersect:
    """
    intersect bedtools (bam vs gff) 
    """
    threads: get_threads('kmers_bedtools_intersect', 1)
    input:
        bam = rules.samtools_merge.output.combined,
        gff = rules.extracting_features_from_gff.output.gff_feature,
    params:
        dir = f"{output_dir}14.KMERS_INTERSECT"
    output:
        bed = f"{output_dir}14.KMERS_INTERSECT/kmers_bedtools_intersect_annotation.bed"
    log:
        output = f"{output_dir}LOGS/14.KMERS_INTERSECT/BEDTOOLS_INTERSECT.o",
        error = f"{output_dir}LOGS/14.KMERS_INTERSECT/BEDTOOLS_INTERSECT.e"
    benchmark:
        f"{output_dir}BENCHMARK/BEDTOOLS_INTERSECT.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            bam : {input.bam}
            gff : {input.gff}
        output:
            bed : {output.bed}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        (cd {params.dir}
        bedtools intersect -abam {input.bam} -b {input.gff} -bed -header -wb -wa > {output.bed})1> {log.error} 2> {log.output}
        """


## =============================== LFMM

rule get_pca_from_phenotype:
    """
    doing a pca analysis using phenotype file with variables done by user
    """
    threads: get_threads('get_pca_from_phenotype', 6)
    input:
        phenotype = config['PARAMS']['LFMM']['PHENOTYPE_FILE'],
    output:
        pca_variance = f"{output_dir}6.LFMM_PHENO/PCA_from_phenotype.csv",
    params:
        jupyter = f"{output_dir}6.LFMM_PHENO/PCA_from_phenotype.ipynb",
        html = f"{output_dir}6.LFMM_PHENO/PCA_from_phenotype.html",
    log:
        output = f"{output_dir}LOGS/6.LFMM_PHENO/PCA_FROM_PHENOTYPE_LFMM.o",
        error = f"{output_dir}LOGS/6.LFMM_PHENO/PCA_FROM_PHENOTYPE_LFMM.e",
    benchmark:
        f"{output_dir}BENCHMARK/PCA_FROM_PHENOTYPE_LFMM.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            phenotype: {input.phenotype}
        output:
            pca_variance: {output.pca_variance}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["KMERS_GWAS"]
    shell:
        """ 
        python3 {ikiss_obj.snakemake_scripts}/pca_from_phenotype.py -p {input.phenotype} -o {params.jupyter} 1>{log.output} 2>{log.error}
        jupyter nbconvert --execute --inplace {params.jupyter} --ExecutePreprocessor.timeout=6000
        jupyter nbconvert --execute {params.jupyter} --no-input --ExecutePreprocessor.timeout=6000 --to=html
        """


rule lfmm:
    """
    lfmm using a segment
    """
    threads: get_threads('lfmm', 4)
    input:
        kmer_list_file = f"{output_dir}5.RANGES/{{bed}}/{{segment}}.txt",
        phenotype = config['PARAMS']['LFMM']['PHENOTYPE_FILE'] if not config['PARAMS']['LFMM']['PHENOTYPE_PCA_ANALYSIS'] else rules.get_pca_from_phenotype.output.pca_variance
    params:
        k = config['PARAMS']['LFMM']['K'],
        correction = config['PARAMS']['LFMM']['CORRECTION'],
        alpha = config['PARAMS']['LFMM']['ALPHA'],
        bed = f"{output_dir}3.TABLE2BED/{{bed}}.bed",
        bim = f"{output_dir}3.TABLE2BED/{{bed}}.bim",
        fam = f"{output_dir}3.TABLE2BED/{{bed}}.fam",
        segment_file = f"{{bed}}_{{segment}}",
    output:
        outliers = f"{output_dir}6.LFMM/{{bed}}_{{segment}}_LFMM_outliers.csv",
        pvalues = f"{output_dir}6.LFMM/{{bed}}_{{segment}}_LFMM_pvalues.csv",
    log:
        output = f"{output_dir}LOGS/6.LFMM/{{bed}}_{{segment}}_LFMM.o",
        error = f"{output_dir}LOGS/6.LFMM/{{bed}}_{{segment}}_LFMM.e",
    benchmark:
        f"{output_dir}BENCHMARK/{{bed}}_{{segment}}_LFMM.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            kmer_list_file : {input.kmer_list_file}
            phenotype : {input.phenotype}
        params:
            bed : {params.bed}
            k: {params.k}
            alpha : {params.alpha}
            correction : {params.correction}
        output:
            outliers: {output.outliers}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["R"]
    shell:
        """
        Rscript {ikiss_obj.snakemake_scripts}/lfmm.R -x {params.k} -e {params.bed} -i {params.bim} -a {params.fam} -o {output.outliers} -k {input.kmer_list_file} -p {input.phenotype} -f {params.alpha} -c {params.correction} 1>{log.output} 2>{log.error}
        """



################### ASSEMBLY_KMERS

def outliers_to_mergetags(wildcards):
    if 'LFMM' in wildcards.method :
        return rules.merge_method.output.outliers_combined
    elif 'PCADAPT' in wildcards.method :
        return rules.merge_method.output.outliers_combined

rule mergetags:
    """
    assembly significant kmers obtained by lfmm  by mergeTags
    https://github.com/Transipedia/dekupl-mergeTags
    """
    threads: get_threads('mergetags', 1)
    input:
        merged_pvalues = outliers_to_mergetags,
    params:
        method = f"{{method}}",
        dir = f"{output_dir}12.ASSEMBLY_OUTLIERS_{{method}}",
        kmer_size = config['PARAMS']['KMERS_MODULE']['KMER_SIZE'],
        min_overlap = config['PARAMS']['ASSEMBLY_KMERS']['OVERLAP_SIZE'],
        contig_size = config['PARAMS']['ASSEMBLY_KMERS']['FILTER_CONTIG_SIZE'],
        tmp4mergetags = temp(f"{output_dir}12.ASSEMBLY_OUTLIERS_{{method}}/tmp4mergetags.csv"),
    output:
        assembled_outliers = f"{output_dir}12.ASSEMBLY_OUTLIERS_{{method}}/outliers_{{method}}_mergetags.fasta",
        assembled_csv= f"{output_dir}12.ASSEMBLY_OUTLIERS_{{method}}/outliers_{{method}}_mergetags.csv",
    log:
        output = f"{output_dir}LOGS/12.ASSEMBLY_OUTLIERS_{{method}}/OUTLIERS_MERGETAGS.o",
        error = f"{output_dir}LOGS/12.ASSEMBLY_OUTLIERS_{{method}}/OUTLIERS_MERGETAGS.e",
    benchmark:
        f"{output_dir}BENCHMARK/OUTLIERS_{{method}}_MERGETAGS.txt",
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            merged_pvalues : {input.merged_pvalues}
        output:
            assembled_outliers : {output.assembled_outliers}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        (
        if [[ {params.method} == LFMM ]]; then
            mkdir -p {params.dir}; cd {params.dir} 
            awk '{{print $1\"\\t"$4\"\\t"$4\"\\t\"$4\"\\t\"$3}}' {input.merged_pvalues} > {params.tmp4mergetags}
            mergeTags -k 31 -m 15 -n  {params.tmp4mergetags} > {output.assembled_csv}
            awk '{{ if (NR>1 && length($2)>={params.contig_size}) print \">mergeTags_\"length($2)\"_\"$3\"\\n\"$2}}' {output.assembled_csv} > {output.assembled_outliers}
            rm {params.tmp4mergetags}
        fi

        if [[ {params.method} == PCADAPT ]]; then
            mkdir -p {params.dir}; cd {params.dir} 
            awk '{{print $2\"\\t"$6\"\\t"$6\"\\t\"$6\"\\t\"$5}}' {input.merged_pvalues} > {params.tmp4mergetags}
            mergeTags -k 31 -m 15 -n  {params.tmp4mergetags} > {output.assembled_csv}
            awk '{{ if (NR>1 && length($2)>={params.contig_size}) print \">mergeTags_\"length($2)\"_\"$3\"\\n\"$2}}' {output.assembled_csv} > {output.assembled_outliers}
            rm {params.tmp4mergetags}
        fi
        )1> {log.error} 2> {log.output}
        """


##################### MAPPING CONTIGS

def fasta_to_map(wildcards):
    if 'LFMM' in wildcards.method :
        #return rules.mergetags_lfmm.output.assembled_outliers
        return f"{output_dir}12.ASSEMBLY_OUTLIERS_LFMM/outliers_LFMM_mergetags.fasta",
    elif 'PCADAPT' in wildcards.method :
        #return rules.mergetags_pcadapt.output.assembled_outliers
        return f"{output_dir}12.ASSEMBLY_OUTLIERS_PCADAPT/outliers_PCADAPT_mergetags.fasta"


def get_ref(wildcards):
    if config['PARAMS']['MAPPING_KMERS']:
        if (basename_reference == basename_reference2):
            if config['PARAMS']['MAPPING_KMERS']['MODE'] == "bwa-mem2" :
                return (rules.index_ref.output.new_ref)
            elif config['PARAMS']['MAPPING_KMERS']['MODE'] == "bwa-aln":
                return (rules.index_ref_to_assembly.output.new_ref) 
        else :
            return (rules.index_ref_to_assembly.output.new_ref) 
            # if config['PARAMS']['MAPPING_KMERS']['MODE'] == "bwa-mem2" :
            #     return (rules.index_ref.output.new_ref)
            # elif config['PARAMS']['MAPPING_KMERS']['MODE'] == "bwa-aln":
            #     return (rules.index_ref.output.new_ref)
    else : 
            return (rules.index_ref_to_assembly.output.new_ref) 

rule mapping_contigs:
    """
    mapping contigs vs ref
    """
    threads: get_threads('mapping_contigs', 4)
    input:
        fasta = fasta_to_map,
        #ref = rules.index_ref_to_assembly.output.new_ref,
        #index_tag = rules.index_ref_to_assembly.output.index_tag
        ref = get_ref
    params:
        dir = f"{output_dir}12.ASSEMBLY_OUTLIERS_{{method}}/",
        sai = f"{output_dir}12.ASSEMBLY_OUTLIERS_{{method}}/contigs_{{method}}_vs_{basename_reference}.sai",
        bam = f"{output_dir}12.ASSEMBLY_OUTLIERS_{{method}}/contigs_{{method}}_vs_{basename_reference}.bam",
        options = config['PARAMS']['ASSEMBLY_KMERS']['MAPPING_OPTIONS'],
        mode = "bwa-mem2",
    output:
        sortedbam = f"{output_dir}12.ASSEMBLY_OUTLIERS_{{method}}/contigs_{{method}}_vs_{basename_reference}.sorted.bam",
    log:
        output = f"{output_dir}LOGS/12.ASSEMBLY_OUTLIERS_{{method}}/contigs_{{method}}_vs_{basename_reference}.MAPPING.o",
        error = f"{output_dir}LOGS/12.ASSEMBLY_OUTLIERS_{{method}}/contigs_{{method}}_vs_{basename_reference}.MAPPING.e",
    benchmark:
        f"{output_dir}BENCHMARK/{{method}}_vs_{basename_reference}_MAPPING.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            fasta: {input.fasta}
            ref: {input.ref}
        output:
            bam : {output.sortedbam}
        log:
            output: {log.output}
            error: {log.error} 
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    envmodules:
        tools_config["ENVMODULE"]["BWAMEM2"],
        tools_config["ENVMODULE"]["BWA"],
        tools_config["ENVMODULE"]["SAMTOOLS"]
    shell:
        """
        (cd {params.dir} 
        bwa-mem2 mem {params.options} -t {threads} {input.ref} {input.fasta} > {params.bam};             
        samtools sort {params.bam} -o {output.sortedbam}       
        samtools index {output.sortedbam}
        samtools idxstats {output.sortedbam} > {output.sortedbam}.idxstats 
        samtools stats {output.sortedbam} > {output.sortedbam}.stats) 1> {log.error} 2> {log.output}
        """

rule contigs_bedtools_intersect:
    """
    intersect contigs with bedtools (bam vs gff) 
    """
    threads: get_threads('contigs_intersect', 1)
    input:
        bam = rules.mapping_contigs.output.sortedbam,
        gff = rules.extracting_features_from_gff.output.gff_feature,
    params:
        dir = f"{output_dir}14.CONTIGS_INTERSECT_{{method}}"
    output:
        bed = f"{output_dir}14.CONTIGS_INTERSECT_{{method}}/contigs_intersect_annotation.bed"
    log:
        output = f"{output_dir}LOGS/14.CONTIGS_INTERSECT_{{method}}/BEDTOOLS_CONTIGS_{{method}}_INTERSECT.o",
        error = f"{output_dir}LOGS/14.CONTIGS_INTERSECT_{{method}}/BEDTOOLS_CONTIGS_{{method}}_INTERSECT.e"
    benchmark:
        f"{output_dir}BENCHMARK/BEDTOOLS_CONTIGS_INTERSECT_{{method}}.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            bam : {input.bam}
            gff : {input.gff}
        output:
            bed : {output.bed}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        (cd {params.dir}
        bedtools intersect -abam {input.bam} -b {input.gff} -bed -header -wb -wa > {output.bed})1> {log.error} 2> {log.output}
        """

##################### INTERSECT CONTIGS

rule intersect_and_contigs:
    """
    intersect bedtools (bam vs gff) and lfmm outliers {method}
    """
    threads: get_threads('intersect_and_contigs', 1)
    input:
        bed = rules.contigs_bedtools_intersect.output.bed
    params:
        dir = f"{output_dir}15.CONTIGS_{{method}}_INTERSECT",
        mapq = config["PARAMS"]["INTERSECT"]["FILTER_MAPQ_STATS"]
    output:
        stats_contigs = f"{output_dir}15.CONTIGS_{{method}}_INTERSECT/global_intersect_stats/nb_contigs_by_gene_filter.csv",
    log:
        output = f"{output_dir}LOGS/15.CONTIGS_{{method}}_INTERSECT/CONTIGS_{{method}}_INTERSECT.o",
        error = f"{output_dir}LOGS/15.CONTIGS_{{method}}_INTERSECT/CONTIGS_{{method}}_INTERSECT.e"
    benchmark:
        f"{output_dir}BENCHMARK/CONTIGS_{{method}}_INTERSECT.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            bed : {input.bed}
        output:
            stats_contigs : {output.stats_contigs}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        (cd {params.dir}
        mkdir -p global_intersect_stats ; mkdir -p global_intersect_stats
        python3 {ikiss_obj.snakemake_scripts}/stats_intersect.py --bed {input.bed} --mapq 15 )1> {log.error} 2> {log.output}
        """


##################### INTERSECT KMERS 

rule intersect_and_outliers:
    """
    intersect bedtools (bam vs gff) and lfmm outliers {method}
    """
    threads: get_threads('intersect_and_outliers', 1)
    input:
        outliers = rules.merge_method.output.outliers_combined,
        bed = rules.kmers_bedtools_intersect.output.bed
    params:
        dir = f"{output_dir}15.OUTLIERS_{{method}}_INTERSECT"
    output:
        stats_all = f"{output_dir}15.OUTLIERS_{{method}}_INTERSECT/global_intersect_stats/nb_kmers_by_gene_filter.csv",
        stats_outliers = f"{output_dir}15.OUTLIERS_{{method}}_INTERSECT/outliers_intersect_stats/nb_kmers_by_gene_filter.csv"
    log:
        output = f"{output_dir}LOGS/15.OUTLIERS_{{method}}_INTERSECT/OUTLIERS_{{method}}_INTERSECT.o",
        error = f"{output_dir}LOGS/15.OUTLIERS_{{method}}_INTERSECT/OUTLIERS_{{method}}_INTERSECT.e"
    benchmark:
        f"{output_dir}BENCHMARK/OUTLIERS_{{method}}_INTERSECT.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            outliers : {input.outliers}
            bed : {input.bed}
        output:
            stats_all : {output.stats_all}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        (cd {params.dir}
        mkdir -p global_intersect_stats ; mkdir -p global_intersect_stats
        mkdir -p outliers_intersect_stats ; mkdir -p outliers_intersect_stats
        python3 {ikiss_obj.snakemake_scripts}/stats_intersect.py --outliers {input.outliers} --bed {input.bed} --mapq 15) 1> {log.error} 2> {log.output}
        """


############################ REPORT #######################################################
# report contains dico_final now

rule fastq_stats:
    """
    run fastq_stats
    """
    threads: get_threads('fastq_stats', 8)
    input:
        dir = fastq_dir
    output:
        fastq_table = f"{output_dir}0.FASTQ_STATS/fastq_stats.txt"
    log:
        output = f"{output_dir}LOGS/0.FASTQ_STATS/FASTQ_STATS.o",
        error = f"{output_dir}LOGS/0.FASTQ_STATS/FASTQ_STATS.e"
    benchmark:
        f"{output_dir}BENCHMARK/FASTQ_STATS.txt"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            kmers_to_use : {input.dir}
        output:
            kmers_table: {output.fastq_table}
        log:
            output: {log.output}
            error: {log.error}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    #envmodules:
    #    tools_config["ENVMODULE"]["SEQKIT"],
    shell:
        """
        cd {input.dir}
        seqkit stats -T -j {threads} {input.dir}/* -o {output.fastq_table} 1>{log.error}  2> {log.output}
        sed -i 's|{input.dir}/||' {output.fastq_table}
        sed -i 's|{ikiss_obj.fastq_files_ext}||' {output.fastq_table}
        """

rule report_ikiss:
    """
    rule to recovery all files to report
    """
    threads: get_threads('report_ikiss',1)
    input:
        unpack(output_final)
    params:
        txt_config = f"{output_dir}/config_corrected.yaml",
        samples_list = SAMPLE,
        out_dir_report = directory(f"{output_dir}REPORT"),
        workflow_steps = ikiss_obj.tools_activated,
        fastq_stats = rules.fastq_stats.output.fastq_table,
        list_log_kmer_per_sample = expand(rules.kmers_gwas_per_sample.log.error, sample=SAMPLE) if 'KMERS_MODULE' in ikiss_obj.tools_activated else "",
        kmer_table_rep = f"{output_dir}3.TABLE2BED/" if 'KMERS_MODULE' in ikiss_obj.tools_activated else "",
        plots_pcadapt = f"{output_dir}6.PCADAPT" if 'PCADAPT' in ikiss_obj.tools_activated else "",
        plots_lfmm = f"{output_dir}6.LFMM" if 'LFMM' in ikiss_obj.tools_activated else "",
        phenotype = ikiss_obj.phenotype,
        contig_size = config['PARAMS']['ASSEMBLY_KMERS']['FILTER_CONTIG_SIZE'] if 'ASSEMBLY_KMERS' in ikiss_obj.tools_activated else "",
        ref = reference_file if 'ASSEMBLY_KMERS' in ikiss_obj.tools_activated else "",
        phenotype_pca_html = rules.get_pca_from_phenotype.params.html if config['PARAMS']['LFMM']['PHENOTYPE_PCA_ANALYSIS'] else "",
    output:
        jupyter = f"{output_dir}REPORT/iKISS_report.ipynb"
    log:
        output = f"{output_dir}LOGS/REPORT/report.o",
        error = f"{output_dir}LOGS/REPORT/report.e",
    benchmark:
        f"{output_dir}BENCHMARK/Report.txt",
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            {input}
        params:
            {params}
        output:
            jupyter : {output.jupyter}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    script:
        f"{ikiss_obj.snakemake_scripts}/report.py"


rule html_ikiss:
    """
    rule to build jupyter notebook and generate iKISS report 
    """
    threads: get_threads('html_ikiss',1)
    input:
        notebook = rules.report_ikiss.output.jupyter
    params:
        dir_report = f"{output_dir}REPORT/",
        html_phenotype_lfmm = f"cp {rules.get_pca_from_phenotype.params.html} . " if (config['PARAMS']['LFMM']['PHENOTYPE_PCA_ANALYSIS'] and 'LFMM' in ikiss_obj.tools_activated) else "",
        jupyter_phenotype_lfmm= f' cp {rules.get_pca_from_phenotype.params.jupyter} . ' if (config['PARAMS']['LFMM']['PHENOTYPE_PCA_ANALYSIS'] and 'LFMM' in ikiss_obj.tools_activated) else "",
    output:
        html=f"{output_dir}REPORT/iKISS_report.html"
    message:
        """
        Launching {rule}
        threads: {threads}
        input:
            notebook : {input.notebook}
        output:
            html : {output.html}
        """
    singularity:
        tools_config['SINGULARITY']['TOOLS']
    shell:
        """
        cd {params.dir_report}
        jupyter nbconvert --execute {input.notebook} --no-input --ExecutePreprocessor.timeout=6000 --to=html
        {params.html_phenotype_lfmm}
        {params.jupyter_phenotype_lfmm}
        """