# Scatter-gather pattern
# Exercises a common bioinformatics pattern: split an input into partitions,
# process each partition independently, then merge the results.

# Partition labels used to build the per-partition file names
# (part_1 ... part_5) in the scatter and gather rules.
PARTITIONS = [1, 2, 3, 4, 5]

# Aggregate target: requests the final merged file, which pulls in the
# gather rule and everything upstream of it.
rule all:
    input:
        merged="output/merged_result.txt"

# Generates the synthetic input: one header line followed by
# "Line 1" through "Line 100" (101 lines in total).
rule prepare_data:
    output:
        data="output/input_data.txt"
    shell:
        """
        # Emit header and numbered lines through a single redirect.
        (
            echo "Original input data"
            for line_no in $(seq 1 100); do
                echo "Line $line_no"
            done
        ) > {output.data}
        """

# Scatter step: splits the prepared input into one chunk per entry of
# PARTITIONS and writes each chunk to output/partitions/part_<n>.txt.
# The chunk count is derived from PARTITIONS rather than hard-coded, so the
# split arithmetic and the rename loop always agree with the declared outputs.
rule scatter:
    input:
        "output/input_data.txt"
    output:
        expand("output/partitions/part_{n}.txt", n=PARTITIONS)
    params:
        # Number of chunks to produce, kept in sync with the declared outputs.
        n_parts=len(PARTITIONS)
    shell:
        """
        mkdir -p output/partitions
        # Remove chunks left behind by an interrupted earlier run so they
        # cannot be mistaken for fresh ones below.
        rm -f output/partitions/temp_*
        # Split the input into partitions
        total_lines=$(wc -l < {input})
        # floor(total / n) + 1 lines per chunk yields at most n chunks.
        lines_per_part=$((total_lines / {params.n_parts} + 1))
        # Three-digit numeric suffixes: temp_000, temp_001, ...
        split -l "$lines_per_part" -d -a 3 {input} output/partitions/temp_
        # Pair the k-th chunk (0-based) with the k-th partition label.
        chunk_idx=0
        for n in {PARTITIONS}; do
            chunk=$(printf 'output/partitions/temp_%03d' "$chunk_idx")
            target=output/partitions/part_$n.txt
            if [ -e "$chunk" ]; then
                # A failing mv now aborts the rule instead of being hidden.
                mv "$chunk" "$target"
            else
                # Input had fewer chunks than partitions: create an empty
                # file so every declared output exists.
                touch "$target"
            fi
            chunk_idx=$((chunk_idx + 1))
        done
        """

# Per-partition step: for partition <n>, writes a two-line report holding
# a "Processed partition <n>" label and the partition's line count.
rule process_partition:
    input:
        part="output/partitions/part_{n}.txt"
    output:
        report="output/processed/part_{n}.processed.txt"
    shell:
        """
        mkdir -p output/processed
        # Label first, then the line count, through a single redirect.
        (
            echo "Processed partition {wildcards.n}"
            wc -l < {input.part}
        ) > {output.report}
        """

# Gather step: concatenates every processed partition report, in
# PARTITIONS order, beneath a "Merged results:" header line.
rule gather:
    input:
        reports=expand("output/processed/part_{n}.processed.txt", n=PARTITIONS)
    output:
        merged="output/merged_result.txt"
    shell:
        """
        # Header followed by all reports, written through a single redirect.
        (
            echo "Merged results:"
            cat {input.reports}
        ) > {output.merged}
        """
