# Fan-out and fan-in pattern
# One job creates multiple outputs, many jobs process them, one job gathers

# Names of the chunks fanned out by the workflow: chunk_1 .. chunk_5.
CHUNKS = [f"chunk_{index}" for index in range(1, 6)]

# Default target: requests the final gathered file, so Snakemake has to
# schedule whichever rules are needed to produce it.
rule all:
    input: "output/gathered.txt"

# Fan-out source: one job that writes a text file for every entry in CHUNKS.
rule create_chunks:
    output: expand("output/chunks/{chunk}.txt", chunk=CHUNKS)
    shell:
        """
        mkdir -p output/chunks
        # Walk the declared output paths; the chunk name is the file name
        # without its .txt suffix.
        for target in {output}; do
            chunk="$(basename "$target" .txt)"
            echo "Data for $chunk" > "$target"
        done
        """

# Per-chunk worker: the {chunk} wildcard maps one chunk file to one
# processed file, so each requested chunk gets its own job.
rule process_chunk:
    input: "output/chunks/{chunk}.txt"
    output: "output/processed/{chunk}.done.txt"
    shell:
        """
        mkdir -p "$(dirname {output})"
        # Prefix the chunk's contents with "Processed: " and store the result.
        echo "Processed: $(cat {input})" > {output}
        """

# Fan-in: concatenates every processed chunk (in CHUNKS order) into a single
# file and appends a closing marker line.
rule gather:
    input: expand("output/processed/{chunk}.done.txt", chunk=CHUNKS)
    output: "output/gathered.txt"
    shell:
        """
        (
            cat {input}
            echo "All chunks gathered"
        ) > {output}
        """
