Coverage for tests/test_multiple_outputs.py: 99%
86 statements
« prev ^ index » next coverage.py v7.11.1, created at 2025-11-10 18:41 +0100
« prev ^ index » next coverage.py v7.11.1, created at 2025-11-10 18:41 +0100
1from optwps import WPS
def test_multiple_outputs_with_chrom_placeholder(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that a ``{chrom}`` template yields one output file per chromosome.

    Each expected per-chromosome file must exist, be non-empty, and contain
    only rows whose first tab-separated column equals that chromosome.
    """
    runner = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
    )
    runner.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=str(tmp_path / "wps_{chrom}.tsv"),
    )

    # One file is expected for every chromosome listed here.
    for chrom in ("1", "2", "3", "4", "5", "X"):
        output_file = tmp_path / f"wps_{chrom}.tsv"
        found = output_file.exists()
        assert found, f"Expected output file for chromosome {chrom} not found"

        with open(output_file) as handle:
            rows = handle.readlines()

        assert len(rows) > 0, f"Output file for chromosome {chrom} is empty"
        # The first column of every row must name the chromosome of its file.
        for row in rows:
            first_col = row.strip().split("\t")[0]
            assert (
                first_col == chrom
            ), f"Found data for wrong chromosome in {output_file}"
def test_multiple_outputs_with_target_placeholder(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that a ``{target}`` template yields one output file per BED region.

    For every BED line on an accepted chromosome, a file named
    ``wps_<chrom>_<start>_<end>.tsv`` must exist, be non-empty, and contain
    only rows on that chromosome within the region bounds.
    """
    accepted = ("1", "2", "X", "3", "4", "5")
    runner = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms=set(accepted),
    )
    runner.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=str(tmp_path / "wps_{target}.tsv"),
    )

    # The BED file is the source of truth for which targets should exist.
    with open(make_test_bed_file) as bed_handle:
        regions = bed_handle.readlines()

    produced = list(tmp_path.glob("wps_*.tsv"))
    assert len(produced) > 0, "No output files were created"

    for region in regions:
        chrom, start, end = region.strip().split()[:3]
        chrom = chrom.replace("chr", "")
        if chrom in accepted:
            target_name = f"{chrom}_{start}_{end}"
            target_file = tmp_path / f"wps_{target_name}.tsv"
            assert target_file.exists(), f"Expected target file {target_file} not found"

            with open(target_file) as target_handle:
                rows = target_handle.readlines()

            assert len(rows) > 0
            for row in rows:
                fields = row.strip().split("\t")
                row_chrom, row_start, row_end = (
                    fields[0],
                    int(fields[1]),
                    int(fields[2]),
                )

                assert row_chrom == chrom
                assert row_start >= int(start)
                assert row_end <= int(end) + 1  # end is exclusive
def test_multiple_outputs_with_both_placeholders(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that ``{chrom}`` and ``{target}`` can be combined in one template.

    Only the naming pattern of the produced files is verified: each must
    start with ``wps_``, end with ``.tsv``, and have at least four
    underscore-separated components in between.
    """
    runner = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
    )
    runner.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=str(tmp_path / "wps_{chrom}_{target}.tsv"),
    )

    produced = list(tmp_path.glob("wps_*.tsv"))
    assert len(produced) > 0, "No output files were created"

    for path in produced:
        filename = path.name
        assert filename.startswith("wps_")
        assert filename.endswith(".tsv")
        # Expected layout: wps_{chrom}_{chrom}_{start}_{end}.tsv
        core = filename.replace("wps_", "").replace(".tsv", "")
        pieces = core.split("_")
        assert len(pieces) >= 4, f"Unexpected filename format: {filename}"
def test_multiple_outputs_handles_chunking(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Test that multiple chunks of the same region are written to the same file.

    A very small ``chunk_size`` is used so that each region is processed in
    several chunks. Every per-chromosome file must exist, be non-empty, and
    list its positions (second tab-separated column) in sorted order.
    """
    # Use a very small chunk size to force multiple chunks per region
    wps = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
        chunk_size=100,  # Very small chunk size
    )

    output_template = str(tmp_path / "wps_{chrom}.tsv")
    wps.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=output_template,
    )

    for chrom in ["1", "2", "3", "4", "5", "X"]:
        output_file = tmp_path / f"wps_{chrom}.tsv"
        # Assert existence instead of guarding with ``if``: a silent skip
        # would let the test pass even when no output is written at all.
        assert (
            output_file.exists()
        ), f"Expected output file for chromosome {chrom} not found"

        with open(output_file) as f:
            lines = f.readlines()
        assert len(lines) > 0

        # Positions must be sorted; gaps between bed regions are allowed,
        # so strict continuity is deliberately not checked.
        positions = [int(line.split("\t")[1]) for line in lines]
        assert positions == sorted(
            positions
        ), f"Positions are not sorted in {output_file}"
def test_single_output_still_works(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that a placeholder-free output path still yields a single file.

    Exactly one ``wps_*.tsv`` file must be present in ``tmp_path`` after the
    run, and that file must contain at least one line.
    """
    runner = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
    )

    single_path = str(tmp_path / "wps_single.tsv")
    runner.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=single_path,
    )

    # No placeholder in the path, so nothing should have been fanned out.
    output_files = list(tmp_path.glob("wps_*.tsv"))
    assert len(output_files) == 1, f"Expected 1 output file, found {len(output_files)}"

    with open(single_path) as handle:
        rows = handle.readlines()
    assert len(rows) > 0
def test_multiple_outputs_with_gzip(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that per-chromosome outputs work with a ``.tsv.gz`` template.

    At least one ``wps_*.tsv.gz`` file must be produced, and each produced
    file must be readable as gzip text and contain at least one line.
    """
    import gzip

    runner = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
    )
    runner.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=str(tmp_path / "wps_{chrom}.tsv.gz"),
    )

    archives = list(tmp_path.glob("wps_*.tsv.gz"))
    assert len(archives) > 0, "No gzipped output files were created"

    # Opening in text mode proves each file is a valid gzip stream.
    for archive in archives:
        with gzip.open(archive, "rt") as handle:
            rows = handle.readlines()
        assert len(rows) > 0