Coverage for tests/test_multiple_outputs.py: 99%

86 statements  

« prev     ^ index     » next       coverage.py v7.11.1, created at 2025-11-10 18:41 +0100

1from optwps import WPS 

2 

3 

def test_multiple_outputs_with_chrom_placeholder(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that a ``{chrom}`` placeholder yields one output file per chromosome.

    Runs WPS with an output template containing ``{chrom}`` and then, for each
    expected chromosome, asserts that the file exists, is non-empty, and that
    the first tab-separated column of every line equals that chromosome.
    """
    calculator = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
    )
    calculator.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=str(tmp_path / "wps_{chrom}.tsv"),
    )

    # One file is expected for every chromosome present in the bed file.
    for chrom in ("1", "2", "3", "4", "5", "X"):
        output_file = tmp_path / f"wps_{chrom}.tsv"
        assert (
            output_file.exists()
        ), f"Expected output file for chromosome {chrom} not found"

        with open(output_file) as handle:
            rows = handle.readlines()
        assert len(rows) > 0, f"Output file for chromosome {chrom} is empty"

        # Every row of a per-chromosome file must belong to that chromosome.
        for row in rows:
            first_column = row.strip().split("\t")[0]
            assert (
                first_column == chrom
            ), f"Found data for wrong chromosome in {output_file}"

37 

38 

def test_multiple_outputs_with_target_placeholder(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that a ``{target}`` placeholder yields one output file per bed region.

    For every bed line whose chromosome (with ``chr`` removed) is one of the
    valid chromosomes, a file named ``wps_{chrom}_{start}_{end}.tsv`` must
    exist, be non-empty, and contain only rows that fall inside that region.
    """
    allowed_chroms = ["1", "2", "X", "3", "4", "5"]
    calculator = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms=set(allowed_chroms),
    )
    calculator.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=str(tmp_path / "wps_{target}.tsv"),
    )

    # The bed file defines which targets should have produced a file.
    with open(make_test_bed_file) as bed_handle:
        regions = bed_handle.readlines()

    produced = list(tmp_path.glob("wps_*.tsv"))
    assert len(produced) > 0, "No output files were created"

    for region in regions:
        chrom, start, end = region.strip().split()[:3]
        chrom = chrom.replace("chr", "")
        if chrom in allowed_chroms:
            target_file = tmp_path / f"wps_{chrom}_{start}_{end}.tsv"
            assert target_file.exists(), f"Expected target file {target_file} not found"

            with open(target_file) as target_handle:
                rows = target_handle.readlines()
            assert len(rows) > 0

            # Each row: chromosome, start and end in the first three columns.
            for row in rows:
                fields = row.strip().split("\t")
                row_chrom, row_start, row_end = (
                    fields[0],
                    int(fields[1]),
                    int(fields[2]),
                )

                assert row_chrom == chrom
                assert row_start >= int(start)
                assert row_end <= int(end) + 1  # end is exclusive

87 

88 

def test_multiple_outputs_with_both_placeholders(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that ``{chrom}`` and ``{target}`` can be combined in one template.

    Only the naming pattern of the produced files is verified: each name must
    start with ``wps_``, end with ``.tsv``, and split into at least four
    underscore-separated parts in between.
    """
    calculator = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
    )
    calculator.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=str(tmp_path / "wps_{chrom}_{target}.tsv"),
    )

    produced = list(tmp_path.glob("wps_*.tsv"))
    assert len(produced) > 0, "No output files were created"

    for produced_path in produced:
        filename = produced_path.name
        assert filename.startswith("wps_")
        assert filename.endswith(".tsv")
        # Expected layout: wps_{chrom}_{chrom}_{start}_{end}.tsv
        stem = filename.replace("wps_", "").replace(".tsv", "")
        parts = stem.split("_")
        assert len(parts) >= 4, f"Unexpected filename format: {filename}"

117 

118 

def test_multiple_outputs_handles_chunking(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Test that multiple chunks of the same region are written to the same file.

    WPS is run with a very small ``chunk_size`` so that regions are processed
    in several chunks, using a ``{chrom}`` output template. For every expected
    chromosome the output file must exist, be non-empty, and list its
    positions (second tab-separated column) in non-decreasing order.
    """
    # Use a very small chunk size to force multiple chunks per region
    wps = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
        chunk_size=100,  # Very small chunk size
    )

    output_template = str(tmp_path / "wps_{chrom}.tsv")
    wps.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=output_template,
    )

    for chrom in ["1", "2", "3", "4", "5", "X"]:
        output_file = tmp_path / f"wps_{chrom}.tsv"
        # Assert existence instead of silently skipping: a missing file would
        # otherwise let this test pass without checking anything.
        assert (
            output_file.exists()
        ), f"Expected output file for chromosome {chrom} not found"

        with open(output_file) as f:
            lines = f.readlines()
        assert len(lines) > 0, f"Output file for chromosome {chrom} is empty"

        # Only ordering is checked (not strict continuity), since gaps between
        # bed regions are legitimate.
        positions = [int(line.split("\t")[1]) for line in lines]
        assert positions == sorted(
            positions
        ), f"Positions are not sorted in {output_file}"

151 

152 

def test_single_output_still_works(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that a template without placeholders still produces a single file.

    After the run, exactly one ``wps_*.tsv`` file must exist in the temporary
    directory and the requested output file must contain at least one line.
    """
    output_file = str(tmp_path / "wps_single.tsv")
    calculator = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
    )
    calculator.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=output_file,
    )

    # No placeholder was used, so no extra files may appear.
    output_files = list(tmp_path.glob("wps_*.tsv"))
    assert len(output_files) == 1, f"Expected 1 output file, found {len(output_files)}"

    # The single file must not be empty.
    with open(output_file) as handle:
        rows = handle.readlines()
    assert len(rows) > 0

177 

178 

def test_multiple_outputs_with_gzip(
    make_test_bed_file, make_test_bam_file_paired, tmp_path
):
    """Check that placeholder-based multiple outputs work with ``.gz`` paths.

    Runs WPS with a ``wps_{chrom}.tsv.gz`` template, then asserts that at least
    one such file exists and that each one can be opened with ``gzip`` in text
    mode and contains at least one line.
    """
    calculator = WPS(
        bed_file=str(make_test_bed_file),
        protection_size=120,
        valid_chroms={"1", "2", "X", "3", "4", "5"},
    )
    calculator.run(
        bamfile=str(make_test_bam_file_paired),
        out_filepath=str(tmp_path / "wps_{chrom}.tsv.gz"),
    )

    compressed_outputs = list(tmp_path.glob("wps_*.tsv.gz"))
    assert len(compressed_outputs) > 0, "No gzipped output files were created"

    # Reading through gzip proves the files are valid gzip streams.
    import gzip

    for compressed_path in compressed_outputs:
        with gzip.open(compressed_path, "rt") as handle:
            rows = handle.readlines()
        assert len(rows) > 0