Coverage for tests/conftest.py: 100%

63 statements  

« prev     ^ index     » next       coverage.py v7.11.1, created at 2025-11-10 18:41 +0100

1from pytest import fixture 

2import pysam 

3 

4 

@fixture
def make_test_bed_file(tmp_path):
    """Write a small three-column BED file into ``tmp_path`` and return its path.

    The file is named ``test_regions.bed`` and holds one tab-separated
    ``chrom / start / end`` record per line, each terminated by a newline.
    """
    regions = (
        ("chr1", 1000, 2000),
        ("chr2", 1500, 2500),
        ("chr3", 0, 100),
        ("chr4", 50, 150),
        ("chr5", 3000, 4000),
        ("chrM", 200, 800),
        ("chrX", 500, 1500),
        ("chrUn_gl000220", 100, 300),
    )
    bed_text = "".join(
        f"{chrom}\t{start}\t{end}\n" for chrom, start, end in regions
    )

    target = tmp_path / "test_regions.bed"
    target.write_text(bed_text)
    return target

19 

20 

def _create_read(name, ref_id, pos, length, flag, mate_pos=None, isize=None):
    """Helper function to create a properly configured read.

    The read is a single alignment-match block of ``length`` bases with a
    mapping quality of 60, a sequence of ``length`` "A" characters and a
    quality string of ``length`` "I" characters.

    For paired-end reads, provide mate_pos and isize.
    For single-end reads, leave mate_pos and isize as None.

    Args:
        name: Value stored as the read's query name.
        ref_id: Reference id the read is placed on; when ``mate_pos`` is
            given it is also used as the mate's reference id.
        pos: Reference start position of the read.
        length: Number of bases in the read (and in its CIGAR match block).
        flag: Value stored as the read's flag.
        mate_pos: Reference start of the mate, or None for no mate info.
        isize: Template length; only used when ``mate_pos`` is given, and
            stored as 0 if left as None.

    Returns:
        The populated ``pysam.AlignedSegment``.
    """
    read = pysam.AlignedSegment()
    read.query_name = name
    read.reference_id = ref_id
    read.reference_start = pos
    # Use ``cigartuples`` rather than the deprecated ``cigar`` alias.
    # Operation code 0 is an alignment match spanning the whole read.
    read.cigartuples = ((0, length),)
    read.mapping_quality = 60
    # Keep the sequence assignment ahead of the qualities: pysam documents
    # that setting the sequence invalidates previously set qualities.
    read.query_sequence = "A" * length
    read.query_qualities = pysam.qualitystring_to_array("I" * length)
    read.flag = flag

    # Set mate information only for paired-end reads
    if mate_pos is not None:
        read.next_reference_id = ref_id
        read.next_reference_start = mate_pos
        read.template_length = isize if isize is not None else 0

    return read

44 

45 

@fixture
def make_test_bam_file_paired(tmp_path, make_test_bed_file):
    """Create a sorted, indexed paired-end BAM file for testing.

    For every region in the BED file produced by ``make_test_bed_file`` two
    read pairs are written, all on the region's chromosome:

    * ``pair1_<chrom>_<start>``: 100-base mates starting at
      ``max(1, start - 10)`` and ``end + 5``.
    * ``pair2_<chrom>_<start>``: 80-base mates starting at
      ``max(1, end - 50)`` and ``end + 30``.

    Returns:
        The path (as ``str``) of the sorted BAM file; an index is built
        next to it.
    """
    contigs = (
        ("chr1", 5000),
        ("chr2", 5000),
        ("chrX", 5000),
        ("chrM", 1000),
        ("chrUn_gl000220", 300),
        ("chr3", 5000),
        ("chr4", 5000),
        ("chr5", 5000),
    )
    header = {
        "HD": {"VN": "1.0"},
        "SQ": [{"LN": size, "SN": contig} for contig, size in contigs],
    }

    unsorted_bam = tmp_path / "test_reads.bam"
    with pysam.AlignmentFile(unsorted_bam, "wb", header=header) as bam:
        for record in make_test_bed_file.read_text().strip().split("\n"):
            chrom, start_text, end_text = record.split("\t")[:3]
            start = int(start_text)
            end = int(end_text)
            tid = bam.get_tid(chrom)

            # (name prefix, first mate start, second mate start, read length)
            layouts = (
                # Pair 1: Completely spans the region
                ("pair1", max(1, start - 10), end + 5, 100),
                # Pair 2: Partially outside the region
                ("pair2", max(1, end - 50), end + 30, 80),
            )
            for prefix, first_pos, second_pos, read_len in layouts:
                # Distance from the first mate's start to the second mate's end.
                tlen = (second_pos + read_len) - first_pos
                qname = f"{prefix}_{chrom}_{start}"
                # 99 = paired, proper pair, mate reverse, first in pair.
                bam.write(
                    _create_read(
                        qname, tid, first_pos, read_len, 99, second_pos, tlen
                    )
                )
                # 147 = paired, proper pair, read reverse, second in pair.
                bam.write(
                    _create_read(
                        qname, tid, second_pos, read_len, 147, first_pos, -tlen
                    )
                )

    # Sort and index the BAM file
    sorted_bam = str(tmp_path / "test_reads_sorted.bam")
    pysam.sort("-o", sorted_bam, str(unsorted_bam))
    pysam.index(sorted_bam)
    return sorted_bam

88 

89 

@fixture
def make_test_bam_file_single(tmp_path, make_test_bed_file):
    """Create a sorted, indexed single-end BAM file for testing.

    For every region in the BED file produced by ``make_test_bed_file`` two
    reads without mate information are written on the region's chromosome:

    * ``read1_<chrom>_<start>``: flag 0, starting at ``max(1, start - 10)``
      with length ``end - start + 20``.
    * ``read2_<chrom>_<start>``: flag 16, starting at ``end - 30`` with
      length 60.

    Returns:
        The path (as ``str``) of the sorted BAM file; an index is built
        next to it.
    """
    contigs = (
        ("chr1", 5000),
        ("chr2", 5000),
        ("chrX", 5000),
        ("chrM", 1000),
        ("chrUn_gl000220", 300),
        ("chr3", 5000),
        ("chr4", 5000),
        ("chr5", 5000),
    )
    header = {
        "HD": {"VN": "1.0"},
        "SQ": [{"LN": size, "SN": contig} for contig, size in contigs],
    }

    unsorted_bam = tmp_path / "test_reads_single.bam"
    with pysam.AlignmentFile(unsorted_bam, "wb", header=header) as bam:
        for record in make_test_bed_file.read_text().strip().split("\n"):
            chrom, start_text, end_text = record.split("\t")[:3]
            start = int(start_text)
            end = int(end_text)
            tid = bam.get_tid(chrom)

            # (name prefix, read start, read length, flag)
            layouts = (
                # Read 1: Completely spans the region (forward strand)
                ("read1", max(1, start - 10), end - start + 20, 0),
                # Read 2: Partially outside the region (reverse strand)
                ("read2", end - 30, 60, 16),
            )
            for prefix, read_pos, read_len, read_flag in layouts:
                qname = f"{prefix}_{chrom}_{start}"
                bam.write(
                    _create_read(qname, tid, read_pos, read_len, flag=read_flag)
                )

    # Sort and index the BAM file
    sorted_bam = str(tmp_path / "test_reads_single_sorted.bam")
    pysam.sort("-o", sorted_bam, str(unsorted_bam))
    pysam.index(sorted_bam)
    return sorted_bam

130 return sorted_bam_path