Coverage for src/optwps/utils.py: 90%

23 statements  

« prev     ^ index     » next       coverage.py v7.11.1, created at 2025-11-10 18:41 +0100

1""" 

2Utility functions for optwps package. 

3 

4This module provides helper functions for BAM file processing and file I/O operations. 

5""" 

6 

# Module-private alias for the built-in open(); exopen() calls it for plain
# (non-gzip) files. NOTE(review): presumably kept so the built-in stays
# reachable if the name `open` is shadowed or re-exported -- confirm.
7_open = open 

8 

9import os 

10import sys 

11import pigz 

12import gzip 

13 

14from contextlib import nullcontext 

15 

16 

def is_soft_clipped(cigar):
    """
    Check if a read has soft clipping in its CIGAR string.

    Soft clipping (op=4) indicates that some bases at the start or end of the
    read are not aligned to the reference but are present in the sequence.

    Args:
        cigar (list | None): CIGAR tuples from pysam
            AlignedSegment.cigartuples. Each tuple is (operation, length).
            ``None`` (which pysam reports for reads without an alignment)
            is accepted and treated as "no soft clipping".

    Returns:
        bool: True if any soft clipping operation is present, False otherwise
    """
    # A read without a CIGAR cannot be soft clipped; returning False here
    # avoids a TypeError from iterating over None.
    if cigar is None:
        return False
    return any(op == 4 for op, _ in cigar)

32 

33 

def ref_aln_length(cigar):
    """
    Calculate the length of alignment on the reference sequence from CIGAR.

    Computes the total length consumed on the reference by summing lengths of
    operations that consume reference bases: M(0), D(2), N(3), =(7), X(8).

    Args:
        cigar (list | None): CIGAR tuples from pysam
            AlignedSegment.cigartuples. Each tuple is (operation, length).
            ``None`` (which pysam reports for reads without an alignment)
            is accepted and yields a length of 0.

    Returns:
        int: Total length on reference sequence
    """
    # No CIGAR means nothing was aligned, so no reference bases are consumed.
    if cigar is None:
        return 0
    return sum(length for op, length in cigar if op in (0, 2, 3, 7, 8))

49 

50 

51def exopen(fil: str, mode: str = "r", *args, use_pigz=True, njobs=-1, **kwargs): 

52 """ 

53 Open a file with automatic gzip support and parallel compression. 

54 

55 This function wraps the standard open() function with automatic detection 

56 and handling of gzipped files. When writing gzipped files, parallel compression 

57 (pigz) can be used for better performance on multi-core systems. Also supports 

58 writing to stdout when fil='stdout'. 

59 

60 Args: 

61 fil (str): Path to the file to open, or 'stdout' for standard output 

62 mode (str, optional): File open mode ('r', 'w', 'rb', 'wb', etc.). 

63 Default: 'r' 

64 *args: Additional positional arguments passed to open function 

65 use_pigz (bool, optional): Whether to use pigz for parallel gzip compression. 

66 Falls back to standard gzip when pigz is unavailable or when handling 

67 multiple concurrent writers. Default: True 

68 njobs (int, optional): Number of parallel jobs for gzip compression. 

69 If -1, uses all available CPU cores. Default: -1 

70 **kwargs: Additional keyword arguments passed to open function 

71 

72 Returns: 

73 file object: Opened file handle (stdout, standard file, or gzipped file) 

74 """ 

75 if njobs == -1: 75 ↛ 77line 75 didn't jump to line 77 because the condition on line 75 was always true

76 njobs = os.cpu_count() 

77 if fil == "stdout": 

78 assert "r" not in mode, "Cannot open stdout in read mode" 

79 return nullcontext(sys.stdout) 

80 if fil.endswith(".gz"): 

81 open_func = pigz.open if use_pigz else gzip.open 

82 try: 

83 return open_func( 

84 fil, mode + "t" if not mode.endswith("b") else mode, *args, **kwargs 

85 ) 

86 except BaseException: 

87 return open_func(fil, mode + "t" if not mode.endswith("b") else mode) 

88 

89 return _open(fil, mode, *args, **kwargs)