Coverage for src/optwps/utils.py: 90%
23 statements
« prev ^ index » next coverage.py v7.11.1, created at 2025-11-10 18:41 +0100
« prev ^ index » next coverage.py v7.11.1, created at 2025-11-10 18:41 +0100
1"""
2Utility functions for optwps package.
4This module provides helper functions for BAM file processing and file I/O operations.
5"""
# Private alias for the built-in open(); exopen() calls it for paths that are
# neither "stdout" nor gzip-compressed.
# NOTE(review): presumably kept so the builtin stays reachable if `open` is
# rebound elsewhere in the package — confirm against the callers.
7_open = open
9import os
10import sys
11import pigz
12import gzip
14from contextlib import nullcontext
def is_soft_clipped(cigar):
    """
    Check if a read has soft clipping in its CIGAR string.

    Soft clipping (op=4) indicates that some bases at the start or end of the
    read are not aligned to the reference but are present in the sequence.

    Args:
        cigar (list | None): CIGAR tuples from pysam
            AlignedSegment.cigartuples. Each tuple is (operation, length).
            ``None`` or an empty list (no alignment present) is accepted.

    Returns:
        bool: True if any soft clipping operation is present, False otherwise
            (including when no CIGAR is available).
    """
    # pysam reports a missing alignment as None rather than an empty list;
    # treat both the same instead of failing on iteration.
    if not cigar:
        return False
    return any(op == 4 for op, _ in cigar)
def ref_aln_length(cigar):
    """
    Calculate the length of alignment on the reference sequence from CIGAR.

    Sums the lengths of the operations that consume reference bases:
    M(0), D(2), N(3), =(7), X(8).

    Args:
        cigar (list | None): CIGAR tuples from pysam
            AlignedSegment.cigartuples. Each tuple is (operation, length).
            ``None`` or an empty list (no alignment present) is accepted.

    Returns:
        int: Total length on reference sequence; 0 when no CIGAR is available.
    """
    # pysam reports a missing alignment as None rather than an empty list.
    if not cigar:
        return 0
    ref_consuming_ops = (0, 2, 3, 7, 8)
    return sum(length for op, length in cigar if op in ref_consuming_ops)
def exopen(fil: str, mode: str = "r", *args, use_pigz=True, njobs=-1, **kwargs):
    """
    Open a file with automatic gzip support, or hand back standard output.

    Dispatches on ``fil``:

    * ``"stdout"``: returns ``sys.stdout`` wrapped in ``nullcontext`` so it can
      be used in a ``with`` block without being closed on exit.
    * a name ending in ``".gz"``: opened with ``pigz.open`` (when ``use_pigz``
      is true) or ``gzip.open``. Text mode is selected unless ``mode`` already
      specifies binary or text.
    * anything else: opened with the built-in ``open``.

    Args:
        fil (str): Path to the file to open, or 'stdout' for standard output
        mode (str, optional): File open mode ('r', 'w', 'rb', 'wb', etc.).
            Default: 'r'
        *args: Additional positional arguments passed to the open function
        use_pigz (bool, optional): Whether to open ``.gz`` files with pigz
            instead of the standard gzip module. Default: True
        njobs (int, optional): Number of parallel jobs for gzip compression.
            Accepted for backward compatibility; this function does not
            forward it to the opener. Default: -1
        **kwargs: Additional keyword arguments passed to the open function

    Returns:
        file object: Opened file handle (stdout wrapped in a null context
            manager, standard file, or gzipped file)

    Raises:
        AssertionError: If ``fil`` is 'stdout' and ``mode`` requests reading.
    """
    if fil == "stdout":
        # Raised explicitly rather than via `assert` so the check survives
        # `python -O`; the exception type is kept for existing callers.
        if "r" in mode:
            raise AssertionError("Cannot open stdout in read mode")
        return nullcontext(sys.stdout)

    if fil.endswith(".gz"):
        open_func = pigz.open if use_pigz else gzip.open
        # Default to text mode, but never append a second "t" (e.g. "rt").
        gz_mode = mode if ("b" in mode or "t" in mode) else mode + "t"
        try:
            return open_func(fil, gz_mode, *args, **kwargs)
        except Exception:
            # Best-effort fallback: the opener may not accept the extra
            # arguments, so retry without them. With nothing to drop, the
            # retry would be the same call, so surface the original error.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            if not args and not kwargs:
                raise
            return open_func(fil, gz_mode)

    return _open(fil, mode, *args, **kwargs)