ontmont package#
Submodules#
ontmont.bundle module#
- ontmont.bundle.make_brk_supports(bundle)#
Count supports for unique breakpoint coordinates
- Parameters:
bundle (list) – List of BreakpointChain
- Returns:
pandas.Series – Count of unique breakpoints, taking chrom, pos, ori into account
- ontmont.bundle.make_brk_table(bundle, brk_supports, unilateral_score_cutoff=5, bilateral_score_cutoff=8)#
Create a dataframe of breakpoints
- Parameters:
bundle (list) – List of BreakpointChain variables
brk_supports (dict | pandas.Series) – Number of supports for each breakpoint coordinate
unilateral_score_cutoff (int, optional) – Alignment score cutoff for unilateral IR. Defaults to 5.
bilateral_score_cutoff (int, optional) – Alignment score cutoff for bilateral IR. Defaults to 8.
- Returns:
pandas.DataFrame – table of breakpoint coordinates with support and IR statistics
- ontmont.bundle.make_brks_bundle(reads_df, genome, sw_palindrome, sw_holliday, margins=[15, 30, 60])#
Make a list of BreakpointChain based on alignment table, genome, and alignment parameters
- Parameters:
reads_df (pandas.DataFrame) – Table of read alignment statistics
genome (pyfaidx.Fasta) – Genome fasta
sw_palindrome (swalign.LocalAlignment) – Parameters for detecting IR
sw_holliday (swalign.LocalAlignment) – Parameters for detecting homology
margins (list, optional) – Bases to slice from breakpoints. Defaults to [15, 30, 60].
- Returns:
list – list of BreakpointChain
- ontmont.bundle.make_seg_table(bundle, seg_supports, segment_score_cutoff=5)#
Create a dataframe based on a BreakpointChain bundle and supports dict
- Parameters:
bundle (list) – List of BreakpointChain variables
seg_supports (dict | pandas.Series) – Number of supports for each breakpoint coordinate
segment_score_cutoff (int, optional) – Alignment score cutoff for the IR found in the segment. Defaults to 5.
- Returns:
pandas.DataFrame – Table of segment coordinates with supports and IR statistics
- ontmont.bundle.make_tra_table(bundle, tra_supports)#
Make a table of SVs based on bundle and number of supports
- Parameters:
bundle (list) – List of BreakpointChain
tra_supports (dict | pandas.Series) – Support count for SVs
- Returns:
pandas.DataFrame – Table of SVs, with duplicates removed and SVs located on contig termini removed
ontmont.cli module#
ontmont.collect module#
- ontmont.collect.extract_split_alignments(reads, max_reads=500)#
Extract SplitAlignment objects from IteratorRow with a max_reads parameter
- Parameters:
reads (pysam.IteratorRow) – Reads fetched from a pysam.AlignmentFile
max_reads (int, optional) – Number of reads to extract at maximum. Defaults to 500.
- Returns:
list – list of SplitAlignment objects
- ontmont.collect.find_presence_of_matching_sv(sv1, sv2, margin=50)#
Check overlap of sv2 for sv1 table
- Parameters:
sv1 (pandas.DataFrame) – SV table to label matching SVs
sv2 (pandas.DataFrame) – SV table reference to check presence of overlap
margin (int, optional) – Margin (bp) of breakpoint coordinate difference. Defaults to 50.
- Returns:
pandas.Series – {True, False} list of matches. Length equal to sv1 row size.
- ontmont.collect.fix_lower_support_coordinates(complexes, coord_map)#
Map breakpoint of lower support to close-by breakpoint with higher support
- Parameters:
complexes (list) – List of BreakpointChain
coord_map (dict) – Map of str(Breakpoint) coordinates
- Returns:
list – List of BreakpointChain, mapped to fixed coordinates
- ontmont.collect.get_breakpoint_support_from_bundle(complexes)#
Get breakpoint support count
- Parameters:
complexes (list) – List of BreakpointChain
- Returns:
collections.Counter – Support for str(Breakpoint) coordinates
- ontmont.collect.get_normalized_sv(tra)#
Sort (normalize) a BreakpointPair
- Parameters:
tra (BreakpointPair) – Pair of breakpoints
- Returns:
list – Sorted breakpoint coordinates, flattened
- ontmont.collect.get_svtype(tra: BreakpointPair)#
Get SV type string for a given BreakpointPair
- Parameters:
tra (BreakpointPair) – Paired breakpoint object
- Raises:
ValueError – If no SV type has been assigned
- Returns:
str – SV type string
- ontmont.collect.make_brks_bundle(reads_df)#
Make a list of BreakpointChain based on alignment table
- Parameters:
reads_df (pandas.DataFrame) – Table of read alignment statistics
- Returns:
list – List of BreakpointChain
- ontmont.collect.make_tumor_sv_table(complexes, sv=None, margin=10, get_support=True)#
Make SV table from list of BreakpointChain
- Parameters:
complexes (list) – List of BreakpointChain
sv (pandas.DataFrame, optional) – Table of source SVs as reference for in_source flag. Defaults to None
margin (int, optional) – Margin (bp) for merging clustered breakpoints. Defaults to 10.
get_support (bool, optional) – Merge breakpoints with same coordinates and add count as support. Defaults to True.
- Returns:
pandas.DataFrame – SV table from bundle [, with in_source labels] [, collapsed by coordinate with support counts]
- ontmont.collect.map_similar_coordinate_to_higher_rank(complexes, breakpoint_support, margin=10)#
Make mapping of close-by coordinates, with breakpoints of higher support taking priority
- Parameters:
complexes (list) – List of BreakpointChain
breakpoint_support (dict | collections.Counter) – Support for breakpoint coordinates
margin (int, optional) – Margin (bp) to merge close-by coordinates. Defaults to 10.
- Returns:
tuple – tuple containing:
coord_map (dict): src -> dst coordinate
coord_map_log (tuple): (max_coord, src_count, max_count) [only for debugging]
- ontmont.collect.normalize_sv_table(sv, chrom1_col='chromosome_1', chrom2_col='chromosome_2', pos1_col='position_1', pos2_col='position_2', ori1_col='strand_1', ori2_col='strand_2')#
Sort breakpoint1 and breakpoint2 of a SV table
- Parameters:
sv (pandas.DataFrame) – Table of SVs
chrom1_col (str, optional) – Defaults to ‘chromosome_1’.
chrom2_col (str, optional) – Defaults to ‘chromosome_2’.
pos1_col (str, optional) – Defaults to ‘position_1’.
pos2_col (str, optional) – Defaults to ‘position_2’.
ori1_col (str, optional) – Defaults to ‘strand_1’.
ori2_col (str, optional) – Defaults to ‘strand_2’.
- Returns:
pandas.DataFrame – Sorted (normalized) SV table
- ontmont.collect.pull_breakpoints_from_bam_files(bam_paths, sv, get_read_table=False)#
Get BreakpointChain list from BAM file according to an input SV table
- Parameters:
bam_paths (pysam.AlignmentFile) – BAM file
sv (pandas.DataFrame) – SV table
get_read_table (bool, optional) – Return read table as well. Defaults to False.
- Returns:
list [, pandas.DataFrame] – List of BreakpointChain [, table of read alignment stats]
- ontmont.collect.pull_breakpoints_from_reads_in_sv_regions(bam, tra, get_read_table=False, min_n_breakpoint=3, margin=10)#
Extract and append BreakpointChain objects from a bam file and a table of SVs
- Parameters:
bam (pysam.AlignmentFile) – BAM file
tra (pandas.DataFrame) – Table of SVs
get_read_table (bool, optional) – Return table of read alignment stats. Defaults to False.
min_n_breakpoint (int, optional) – Minimum number of breakpoints required to be saved. Useful in selecting complex rearrangements if the number is high. Defaults to 3.
margin (int, optional) – Margin (bp) from breakpoints to fetch reads. Defaults to 10.
- Returns:
list – List of BreakpointChain
- ontmont.collect.pull_sv_supporting_reads_from_bundle(sv, bundle)#
Filter bundle to include BreakpointChain objects that have breakpoints matching that of the input sv table
- Parameters:
sv (pandas.DataFrame) – SV table
bundle (list) – list of BreakpointChain
- Returns:
list – Filtered list of BreakpointChain
ontmont.datatypes module#
- class ontmont.datatypes.Breakpoint(chrom, pos, orientation)#
Bases:
object
- chroms = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'M', 'chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr21', 'chr22', 'chrX', 'chrY', 'chrM']#
- get_breakpoint_seqs(margin, genome)#
- class ontmont.datatypes.BreakpointChain(brks_iterable)#
Bases:
list
- get_segments()#
- get_transitions(sort_transition=False)#
- class ontmont.datatypes.BreakpointPair(brk1, brk2)#
Bases:
object
- class ontmont.datatypes.Sample(long_sample)#
Bases:
object
- long_samples = ['14472B_201', '14472B_202', '14472B_500', '14472B_501', '14472C_203', '14472C_204', '14472C_502', '14472D_101', '14472D_102', '14472D_103', '14472D_104', '14472D_205', '14472_100', '14472_300']#
- sample2group = {'14472B_201': '2', '14472B_202': '2', '14472B_500': '5', '14472B_501': '5', '14472C_203': '2', '14472C_204': '2', '14472C_502': '5', '14472D_101': '1', '14472D_102': '1', '14472D_103': '1', '14472D_104': '1', '14472D_205': '2', '14472_100': '1', '14472_300': '3'}#
- sample2short = {'14472B_201': '201', '14472B_202': '202', '14472B_500': '500', '14472B_501': '501', '14472C_203': '203', '14472C_204': '204', '14472C_502': '502', '14472D_101': '101', '14472D_102': '102', '14472D_103': '103', '14472D_104': '104', '14472D_205': '205', '14472_100': '100', '14472_300': '300'}#
- class ontmont.datatypes.SplitAlignment(cigarstring, read_name, refname, read_pos, strand)#
Bases:
object
- extract_cigar_field()#
- static get_cigar_tuples(cigarstring)#
Returns a cigar tuple from a CIGAR string
- ontmont.datatypes.get_breakpoint_seqs(chrom, pos, margin, genome)#
ontmont.irs module#
- ontmont.irs.align_two_sequences(seq1, seq2, sw, rc=True)#
- ontmont.irs.calc_distance_score(dist1, dist2, dist_cutoff1, dist_cutoff2)#
- ontmont.irs.calc_pval_bilateral_ir(seq1, seq2, sw, src_score, dist_cutoff1, dist_cutoff2, n_iter=1000, random_seed=42)#
- ontmont.irs.calc_pval_holliday(seq1, seq2, negative1, negative2, src_score, sw, dist_cutoff1=1, dist_cutoff2=3, n_iter=1000, random_seed=42)#
- ontmont.irs.calc_pval_onesided_ir(seq, sw, direction, src_score, dist_cutoff, n_iter=1000, random_seed=42)#
- ontmont.irs.calc_pval_segmental_ir(seq1, seq2, ori1, ori2, sw, src_score, dist_cutoff1, dist_cutoff2, n_iter=1000, random_seed=42)#
- ontmont.irs.get_best_holliday_junctions(pair, sw, genome, score_cutoff=4, dist_cutoff1=2, dist_cutoff2=5, margins=[15, 30, 60])#
- ontmont.irs.get_best_ir_within_breakpoints(seq1, seq2, sw, dist_cutoff1=1, dist_cutoff2=3, margins=[15, 30, 60])#
- ontmont.irs.get_best_ir_within_segment(pair, sw, genome, dist_cutoff1=1, dist_cutoff2=3, margins=[15, 30, 60])#
- ontmont.irs.get_best_onesided_ir(seq, direction, sw, dist_cutoff=1, margins=[15, 30, 60])#
- ontmont.irs.get_breakpoint_pair_seq_data(pair)#
- ontmont.irs.get_onesided_ir(seq, sw)#
- ontmont.irs.is_holliday_junction(aln, negative1, negative2, dist_cutoff1=1, dist_cutoff2=3, dist_sum_cutoff=4, score_cutoff=4)#
- ontmont.irs.overlaps(seg1, seg2)#
ontmont.plot module#
- ontmont.plot.add_gene_annotations(ax, gtf, chromosome, start, end, genes=None, already_plotted={}, gene_font_size=8)#
- ontmont.plot.add_sv_legend_to_axes(sv_axes, svcolors, supports, alpha=0.5, loc1=(0.82, 0.5), loc2=(0.82, 0.0), size=8, alignment='left', show_support_legend=True)#
- ontmont.plot.convert_tupleproxy_to_pyranges(exon)#
- ontmont.plot.extract_vaf_from_annotated_sv_table(tra, sv, margin=10)#
- ontmont.plot.fix_coordinate_xticklabels(sv_axes)#
- ontmont.plot.fix_yticks_with_integers(cn_axes)#
- ontmont.plot.get_chrom_xlims(read_svs, read_chroms, sv_x_length_margin)#
- ontmont.plot.get_chromosomes_from_bundle(bundle)#
- ontmont.plot.get_cn_blocks_for_read_xlim(cn, read_chroms, chrom_xlims, bin_size=50000, clones=['Pseudobulk'])#
- ontmont.plot.get_edgecolor(svtype)#
- ontmont.plot.get_gene_repr_exons(gtf, gene, lenient=False)#
- ontmont.plot.get_repr_transcript_id(gtf, gene_name, lenient=False)#
- ontmont.plot.get_sv_table_chroms(sv)#
- ontmont.plot.get_transcript_exons(gtf, transcript_id)#
- ontmont.plot.get_unique_sv_with_support(bundle)#
- ontmont.plot.is_overlap(interval1, interval2)#
- ontmont.plot.make_axes_for_breakpoints(brk_chroms, figsize=(10, 4), hspace=1.5, rowsizes=(1, 4, 5, 1), modes=['vaf', 'cn', 'sv', 'gene'], width_ratios=None)#
- ontmont.plot.make_spline_coordinates(pos1, pos2, sv_y=1, margin_div=5, y_offset=0.05)#
- ontmont.plot.parse_gtf_region(gtf, region)#
- ontmont.plot.plot_cn_on_axes(cn, cn_axes, clone_colors, bin_size=50000, metric='copy', max_cn=6)#
- ontmont.plot.plot_gene_annotations(ax, gtf, chrom, input_svs, margin=50000, gene_font_size=8)#
- ontmont.plot.plot_jabba_cn(chrom_cn, ax, cn_colors, ylim=(0, 6))#
- ontmont.plot.plot_quasijabba(input_svs, cn, cn_clones=['Pseudobulk'], figsize=(12, 4), svlineh=0.8, width_ratios=None, alpha=0.3, loc1=(0.83, 0.75), default_rad=0.25, suptitle='', read_length=None, gtf=None, gene_margin=50000, bin_size=50000, linewidth=0, show_support_legend=True, gene_font_size=8, sv_x_length_margin=0.2, intra_y_offset=1, linetick_divfactor=3, ylim=(0, 10))#
- ontmont.plot.plot_segments_on_axes(plot_data, sv_axes)#
- ontmont.plot.plot_sv_lines_on_cn(plot_data, cn_axes)#
- ontmont.plot.plot_svs_to_cn_segments(input_svs, axes, chrom_cns, svlineh, alpha, _linewidth=0, intra_y_offset=1, linetick_divfactor=3, default_rad=0.25, sv_x_length_margin=0.2, sv_x_offset=100000)#
- ontmont.plot.plot_transition_on_axes(plot_data, sv_axes, alpha=0.5)#
- ontmont.plot.plot_vaf_cn_sv_for_reads(plot_data, sv, cn, clone_ids, fig_row_sizes=(15, 40, 50), cn_window=200000, cn_metric='state')#
- ontmont.plot.plot_vaf_on_axes(plot_data, vaf_axes, clone_colors, flt_ont_sv)#
ontmont.process module#
- ontmont.process.filter_sv_by_clone(sv, vaf_col_str='vaf_')#
- ontmont.process.merge_cn_segments(cn, merge_gap=100000)#
- ontmont.process.update_blocks_and_reset_prev(blocks, prev, row, features=['clone_id', 'chr', 'start', 'end', 'state'])#
ontmont.utils module#
- ontmont.utils.enumerate_breakpoints(df)#
- ontmont.utils.extract_split_alignments(bam, chroms_proc)#
- ontmont.utils.filter_breakpoints_at_contig_ends(brk_df)#
- ontmont.utils.filter_sv_with_breakpoint_at_contig_ends(df)#
- ontmont.utils.get_chromosomes_to_process(bam, drop_expression_vectors=False)#
- ontmont.utils.get_secondaries(read)#
- ontmont.utils.is_breakpoints_not_sorted(chrom1, pos1, chrom2, pos2, chrom_order)#
- ontmont.utils.make_split_read_table(alignments)#
- ontmont.utils.remove_duplicates_from_tra_table(tra_df)#
- ontmont.utils.reverse_complement(seq)#
- ontmont.utils.shuffle_seq(seq)#