vg
tools for working with variation graphs
Public Types | Public Member Functions | Public Attributes | Protected Member Functions | Static Protected Member Functions | Protected Attributes | List of all members
vg::Surjector Class Reference

#include <surjector.hpp>

Inheritance diagram for vg::Surjector:
vg::AlignerClient

Public Types

using path_chunk_t = pair< pair< string::const_iterator, string::const_iterator >, path_t >
 a local type that represents a read interval matched to a portion of the alignment path More...
 

Public Member Functions

 Surjector (const PathPositionHandleGraph *graph)
 
Alignment surject (const Alignment &source, const unordered_set< path_handle_t > &paths, string &path_name_out, int64_t &path_pos_out, bool &path_rev_out, bool allow_negative_scores=false, bool preserve_deletions=false) const
 
vector< Alignment > multi_surject (const Alignment &source, const unordered_set< path_handle_t > &paths, vector< tuple< string, int64_t, bool >> &positions_out, bool allow_negative_scores=false, bool preserve_deletions=false) const
 Same as above, but include alignments to all paths instead of only the optimal one. More...
 
Alignment surject (const Alignment &source, const unordered_set< path_handle_t > &paths, bool allow_negative_scores=false, bool preserve_deletions=false) const
 
vector< Alignment > multi_surject (const Alignment &source, const unordered_set< path_handle_t > &paths, bool allow_negative_scores=false, bool preserve_deletions=false) const
 Same as above, but include alignments to all paths instead of only the optimal one. More...
 
multipath_alignment_t surject (const multipath_alignment_t &source, const unordered_set< path_handle_t > &paths, string &path_name_out, int64_t &path_pos_out, bool &path_rev_out, bool allow_negative_scores=false, bool preserve_deletions=false) const
 
vector< multipath_alignment_t > multi_surject (const multipath_alignment_t &source, const unordered_set< path_handle_t > &paths, vector< tuple< string, int64_t, bool >> &positions_out, bool allow_negative_scores=false, bool preserve_deletions=false) const
 Same as above, but include alignments to all paths instead of only the optimal one. More...
 
template<>
int32_t get_score (const Alignment &aln)
 
template<>
int32_t get_score (const multipath_alignment_t &mp_aln)
 
- Public Member Functions inherited from vg::AlignerClient
virtual void set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 Set all the aligner scoring parameters and create the stored aligner instances. More...
 
virtual void set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 
virtual void set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 

Public Attributes

int64_t min_splice_length = 20
 the minimum length deletion that the spliced algorithm will interpret as a splice event More...
 
int64_t dominated_path_chunk_diff = 10
 
int64_t min_splice_repair_length = 250
 the minimum length apparent intron that we will try to repair More...
 
size_t max_subgraph_bases = 100 * 1024
 How big of a graph in bp should we ever try to align against for realigning surjection? More...
 
int64_t min_fold_coverage_for_downsample = 8
 in spliced surject, downsample if the base-wise average coverage by chunks is this high More...
 
int64_t downsample_coverage = 16
 while downsampling, try to get down to this coverage on each base More...
 
int64_t min_shift_for_prune = 32 * 1024
 
int64_t shift_prune_diff = 16 * 1024
 
atomic_flag warned_about_subgraph_size = ATOMIC_FLAG_INIT
 And have we complained about hitting it? More...
 
bool prune_suspicious_anchors = false
 
int64_t max_tail_anchor_prune = 4
 
double low_complexity_p_value = .001
 
size_t max_anchors = 200
 
bool annotate_with_all_path_scores = false
 
- Public Attributes inherited from vg::AlignerClient
bool adjust_alignments_for_base_quality = false
 

Protected Member Functions

void surject_internal (const Alignment *source_aln, const multipath_alignment_t *source_mp_aln, vector< Alignment > *alns_out, vector< multipath_alignment_t > *mp_alns_out, const unordered_set< path_handle_t > &paths, vector< tuple< string, int64_t, bool >> &positions_out, bool all_paths, bool allow_negative_scores, bool preserve_deletions) const
 
Alignment realigning_surject (const PathPositionHandleGraph *graph, const Alignment &source, const path_handle_t &path_handle, bool rev_strand, const vector< path_chunk_t > &path_chunks, const vector< pair< step_handle_t, step_handle_t >> &ref_chunks, pair< step_handle_t, step_handle_t > &path_range_out, bool allow_negative_scores, bool preserve_N_alignments=false, bool sinks_are_anchors=false, bool sources_are_anchors=false, vector< pair< step_handle_t, step_handle_t >> *all_path_ranges_out=nullptr) const
 
multipath_alignment_t spliced_surject (const PathPositionHandleGraph *path_position_graph, const string &src_sequence, const string &src_quality, const int32_t src_mapping_quality, const path_handle_t &path_handle, bool rev_strand, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks, vector< tuple< size_t, size_t, int32_t >> &connections, pair< step_handle_t, step_handle_t > &path_range_out, bool allow_negative_scores, bool deletions_as_splices) const
 
unordered_map< pair< path_handle_t, bool >, pair< vector< path_chunk_t >, vector< pair< step_handle_t, step_handle_t > > > > extract_overlapping_paths (const PathPositionHandleGraph *graph, const Alignment &source, const unordered_set< path_handle_t > &surjection_paths) const
 get the chunks of the alignment path that follow the given reference paths More...
 
unordered_map< pair< path_handle_t, bool >, pair< vector< path_chunk_t >, vector< pair< step_handle_t, step_handle_t > > > > extract_overlapping_paths (const PathPositionHandleGraph *graph, const multipath_alignment_t &source, const unordered_set< path_handle_t > &surjection_paths, unordered_map< pair< path_handle_t, bool >, vector< tuple< size_t, size_t, int32_t >>> &connections_out) const
 same semantics except for a multipath alignment More...
 
void filter_redundant_path_chunks (bool path_rev, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks, vector< tuple< size_t, size_t, int32_t >> &connections) const
 
pair< size_t, size_t > compute_path_interval (const PathPositionHandleGraph *graph, const Alignment &source, path_handle_t path_handle, bool rev_strand, const vector< path_chunk_t > &path_chunks, const vector< pair< step_handle_t, step_handle_t >> &ref_chunks, bool no_left_expansion, bool no_right_expansion) const
 
void set_path_position (const PathPositionHandleGraph *graph, const pos_t &init_surj_pos, const pos_t &final_surj_pos, const step_handle_t &range_begin, const step_handle_t &range_end, bool rev_strand, string &path_name_out, int64_t &path_pos_out, bool &path_rev_out) const
 use the graph position bounds and the path range bounds to assign a path position to a surjected read More...
 
template<class AlnType >
string path_score_annotations (const unordered_map< pair< path_handle_t, bool >, pair< AlnType, pair< step_handle_t, step_handle_t >>> &surjections) const
 
vector< vector< size_t > > reverse_adjacencies (const vector< vector< size_t >> &adj) const
 reverses an adjacency list More...
 
vector< size_t > connected_components (const vector< vector< size_t >> &adj, const vector< vector< size_t >> &rev_adj, size_t *num_comps_out) const
 
vector< vector< size_t > > transitive_reduction (const vector< vector< size_t >> &adj) const
 returns the transitive reduction of a topologically sorted DAG's adjacency list More...
 
vector< vector< size_t > > remove_dominated_chunks (const string &src_sequence, const vector< vector< size_t >> &adj, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks, vector< tuple< size_t, size_t, int32_t >> &connections) const
 eliminate any path chunks that have the exact same colinearities as another but are much shorter More...
 
void cut_anchors (bool rev_strand, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks, vector< tuple< size_t, size_t, int32_t >> &connections) const
 if any anchors overlap each other, cut the second at the implied overlap position More...
 
void downsample_chunks (const string &src_sequence, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks, vector< tuple< size_t, size_t, int32_t >> &connections) const
 if there are too many chunks, downsample to a given level More...
 
vector< pair< vector< size_t >, vector< size_t > > > find_constriction_bicliques (const vector< vector< size_t >> &adj, const string &src_sequence, const string &src_quality, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks, const vector< tuple< size_t, size_t, int32_t >> &connections) const
 
void prune_unconnectable (vector< vector< size_t >> &adj, vector< vector< tuple< size_t, int32_t, bool >>> &splice_adj, vector< size_t > &component, vector< vector< size_t >> &comp_groups, vector< path_chunk_t > &path_chunks, vector< pair< step_handle_t, step_handle_t >> &ref_chunks) const
 
- Protected Member Functions inherited from vg::AlignerClient
 AlignerClient (double gc_content_estimate=vg::default_gc_content)
 
const GSSWAligner * get_aligner (bool have_qualities=true) const
 
const QualAdjAligner * get_qual_adj_aligner () const
 
const Aligner * get_regular_aligner () const
 

Static Protected Member Functions

static Alignment make_null_alignment (const Alignment &source)
 make a sentinel meant to indicate an unmapped read More...
 
static multipath_alignment_t make_null_mp_alignment (const string &src_sequence, const string &src_quality)
 
template<class AlnType >
static int32_t get_score (const AlnType &aln)
 

Protected Attributes

const PathPositionHandleGraph * graph = nullptr
 the graph we're surjecting onto More...
 

Additional Inherited Members

- Static Public Member Functions inherited from vg::AlignerClient
static int8_t * parse_matrix (std::istream &matrix_stream)
 Allocates an array to hold a 4x4 substitution matrix and returns it. More...
 

Member Typedef Documentation

◆ path_chunk_t

using vg::Surjector::path_chunk_t = pair<pair<string::const_iterator, string::const_iterator>, path_t>

a local type that represents a read interval matched to a portion of the alignment path

Constructor & Destructor Documentation

◆ Surjector()

vg::Surjector::Surjector ( const PathPositionHandleGraph *  graph)

Member Function Documentation

◆ compute_path_interval()

pair< size_t, size_t > vg::Surjector::compute_path_interval ( const PathPositionHandleGraph *  graph,
const Alignment &  source,
path_handle_t  path_handle,
bool  rev_strand,
const vector< path_chunk_t > &  path_chunks,
const vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
bool  no_left_expansion,
bool  no_right_expansion 
) const
protected

Compute the widest end-inclusive interval of path positions that the realigned sequence could align to, or an interval where start > end if there are no path chunks.

◆ connected_components()

vector< size_t > vg::Surjector::connected_components ( const vector< vector< size_t >> &  adj,
const vector< vector< size_t >> &  rev_adj,
size_t *  num_comps_out 
) const
protected

Returns a vector assigning each node to a connected component; requires both the forward and reverse adjacency lists. Optionally also returns the total number of components.

◆ cut_anchors()

void vg::Surjector::cut_anchors ( bool  rev_strand,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
vector< tuple< size_t, size_t, int32_t >> &  connections 
) const
protected

if any anchors overlap each other, cut the second at the implied overlap position

◆ downsample_chunks()

void vg::Surjector::downsample_chunks ( const string &  src_sequence,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
vector< tuple< size_t, size_t, int32_t >> &  connections 
) const
protected

if there are too many chunks, downsample to a given level

◆ extract_overlapping_paths() [1/2]

unordered_map< pair< path_handle_t, bool >, pair< vector< Surjector::path_chunk_t >, vector< pair< step_handle_t, step_handle_t > > > > vg::Surjector::extract_overlapping_paths ( const PathPositionHandleGraph *  graph,
const Alignment &  source,
const unordered_set< path_handle_t > &  surjection_paths 
) const
protected

get the chunks of the alignment path that follow the given reference paths

◆ extract_overlapping_paths() [2/2]

unordered_map< pair< path_handle_t, bool >, pair< vector< Surjector::path_chunk_t >, vector< pair< step_handle_t, step_handle_t > > > > vg::Surjector::extract_overlapping_paths ( const PathPositionHandleGraph *  graph,
const multipath_alignment_t &  source,
const unordered_set< path_handle_t > &  surjection_paths,
unordered_map< pair< path_handle_t, bool >, vector< tuple< size_t, size_t, int32_t >>> &  connections_out 
) const
protected

same semantics except for a multipath alignment

◆ filter_redundant_path_chunks()

void vg::Surjector::filter_redundant_path_chunks ( bool  path_rev,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
vector< tuple< size_t, size_t, int32_t >> &  connections 
) const
protected

remove any path chunks and corresponding ref chunks that are identical to a longer path chunk over the region where they overlap

◆ find_constriction_bicliques()

vector< pair< vector< size_t >, vector< size_t > > > vg::Surjector::find_constriction_bicliques ( const vector< vector< size_t >> &  adj,
const string &  src_sequence,
const string &  src_quality,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
const vector< tuple< size_t, size_t, int32_t >> &  connections 
) const
protected

returns all sets of chunks such that 1) all of chunks on the left set abut all of the chunks on the right set on the read, 2) all source-to-sink paths in the connected component go through an edge between the left and right sides, 3) all of the chunks that do not have a connection between them are fully connected (i.e. form a biclique)

◆ get_score() [1/3]

template<>
int32_t vg::Surjector::get_score ( const Alignment &  aln)

◆ get_score() [2/3]

template<class AlnType >
static int32_t vg::Surjector::get_score ( const AlnType &  aln)
static protected

◆ get_score() [3/3]

template<>
int32_t vg::Surjector::get_score ( const multipath_alignment_t &  mp_aln)

◆ make_null_alignment()

Alignment vg::Surjector::make_null_alignment ( const Alignment &  source)
static protected

make a sentinel meant to indicate an unmapped read

◆ make_null_mp_alignment()

multipath_alignment_t vg::Surjector::make_null_mp_alignment ( const string &  src_sequence,
const string &  src_quality 
)
static protected

◆ multi_surject() [1/3]

vector< Alignment > vg::Surjector::multi_surject ( const Alignment &  source,
const unordered_set< path_handle_t > &  paths,
bool  allow_negative_scores = false,
bool  preserve_deletions = false 
) const

Same as above, but include alignments to all paths instead of only the optimal one.

◆ multi_surject() [2/3]

vector< Alignment > vg::Surjector::multi_surject ( const Alignment &  source,
const unordered_set< path_handle_t > &  paths,
vector< tuple< string, int64_t, bool >> &  positions_out,
bool  allow_negative_scores = false,
bool  preserve_deletions = false 
) const

Same as above, but include alignments to all paths instead of only the optimal one.

◆ multi_surject() [3/3]

vector< multipath_alignment_t > vg::Surjector::multi_surject ( const multipath_alignment_t &  source,
const unordered_set< path_handle_t > &  paths,
vector< tuple< string, int64_t, bool >> &  positions_out,
bool  allow_negative_scores = false,
bool  preserve_deletions = false 
) const

Same as above, but include alignments to all paths instead of only the optimal one.

◆ path_score_annotations()

template<class AlnType >
string vg::Surjector::path_score_annotations ( const unordered_map< pair< path_handle_t, bool >, pair< AlnType, pair< step_handle_t, step_handle_t >>> &  surjections) const
protected

◆ prune_unconnectable()

void vg::Surjector::prune_unconnectable ( vector< vector< size_t >> &  adj,
vector< vector< tuple< size_t, int32_t, bool >>> &  splice_adj,
vector< size_t > &  component,
vector< vector< size_t >> &  comp_groups,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks 
) const
protected

◆ realigning_surject()

Alignment vg::Surjector::realigning_surject ( const PathPositionHandleGraph *  graph,
const Alignment &  source,
const path_handle_t &  path_handle,
bool  rev_strand,
const vector< path_chunk_t > &  path_chunks,
const vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
pair< step_handle_t, step_handle_t > &  path_range_out,
bool  allow_negative_scores,
bool  preserve_N_alignments = false,
bool  sinks_are_anchors = false,
bool  sources_are_anchors = false,
vector< pair< step_handle_t, step_handle_t >> *  all_path_ranges_out = nullptr 
) const
protected

◆ remove_dominated_chunks()

vector< vector< size_t > > vg::Surjector::remove_dominated_chunks ( const string &  src_sequence,
const vector< vector< size_t >> &  adj,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
vector< tuple< size_t, size_t, int32_t >> &  connections 
) const
protected

eliminate any path chunks that have the exact same colinearities as another but are much shorter

◆ reverse_adjacencies()

vector< vector< size_t > > vg::Surjector::reverse_adjacencies ( const vector< vector< size_t >> &  adj) const
protected

reverses an adjacency list

◆ set_path_position()

void vg::Surjector::set_path_position ( const PathPositionHandleGraph *  graph,
const pos_t &  init_surj_pos,
const pos_t &  final_surj_pos,
const step_handle_t &  range_begin,
const step_handle_t &  range_end,
bool  rev_strand,
string &  path_name_out,
int64_t &  path_pos_out,
bool &  path_rev_out 
) const
protected

use the graph position bounds and the path range bounds to assign a path position to a surjected read

◆ spliced_surject()

multipath_alignment_t vg::Surjector::spliced_surject ( const PathPositionHandleGraph *  path_position_graph,
const string &  src_sequence,
const string &  src_quality,
const int32_t  src_mapping_quality,
const path_handle_t &  path_handle,
bool  rev_strand,
vector< path_chunk_t > &  path_chunks,
vector< pair< step_handle_t, step_handle_t >> &  ref_chunks,
vector< tuple< size_t, size_t, int32_t >> &  connections,
pair< step_handle_t, step_handle_t > &  path_range_out,
bool  allow_negative_scores,
bool  deletions_as_splices 
) const
protected

◆ surject() [1/3]

Alignment vg::Surjector::surject ( const Alignment &  source,
const unordered_set< path_handle_t > &  paths,
bool  allow_negative_scores = false,
bool  preserve_deletions = false 
) const

Extract the portions of an alignment that are on a chosen set of paths and try to realign the portions that are off of the chosen paths to the intervening path segments to obtain an alignment that is fully restricted to the paths.

Replaces the alignment's refpos with the path name, position, and strand the alignment has been surjected to.

Optionally either allow softclips so that the alignment has a nonnegative score on the path or require the full-length alignment, possibly creating a negative score.

Also optionally leaves deletions against the reference path in the final alignment (useful for splicing).

◆ surject() [2/3]

Alignment vg::Surjector::surject ( const Alignment &  source,
const unordered_set< path_handle_t > &  paths,
string &  path_name_out,
int64_t &  path_pos_out,
bool &  path_rev_out,
bool  allow_negative_scores = false,
bool  preserve_deletions = false 
) const

Extract the portions of an alignment that are on a chosen set of paths and try to realign the portions that are off of the chosen paths to the intervening path segments to obtain an alignment that is fully restricted to the paths.

Also returns the path name, position, and strand of the new alignment.

Optionally either allow softclips so that the alignment has a nonnegative score on the path or require the full-length alignment, possibly creating a negative score.

Also optionally leaves deletions against the reference path in the final alignment (useful for splicing).

◆ surject() [3/3]

multipath_alignment_t vg::Surjector::surject ( const multipath_alignment_t &  source,
const unordered_set< path_handle_t > &  paths,
string &  path_name_out,
int64_t &  path_pos_out,
bool &  path_rev_out,
bool  allow_negative_scores = false,
bool  preserve_deletions = false 
) const

Same semantics as with alignments except that connections are always preserved as splices. The output consists of a multipath alignment with a single path, separated by splices (either from large deletions or from connections)

◆ surject_internal()

void vg::Surjector::surject_internal ( const Alignment *  source_aln,
const multipath_alignment_t *  source_mp_aln,
vector< Alignment > *  alns_out,
vector< multipath_alignment_t > *  mp_alns_out,
const unordered_set< path_handle_t > &  paths,
vector< tuple< string, int64_t, bool >> &  positions_out,
bool  all_paths,
bool  allow_negative_scores,
bool  preserve_deletions 
) const
protected

◆ transitive_reduction()

vector< vector< size_t > > vg::Surjector::transitive_reduction ( const vector< vector< size_t >> &  adj) const
protected

returns the transitive reduction of a topologically sorted DAG's adjacency list

Member Data Documentation

◆ annotate_with_all_path_scores

bool vg::Surjector::annotate_with_all_path_scores = false

◆ dominated_path_chunk_diff

int64_t vg::Surjector::dominated_path_chunk_diff = 10

◆ downsample_coverage

int64_t vg::Surjector::downsample_coverage = 16

while downsampling, try to get down to this coverage on each base

◆ graph

const PathPositionHandleGraph* vg::Surjector::graph = nullptr
protected

the graph we're surjecting onto

◆ low_complexity_p_value

double vg::Surjector::low_complexity_p_value = .001

◆ max_anchors

size_t vg::Surjector::max_anchors = 200

How many anchors (per path) will we use when surjecting using anchors? Excessive anchors will be pruned away.

◆ max_subgraph_bases

size_t vg::Surjector::max_subgraph_bases = 100 * 1024

How big of a graph in bp should we ever try to align against for realigning surjection?

◆ max_tail_anchor_prune

int64_t vg::Surjector::max_tail_anchor_prune = 4

◆ min_fold_coverage_for_downsample

int64_t vg::Surjector::min_fold_coverage_for_downsample = 8

in spliced surject, downsample if the base-wise average coverage by chunks is this high

◆ min_shift_for_prune

int64_t vg::Surjector::min_shift_for_prune = 32 * 1024

◆ min_splice_length

int64_t vg::Surjector::min_splice_length = 20

the minimum length deletion that the spliced algorithm will interpret as a splice event

◆ min_splice_repair_length

int64_t vg::Surjector::min_splice_repair_length = 250

the minimum length apparent intron that we will try to repair

◆ prune_suspicious_anchors

bool vg::Surjector::prune_suspicious_anchors = false

◆ shift_prune_diff

int64_t vg::Surjector::shift_prune_diff = 16 * 1024

◆ warned_about_subgraph_size

atomic_flag vg::Surjector::warned_about_subgraph_size = ATOMIC_FLAG_INIT
mutable

And have we complained about hitting it?


The documentation for this class was generated from the following files: