vg
tools for working with variation graphs
Public Member Functions | Static Public Member Functions | Protected Member Functions | Static Protected Member Functions | Protected Attributes | Static Protected Attributes | List of all members
vg::MultipathAlignmentGraph Class Reference

#include <multipath_alignment_graph.hpp>

Public Member Functions

 MultipathAlignmentGraph (const HandleGraph &graph, MultipathMapper::memcluster_t &hits, const function< pair< id_t, bool >(id_t)> &project, const unordered_multimap< id_t, pair< id_t, bool >> &injection_trans, vector< size_t > &path_node_provenance, size_t max_branch_trim_length=0, gcsa::GCSA *gcsa=nullptr, const MultipathMapper::match_fanouts_t *fanout_breaks=nullptr)
 
 MultipathAlignmentGraph (const HandleGraph &graph, MultipathMapper::memcluster_t &hits, const unordered_map< id_t, pair< id_t, bool >> &projection_trans, vector< size_t > &path_node_provenance, size_t max_branch_trim_length=0, gcsa::GCSA *gcsa=nullptr, const MultipathMapper::match_fanouts_t *fanout_breaks=nullptr)
 Same as the previous constructor, but construct injection_trans implicitly and temporarily. More...
 
 MultipathAlignmentGraph (const HandleGraph &graph, MultipathMapper::memcluster_t &hits, const function< pair< id_t, bool >(id_t)> &project, vector< size_t > &path_node_provenance, size_t max_branch_trim_length=0, gcsa::GCSA *gcsa=nullptr, const MultipathMapper::match_fanouts_t *fanout_breaks=nullptr)
 
 MultipathAlignmentGraph (const HandleGraph &graph, const vector< pair< pair< string::const_iterator, string::const_iterator >, path_t >> &path_chunks, const Alignment &alignment, const function< pair< id_t, bool >(id_t)> &project, const unordered_multimap< id_t, pair< id_t, bool >> &injection_trans, bool realign_Ns=true, bool preserve_tail_anchors=false, vector< size_t > *path_node_provenance=nullptr)
 
 MultipathAlignmentGraph (const HandleGraph &graph, const vector< pair< pair< string::const_iterator, string::const_iterator >, path_t >> &path_chunks, const Alignment &alignment, const unordered_map< id_t, pair< id_t, bool >> &projection_trans, bool realign_Ns=true, bool preserve_tail_anchors=false, vector< size_t > *path_node_provenance=nullptr)
 Same as the previous constructor, but construct injection_trans implicitly and temporarily. More...
 
 MultipathAlignmentGraph (const HandleGraph &graph, const vector< pair< pair< string::const_iterator, string::const_iterator >, path_t >> &path_chunks, const Alignment &alignment, const function< pair< id_t, bool >(id_t)> &project, bool realign_Ns=true, bool preserve_tail_anchors=false, vector< size_t > *path_node_provenance=nullptr)
 
 MultipathAlignmentGraph (const HandleGraph &graph, const Alignment &alignment, SnarlManager *snarl_manager, SnarlDistanceIndex *dist_index, size_t max_snarl_cut_size, const function< pair< id_t, bool >(id_t)> &project, const unordered_multimap< id_t, pair< id_t, bool >> &injection_trans)
 
 MultipathAlignmentGraph (const HandleGraph &graph, const Alignment &alignment, SnarlManager *snarl_manager, SnarlDistanceIndex *dist_index, size_t max_snarl_cut_size, const unordered_map< id_t, pair< id_t, bool >> &projection_trans)
 Same as the previous constructor, but construct injection_trans implicitly and temporarily. More...
 
 MultipathAlignmentGraph (const HandleGraph &graph, const Alignment &alignment, SnarlManager *snarl_manager, SnarlDistanceIndex *dist_index, size_t max_snarl_cut_size, const function< pair< id_t, bool >(id_t)> &project)
 
 ~MultipathAlignmentGraph ()
 
void topological_sort (vector< size_t > &order_out)
 
void trim_hanging_indels (const Alignment &alignment, bool trim_Ns=true, bool preserve_tail_anchors=false)
 
void remove_transitive_edges (const vector< size_t > &topological_order)
 
void prune_to_high_scoring_paths (const Alignment &alignment, const GSSWAligner *aligner, double max_suboptimal_score_ratio, const vector< size_t > &topological_order, vector< size_t > &path_node_provenance)
 
void clear_reachability_edges ()
 
size_t count_reachability_edges () const
 Get the number of reachability edges in the graph. More...
 
void trim_to_branch_points (const HandleGraph *graph, size_t max_trim_length=1)
 
void resect_snarls_from_paths (SnarlManager *cutting_snarls, SnarlDistanceIndex *dist_index, const function< pair< id_t, bool >(id_t)> &project, int64_t max_snarl_cut_size=5)
 
void synthesize_tail_anchors (const Alignment &alignment, const HandleGraph &align_graph, const GSSWAligner *aligner, size_t min_anchor_size, size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier)
 
void add_reachability_edges (const HandleGraph &vg, const function< pair< id_t, bool >(id_t)> &project, const unordered_multimap< id_t, pair< id_t, bool >> &injection_trans, vector< size_t > *path_node_provenance=nullptr)
 Add edges between reachable nodes and split nodes at overlaps. More...
 
void align (const Alignment &alignment, const HandleGraph &align_graph, const GSSWAligner *aligner, bool score_anchors_as_matches, size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, size_t max_tail_length, bool simplify_topologies, size_t unmergeable_len, size_t band_padding, multipath_alignment_t &multipath_aln_out, SnarlManager *cutting_snarls=nullptr, SnarlDistanceIndex *dist_index=nullptr, const function< pair< id_t, bool >(id_t)> *project=nullptr, bool allow_negative_scores=false, bool align_in_reverse=false)
 
void align (const Alignment &alignment, const HandleGraph &align_graph, const GSSWAligner *aligner, bool score_anchors_as_matches, size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, size_t max_tail_length, bool simplify_topologies, size_t unmergeable_len, const function< size_t(const Alignment &, const HandleGraph &)> &band_padding_function, multipath_alignment_t &multipath_aln_out, SnarlManager *cutting_snarls=nullptr, SnarlDistanceIndex *dist_index=nullptr, const function< pair< id_t, bool >(id_t)> *project=nullptr, bool allow_negative_scores=false, bool align_in_reverse=false)
 
void to_dot (ostream &out, const Alignment *alignment=nullptr) const
 
vector< vector< id_t > > get_connected_components () const
 Get lists of the vg node IDs that participate in each connected component in the MultipathAlignmentGraph. More...
 
bool empty () const
 Does the multipath alignment graph have any nodes? More...
 
size_t size () const
 
size_t max_shift () const
 
void prune_high_shift_edges (size_t prune_diff, bool prohibit_new_sources, bool prohibit_new_sinks)
 

Static Public Member Functions

static unordered_multimap< id_t, pair< id_t, bool > > create_injection_trans (const unordered_map< id_t, pair< id_t, bool >> &projection_trans)
 
static unordered_multimap< id_t, pair< id_t, bool > > create_injection_trans (const HandleGraph &graph, const function< pair< id_t, bool >(id_t)> &project)
 
static unordered_map< id_t, pair< id_t, bool > > create_identity_projection_trans (const HandleGraph &graph)
 
static function< pair< id_t, bool >id_t)> create_projector (const unordered_map< id_t, pair< id_t, bool >> &projection_trans)
 Create a lambda function that projects using a map that projects. More...
 

Protected Member Functions

void create_path_chunk_nodes (const HandleGraph &graph, const vector< pair< pair< string::const_iterator, string::const_iterator >, path_t >> &path_chunks, const Alignment &alignment, const function< pair< id_t, bool >(id_t)> &project, const unordered_multimap< id_t, pair< id_t, bool >> &injection_trans, vector< size_t > *path_node_provenance=nullptr)
 Add the path chunks as nodes to the connectivity graph. More...
 
void create_match_nodes (const HandleGraph &graph, MultipathMapper::memcluster_t &hits, const function< pair< id_t, bool >(id_t)> &project, const unordered_multimap< id_t, pair< id_t, bool >> &injection_trans, vector< size_t > &path_node_provenance, int64_t max_branch_trim_length, const MultipathMapper::match_fanouts_t *fanout_breaks)
 Walk out MEMs into match nodes and filter out redundant sub-MEMs. More...
 
void merge_partially_redundant_match_nodes (const unordered_map< int64_t, vector< int64_t >> &node_matches, vector< size_t > &path_node_provenance)
 If path nodes partially overlap, merge the sections that overlap into a single path node. More...
 
void jitter_homopolymer_ends (const HandleGraph &graph, vector< size_t > &path_node_provenance, const MultipathMapper::memcluster_t &hits, int64_t max_branch_trim_length)
 
void collapse_order_length_runs (const HandleGraph &graph, gcsa::GCSA *gcsa, vector< size_t > &path_node_provenance)
 
void reorder_adjacency_lists (const vector< size_t > &order)
 
int64_t pessimistic_tail_gap (int64_t tail_length, double multiplier)
 
bool into_cutting_snarl (id_t node_id, bool is_rev, SnarlManager *snarl_manager, SnarlDistanceIndex *dist_index) const
 Returns true if we're pointing into a snarl that we want to cut out of paths. More...
 
vector< pair< size_t, size_t > > get_cut_segments (path_t &path, SnarlManager *cutting_snarls, SnarlDistanceIndex *dist_index, const function< pair< id_t, bool >(id_t)> &project, int64_t max_snarl_cut_size) const
 Returns the intervals of the path that lie inside of snarls. More...
 
unordered_map< bool, unordered_map< size_t, vector< Alignment > > > align_tails (const Alignment &alignment, const HandleGraph &align_graph, const GSSWAligner *aligner, size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, size_t min_paths, size_t max_tail_length, unordered_set< size_t > *sources=nullptr)
 
void reverse_alignment (Alignment &aln) const
 
void add_decomposed_tail_alignments (const Alignment &alignment, const HandleGraph &align_graph, multipath_alignment_t &multipath_aln_out, unordered_set< size_t > &prohibited_merges, vector< pair< path_t, int32_t >> &shared_tail_alns, vector< vector< pair< path_t, int32_t >>> &unshared_tail_alns, size_t attachment_idx, bool to_left, size_t unmergeable_len, const GSSWAligner *aligner, SnarlManager *cutting_snarls=nullptr, SnarlDistanceIndex *dist_index=nullptr, const function< pair< id_t, bool >(id_t)> *project=nullptr)
 

Static Protected Member Functions

static bool trim_and_check_for_empty (const Alignment &alignment, bool trim_Ns, PathNode &path_node, bool preserve_tail_anchors, int64_t *removed_start_from_length=nullptr, int64_t *removed_end_from_length=nullptr)
 
static void deduplicate_alt_alns (vector< pair< path_t, int32_t >> &alt_alns, bool leftward, bool rightward)
 
static pair< path_t, int32_t > zip_alignments (vector< pair< path_t, int32_t >> &alt_alns, bool from_left, const Alignment &alignment, const HandleGraph &align_graph, string::const_iterator begin, const GSSWAligner *aligner)
 
static pair< vector< pair< path_t, int32_t > >, vector< vector< pair< path_t, int32_t > > > > decompose_alignments (const vector< pair< path_t, int32_t >> &alt_alns, const Alignment &alignment, const HandleGraph &align_graph, string::const_iterator begin, const GSSWAligner *aligner)
 

Protected Attributes

vector< PathNodepath_nodes
 Nodes representing walked MEMs in the graph. More...
 
bool has_reachability_edges = false
 

Static Protected Attributes

static thread_local unordered_map< double, vector< int64_t > > pessimistic_tail_gap_memo
 Memo for the transcendental pessimistic tail gap function (thread local to maintain thread-safety) More...
 
static const size_t tail_gap_memo_max_size = 1000
 The largest size we will memoize up to. More...
 

Constructor & Destructor Documentation

◆ MultipathAlignmentGraph() [1/9]

vg::MultipathAlignmentGraph::MultipathAlignmentGraph ( const HandleGraph graph,
MultipathMapper::memcluster_t hits,
const function< pair< id_t, bool >(id_t)> &  project,
const unordered_multimap< id_t, pair< id_t, bool >> &  injection_trans,
vector< size_t > &  path_node_provenance,
size_t  max_branch_trim_length = 0,
gcsa::GCSA *  gcsa = nullptr,
const MultipathMapper::match_fanouts_t fanout_breaks = nullptr 
)

Construct a graph of the reachability between MEMs in a DAG-ified graph. If a GCSA is specified, use it to collapse MEMs whose lengths bump up against the GCSA's order limit on MEM length. Produces a graph with reachability edges. Assumes that the cluster is sorted by primarily length and secondarily lexicographically by read interval. If a hit fails to be walked ouut in the graph, it is removed from hits.

◆ MultipathAlignmentGraph() [2/9]

vg::MultipathAlignmentGraph::MultipathAlignmentGraph ( const HandleGraph graph,
MultipathMapper::memcluster_t hits,
const unordered_map< id_t, pair< id_t, bool >> &  projection_trans,
vector< size_t > &  path_node_provenance,
size_t  max_branch_trim_length = 0,
gcsa::GCSA *  gcsa = nullptr,
const MultipathMapper::match_fanouts_t fanout_breaks = nullptr 
)

Same as the previous constructor, but construct injection_trans implicitly and temporarily.

◆ MultipathAlignmentGraph() [3/9]

vg::MultipathAlignmentGraph::MultipathAlignmentGraph ( const HandleGraph graph,
MultipathMapper::memcluster_t hits,
const function< pair< id_t, bool >(id_t)> &  project,
vector< size_t > &  path_node_provenance,
size_t  max_branch_trim_length = 0,
gcsa::GCSA *  gcsa = nullptr,
const MultipathMapper::match_fanouts_t fanout_breaks = nullptr 
)

Same as the previous constructor, but construct injection_trans implicitly and temporarily using a lambda for a projector

◆ MultipathAlignmentGraph() [4/9]

vg::MultipathAlignmentGraph::MultipathAlignmentGraph ( const HandleGraph graph,
const vector< pair< pair< string::const_iterator, string::const_iterator >, path_t >> &  path_chunks,
const Alignment alignment,
const function< pair< id_t, bool >(id_t)> &  project,
const unordered_multimap< id_t, pair< id_t, bool >> &  injection_trans,
bool  realign_Ns = true,
bool  preserve_tail_anchors = false,
vector< size_t > *  path_node_provenance = nullptr 
)

Construct a graph of the reachability between aligned chunks in a linearized path graph. Produces a graph with reachability edges.

◆ MultipathAlignmentGraph() [5/9]

vg::MultipathAlignmentGraph::MultipathAlignmentGraph ( const HandleGraph graph,
const vector< pair< pair< string::const_iterator, string::const_iterator >, path_t >> &  path_chunks,
const Alignment alignment,
const unordered_map< id_t, pair< id_t, bool >> &  projection_trans,
bool  realign_Ns = true,
bool  preserve_tail_anchors = false,
vector< size_t > *  path_node_provenance = nullptr 
)

Same as the previous constructor, but construct injection_trans implicitly and temporarily.

◆ MultipathAlignmentGraph() [6/9]

vg::MultipathAlignmentGraph::MultipathAlignmentGraph ( const HandleGraph graph,
const vector< pair< pair< string::const_iterator, string::const_iterator >, path_t >> &  path_chunks,
const Alignment alignment,
const function< pair< id_t, bool >(id_t)> &  project,
bool  realign_Ns = true,
bool  preserve_tail_anchors = false,
vector< size_t > *  path_node_provenance = nullptr 
)

Same as the previous constructor, but construct injection_trans implicitly and temporarily and using a lambda for a projector

◆ MultipathAlignmentGraph() [7/9]

vg::MultipathAlignmentGraph::MultipathAlignmentGraph ( const HandleGraph graph,
const Alignment alignment,
SnarlManager snarl_manager,
SnarlDistanceIndex *  dist_index,
size_t  max_snarl_cut_size,
const function< pair< id_t, bool >(id_t)> &  project,
const unordered_multimap< id_t, pair< id_t, bool >> &  injection_trans 
)

Make a multipath alignment graph using the path of a single-path alignment. Only one of snarl_manager and dist_index need be supplied.

◆ MultipathAlignmentGraph() [8/9]

vg::MultipathAlignmentGraph::MultipathAlignmentGraph ( const HandleGraph graph,
const Alignment alignment,
SnarlManager snarl_manager,
SnarlDistanceIndex *  dist_index,
size_t  max_snarl_cut_size,
const unordered_map< id_t, pair< id_t, bool >> &  projection_trans 
)

Same as the previous constructor, but construct injection_trans implicitly and temporarily.

◆ MultipathAlignmentGraph() [9/9]

vg::MultipathAlignmentGraph::MultipathAlignmentGraph ( const HandleGraph graph,
const Alignment alignment,
SnarlManager snarl_manager,
SnarlDistanceIndex *  dist_index,
size_t  max_snarl_cut_size,
const function< pair< id_t, bool >(id_t)> &  project 
)

Same as the previous constructor, but construct injection_trans implicitly and temporarily using a function instead of a map

◆ ~MultipathAlignmentGraph()

vg::MultipathAlignmentGraph::~MultipathAlignmentGraph ( )

Member Function Documentation

◆ add_decomposed_tail_alignments()

void vg::MultipathAlignmentGraph::add_decomposed_tail_alignments ( const Alignment alignment,
const HandleGraph align_graph,
multipath_alignment_t multipath_aln_out,
unordered_set< size_t > &  prohibited_merges,
vector< pair< path_t, int32_t >> &  shared_tail_alns,
vector< vector< pair< path_t, int32_t >>> &  unshared_tail_alns,
size_t  attachment_idx,
bool  to_left,
size_t  unmergeable_len,
const GSSWAligner aligner,
SnarlManager cutting_snarls = nullptr,
SnarlDistanceIndex *  dist_index = nullptr,
const function< pair< id_t, bool >(id_t)> *  project = nullptr 
)
protected

◆ add_reachability_edges()

void vg::MultipathAlignmentGraph::add_reachability_edges ( const HandleGraph vg,
const function< pair< id_t, bool >(id_t)> &  project,
const unordered_multimap< id_t, pair< id_t, bool >> &  injection_trans,
vector< size_t > *  path_node_provenance = nullptr 
)

Add edges between reachable nodes and split nodes at overlaps.

Get the offset in the first visited graph node at which the given MEM starts. Does not account for orientation.

Get the offset in the first visited graph node at which the given MEM ends (i.e. the past-the-end offset). Does not account for orientation.

Get the ID of the first node visited in the graph along the path for a MEM. Does not account for orientation.

Get the ID of the last node visited in the graph along the path for a MEM. Does not account for orientation.

Get the offset in the read of either the start or past-the-end position of the given MEM, according to the end flag.

Get the node ID in the graph of either the start or end position of the given MEM, according to the end flag.

◆ align() [1/2]

void vg::MultipathAlignmentGraph::align ( const Alignment alignment,
const HandleGraph align_graph,
const GSSWAligner aligner,
bool  score_anchors_as_matches,
size_t  max_alt_alns,
bool  dynamic_alt_alns,
size_t  max_gap,
double  pessimistic_tail_gap_multiplier,
size_t  max_tail_length,
bool  simplify_topologies,
size_t  unmergeable_len,
const function< size_t(const Alignment &, const HandleGraph &)> &  band_padding_function,
multipath_alignment_t multipath_aln_out,
SnarlManager cutting_snarls = nullptr,
SnarlDistanceIndex *  dist_index = nullptr,
const function< pair< id_t, bool >(id_t)> *  project = nullptr,
bool  allow_negative_scores = false,
bool  align_in_reverse = false 
)

Do intervening and tail alignments between the anchoring paths and store the result in a multipath_alignment_t. Reachability edges must be in the graph. Also, choose the band padding dynamically as a function of the inter-MEM sequence and graph. The Alignment passed must be the same Alignment that owns the sequence into which iterators were passed when the MultipathAlignmentGraph was constructed! TODO: Shouldn't the class hold a reference to the Alignment then?

Note that the output alignment may NOT be in topologically-sorted order, even if this MultipathAlignmentGraph is. You MUST sort it with topologically_order_subpaths() before trying to run DP on it.

◆ align() [2/2]

void vg::MultipathAlignmentGraph::align ( const Alignment alignment,
const HandleGraph align_graph,
const GSSWAligner aligner,
bool  score_anchors_as_matches,
size_t  max_alt_alns,
bool  dynamic_alt_alns,
size_t  max_gap,
double  pessimistic_tail_gap_multiplier,
size_t  max_tail_length,
bool  simplify_topologies,
size_t  unmergeable_len,
size_t  band_padding,
multipath_alignment_t multipath_aln_out,
SnarlManager cutting_snarls = nullptr,
SnarlDistanceIndex *  dist_index = nullptr,
const function< pair< id_t, bool >(id_t)> *  project = nullptr,
bool  allow_negative_scores = false,
bool  align_in_reverse = false 
)

Do intervening and tail alignments between the anchoring paths and store the result in a multipath_alignment_t. Reachability edges must be in the graph. The Alignment passed must be the same Alignment that owns the sequence into which iterators were passed when the MultipathAlignmentGraph was constructed! TODO: Shouldn't the class hold a reference to the Alignment then?

Note that the output alignment may NOT be in topologically-sorted order, even if this MultipathAlignmentGraph is. You MUST sort it with topologically_order_subpaths() before trying to run DP on it.

◆ align_tails()

unordered_map< bool, unordered_map< size_t, vector< Alignment > > > vg::MultipathAlignmentGraph::align_tails ( const Alignment alignment,
const HandleGraph align_graph,
const GSSWAligner aligner,
size_t  max_alt_alns,
bool  dynamic_alt_alns,
size_t  max_gap,
double  pessimistic_tail_gap_multiplier,
size_t  min_paths,
size_t  max_tail_length,
unordered_set< size_t > *  sources = nullptr 
)
protected

Generate alignments of the tails of the query sequence, beyond the sources and sinks. The Alignment passed must be the one that owns the sequence we are working on. Returns a map from tail (left=false, right=true), to a map from subpath number to all the Alignments of the tail off of that subpath. Also computes the source subpaths and adds their numbers to the given set if not null.

If a tail is longer than max_tail_length, produces an alignment softclipping it.

If dynamic alignment count is also selected, can indicate a minimum number of paths that must be in the extending graph in order to do an alignment

◆ clear_reachability_edges()

void vg::MultipathAlignmentGraph::clear_reachability_edges ( )

Clear reachability edges, so that add_reachability_edges can be run (possibly after modifying the graph).

◆ collapse_order_length_runs()

void vg::MultipathAlignmentGraph::collapse_order_length_runs ( const HandleGraph graph,
gcsa::GCSA *  gcsa,
vector< size_t > &  path_node_provenance 
)
protected

Identifies runs of exact matches that are sub-maximal because they hit the order of the GCSA index and merges them into a single node, assumes that match nodes are sorted by length and then lexicographically by read interval, does not update edges

◆ count_reachability_edges()

size_t vg::MultipathAlignmentGraph::count_reachability_edges ( ) const

Get the number of reachability edges in the graph.

◆ create_identity_projection_trans()

unordered_map< id_t, pair< id_t, bool > > vg::MultipathAlignmentGraph::create_identity_projection_trans ( const HandleGraph graph)
static

Create an identity projection translation from a DAG that did not need to be modified during dagification.

◆ create_injection_trans() [1/2]

unordered_multimap< id_t, pair< id_t, bool > > vg::MultipathAlignmentGraph::create_injection_trans ( const HandleGraph graph,
const function< pair< id_t, bool >(id_t)> &  project 
)
static

Create the constant injection translation data from a function instead of a map

◆ create_injection_trans() [2/2]

unordered_multimap< id_t, pair< id_t, bool > > vg::MultipathAlignmentGraph::create_injection_trans ( const unordered_map< id_t, pair< id_t, bool >> &  projection_trans)
static

Create the constant injection translation data, which maps a node in the original graph to every one of its occurrences in the dagified graph, by reversing the projection translation. This data is needed to construct the MultipathAlignmentGraph, and to perform some other operations on it, but is big enough that it is worth not making it a member.

◆ create_match_nodes()

void vg::MultipathAlignmentGraph::create_match_nodes ( const HandleGraph graph,
MultipathMapper::memcluster_t hits,
const function< pair< id_t, bool >(id_t)> &  project,
const unordered_multimap< id_t, pair< id_t, bool >> &  injection_trans,
vector< size_t > &  path_node_provenance,
int64_t  max_branch_trim_length,
const MultipathMapper::match_fanouts_t fanout_breaks 
)
protected

Walk out MEMs into match nodes and filter out redundant sub-MEMs.

◆ create_path_chunk_nodes()

void vg::MultipathAlignmentGraph::create_path_chunk_nodes ( const HandleGraph graph,
const vector< pair< pair< string::const_iterator, string::const_iterator >, path_t >> &  path_chunks,
const Alignment alignment,
const function< pair< id_t, bool >(id_t)> &  project,
const unordered_multimap< id_t, pair< id_t, bool >> &  injection_trans,
vector< size_t > *  path_node_provenance = nullptr 
)
protected

Add the path chunks as nodes to the connectivity graph.

◆ create_projector()

function< pair< id_t, bool >id_t)> vg::MultipathAlignmentGraph::create_projector ( const unordered_map< id_t, pair< id_t, bool >> &  projection_trans)
static

Create a lambda function that projects using a map that projects.

◆ decompose_alignments()

pair< vector< pair< path_t, int32_t > >, vector< vector< pair< path_t, int32_t > > > > vg::MultipathAlignmentGraph::decompose_alignments ( const vector< pair< path_t, int32_t >> &  alt_alns,
const Alignment alignment,
const HandleGraph align_graph,
string::const_iterator  begin,
const GSSWAligner aligner 
)
staticprotected

Identifies regions that are shared across all of the alternative alignments, and then splits those regions out into separate alignments, dividing the set of alternative alignments accordingly. Return value consists of a vector of the shared segments and a vector of vectors of the segments between. The length of the vector of shared segments is +1 of the vector of between segments, so that they are interleaved. The first and/or last of the alignments of shared segments may be empty if there is no shared prefix or suffix across all the alignments. If there are no shared segments at all, will return pair of empty vectors. Blocks of unshared paths will be sorted into descending order by score.

◆ deduplicate_alt_alns()

void vg::MultipathAlignmentGraph::deduplicate_alt_alns ( vector< pair< path_t, int32_t >> &  alt_alns,
bool  leftward,
bool  rightward 
)
staticprotected

Removes alignments that follow the same path through the graph, retaining only the highest scoring ones. If deduplicating leftward, then also removes paths that take a longer path for no greater score in the leftward direction. Vice versa for rightward. Assumes alignments are descending order by score.

◆ empty()

bool vg::MultipathAlignmentGraph::empty ( ) const

Does the multipath alignment graph have any nodes?

◆ get_connected_components()

vector< vector< id_t > > vg::MultipathAlignmentGraph::get_connected_components ( ) const

Get lists of the vg node IDs that participate in each connected component in the MultipathAlignmentGraph.

◆ get_cut_segments()

vector< pair< size_t, size_t > > vg::MultipathAlignmentGraph::get_cut_segments ( path_t path,
SnarlManager cutting_snarls,
SnarlDistanceIndex *  dist_index,
const function< pair< id_t, bool >(id_t)> &  project,
int64_t  max_snarl_cut_size 
) const
protected

Returns the intervals of the path that lie inside of snarls.

◆ into_cutting_snarl()

bool vg::MultipathAlignmentGraph::into_cutting_snarl ( id_t  node_id,
bool  is_rev,
SnarlManager snarl_manager,
SnarlDistanceIndex *  dist_index 
) const
protected

Returns true if we're pointing into a snarl that we want to cut out of paths.

◆ jitter_homopolymer_ends()

void vg::MultipathAlignmentGraph::jitter_homopolymer_ends ( const HandleGraph graph,
vector< size_t > &  path_node_provenance,
const MultipathMapper::memcluster_t hits,
int64_t  max_branch_trim_length 
)
protected

◆ max_shift()

size_t vg::MultipathAlignmentGraph::max_shift ( ) const

For a graph with reachability edges, identifies the largest difference between read interval and reference distance

◆ merge_partially_redundant_match_nodes()

void vg::MultipathAlignmentGraph::merge_partially_redundant_match_nodes ( const unordered_map< int64_t, vector< int64_t >> &  node_matches,
vector< size_t > &  path_node_provenance 
)
protected

If path nodes partially overlap, merge the sections that overlap into a single path node.

◆ pessimistic_tail_gap()

int64_t vg::MultipathAlignmentGraph::pessimistic_tail_gap ( int64_t  tail_length,
double  multiplier 
)
protected

Return the pessimistic gap length corresponding to a certain tail length and multiplier (proportional to the square root of the tail length)

◆ prune_high_shift_edges()

void vg::MultipathAlignmentGraph::prune_high_shift_edges ( size_t  prune_diff,
bool  prohibit_new_sources,
bool  prohibit_new_sinks 
)

◆ prune_to_high_scoring_paths()

void vg::MultipathAlignmentGraph::prune_to_high_scoring_paths ( const Alignment alignment,
const GSSWAligner aligner,
double  max_suboptimal_score_ratio,
const vector< size_t > &  topological_order,
vector< size_t > &  path_node_provenance 
)

Removes nodes and edges that are not part of any path that has an estimated score within some amount of the highest scoring path. Reachability edges must be present.

◆ remove_transitive_edges()

void vg::MultipathAlignmentGraph::remove_transitive_edges ( const vector< size_t > &  topological_order)

Removes all transitive edges from graph (reduces to minimum equivalent graph), except for edges between path nodes that abut either on the graph or read. These edges often correspond to overlap breaks in low complexity sequence, so retaining them improves alignment in low-complexity regions like STR expansions. Note: reorders internal representation of adjacency lists. Reachability edges must be in the graph.

◆ reorder_adjacency_lists()

void vg::MultipathAlignmentGraph::reorder_adjacency_lists ( const vector< size_t > &  order)
protected

Reorders adjacency list representation of edges so that they follow the indicated ordering of their target nodes

◆ resect_snarls_from_paths()

void vg::MultipathAlignmentGraph::resect_snarls_from_paths ( SnarlManager cutting_snarls,
SnarlDistanceIndex *  dist_index,
const function< pair< id_t, bool >(id_t)> &  project,
int64_t  max_snarl_cut_size = 5 
)

Cut the interior of snarls out of anchoring paths (and split alignment nodes accordingly) unless they are longer than the max cut size. Snarls can be stored either in a SnarlManager or a SnarlDistanceIndex (only one need be supplied).

◆ reverse_alignment()

void vg::MultipathAlignmentGraph::reverse_alignment ( Alignment aln) const
protected

◆ size()

size_t vg::MultipathAlignmentGraph::size ( ) const

◆ synthesize_tail_anchors()

void vg::MultipathAlignmentGraph::synthesize_tail_anchors ( const Alignment alignment,
const HandleGraph align_graph,
const GSSWAligner aligner,
size_t  min_anchor_size,
size_t  max_alt_alns,
bool  dynamic_alt_alns,
size_t  max_gap,
double  pessimistic_tail_gap_multiplier 
)

Do some exploratory alignments of the tails of the graph, outside the outermost existing anchors, and define new anchoring paths from them. After this, you can call resect_snarls_from_paths, in order to get better coverage of possible combinations of snarl traversals in parts of the alignment that didn't originally have anchors. Produces only perfect match anchors, so it is still safe to use score_anchors_as_matches. The Alignment passed must be the same Alignment that owns the sequence into which iterators were passed when the MultipathAlignmentGraph was constructed! TODO: Shouldn't the class hold a reference to the Alignment then?

◆ to_dot()

void vg::MultipathAlignmentGraph::to_dot ( ostream &  out,
const Alignment alignment = nullptr 
) const

Converts a MultipathAlignmentGraph to a GraphViz Dot representation, output to the given ostream. If given the Alignment query we are working on, can produce information about subpath iterators.

◆ topological_sort()

void vg::MultipathAlignmentGraph::topological_sort ( vector< size_t > &  order_out)

Fills input vector with node indices of a topological sort. Reachability edges must be in the graph.

◆ trim_and_check_for_empty()

bool vg::MultipathAlignmentGraph::trim_and_check_for_empty ( const Alignment alignment,
bool  trim_Ns,
PathNode path_node,
bool  preserve_tail_anchors,
int64_t *  removed_start_from_length = nullptr,
int64_t *  removed_end_from_length = nullptr 
)
staticprotected

Trim down the given PathNode of everything except softclips. Return true if it all gets trimmed away and should be removed. Fills in removed_start_from_length and/or removed_end_from_length with the bases in the graph removed from the path on each end during trimming, if set. If preserve tail anchors is true, then a null anchor (no bases and no path) will be preserved if the read segment corresponds to the beginning or end of the alignment sequence.

◆ trim_hanging_indels()

void vg::MultipathAlignmentGraph::trim_hanging_indels ( const Alignment alignment,
bool  trim_Ns = true,
bool  preserve_tail_anchors = false 
)

Removes non-softclip indels from path nodes. Does not update edges–should be called prior to adding computing edges. If preserve tail anchors is true, then a null anchor (no bases and no path) will be preserved if the read segment orresponds to the beginning or end of the alignment sequence.

◆ trim_to_branch_points()

void vg::MultipathAlignmentGraph::trim_to_branch_points ( const HandleGraph graph,
size_t  max_trim_length = 1 
)

Remove the ends of paths, up to a maximum length, if they cause the path to extend past a branch point in the graph.

◆ zip_alignments()

pair< path_t, int32_t > vg::MultipathAlignmentGraph::zip_alignments ( vector< pair< path_t, int32_t >> &  alt_alns,
bool  from_left,
const Alignment alignment,
const HandleGraph align_graph,
string::const_iterator  begin,
const GSSWAligner aligner 
)
staticprotected

If a list of aligned subsequences are identifical in a prefix/suffix, remove that prefix/suffix from all of the alignments and return it as a separate alignment. If there is no shared prefix/suffix, returns an empty path with 0 score.

Member Data Documentation

◆ has_reachability_edges

bool vg::MultipathAlignmentGraph::has_reachability_edges = false
protected

We keep a flag for whether the reachability edges are set. This is for error checking, and is kind of a forgery (you should just check the actual edge records), but the state system we use is confusing so we want to make sure we have lots of asserts to enforce it. If this is set and you want it unset, use clear_reachability_edges(). If this is unset and you want it set, use add_reachability_edges().

◆ path_nodes

vector<PathNode> vg::MultipathAlignmentGraph::path_nodes
protected

Nodes representing walked MEMs in the graph.

◆ pessimistic_tail_gap_memo

thread_local unordered_map< double, vector< int64_t > > vg::MultipathAlignmentGraph::pessimistic_tail_gap_memo
staticprotected

Memo for the transcendental pessimistic tail gap function (thread local to maintain thread-safety)

◆ tail_gap_memo_max_size

const size_t vg::MultipathAlignmentGraph::tail_gap_memo_max_size = 1000
staticprotected

The largest size we will memoize up to.


The documentation for this class was generated from the following files: