vg
tools for working with variation graphs
Classes | Typedefs | Functions | Variables
vg::algorithms Namespace Reference

Classes

class  Anchor
 
struct  Edge
 
struct  GFADuplicatePathError
 
struct  GFAFormatError
 This exception will be thrown if the GFA data is not acceptable. More...
 
struct  GFAIDMapInfo
 
class  GFAParser
 
struct  Graph
 
struct  kmer_t
 Stores a kmer in the context of a graph. More...
 
struct  Node
 
class  TracedScore
 
struct  walk_t
 Record a <=k-length walk in the context of a graph. More...
 

Typedefs

using BinnedDepthIndex = unordered_map< string, map< size_t, map< size_t, pair< float, float > >> >
 
using path_offset_collection_t = unordered_map< path_handle_t, vector< pair< size_t, bool > >>
 

Functions

template<class DistHeuristic >
vector< handle_ta_star (const HandleGraph *graph, const pos_t &pos_1, const pos_t &pos_2, const DistHeuristic &dist_heuristic, bool find_min=true, int64_t extremal_distance=numeric_limits< int64_t >::max(), bool monotonic_heuristic=true)
 
unordered_map< path_handle_t, vector< pair< size_t, bool > > > alignment_path_offsets (const PathPositionHandleGraph &graph, const Alignment &aln, bool just_min, bool nearby, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter)
 
unordered_map< path_handle_t, vector< pair< size_t, bool > > > multipath_alignment_path_offsets (const PathPositionHandleGraph &graph, const multipath_alignment_t &mp_aln, const std::function< bool(const path_handle_t &)> *path_filter)
 
void annotate_with_initial_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter)
 
void annotate_with_node_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter)
 
void annotate_with_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, bool just_min, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter)
 
void annotate_with_initial_path_positions (const PathPositionHandleGraph &graph, vector< Alignment > &alns, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter)
 
size_t min_approx_path_distance (const PathPositionHandleGraph *graph, const pos_t &pos1, const pos_t &pos2, uint64_t max_search)
 use the embedded paths to get an estimated minimum distance between the positions More...
 
void back_translate_in_place (const NamedNodeBackTranslation *translation, Path &path)
 
void back_translate_in_place (const NamedNodeBackTranslation *translation, Snarl &snarl)
 
void back_translate_in_place (const NamedNodeBackTranslation *translation, SnarlTraversal &traversal)
 
void back_translate_in_place (const NamedNodeBackTranslation *translation, Visit &visit)
 
ostream & operator<< (ostream &out, const Anchor &anchor)
 Explain an Anchor to the given stream. More...
 
ostream & operator<< (ostream &out, const TracedScore &value)
 Print operator. More...
 
void sort_and_shadow (const std::vector< Anchor > &items, std::vector< size_t > &indexes)
 
void sort_and_shadow (std::vector< Anchor > &items)
 
TracedScore chain_items_dp (vector< TracedScore > &best_chain_score, const VectorView< Anchor > &to_chain, const SnarlDistanceIndex &distance_index, const HandleGraph &graph, int gap_open, int gap_extension, size_t max_lookback_bases, size_t min_lookback_items, size_t lookback_item_hard_cap, size_t initial_lookback_threshold, double lookback_scale_factor, double min_good_transition_score_per_base, int item_bonus, size_t max_indel_bases)
 
vector< size_t > chain_items_traceback (const vector< TracedScore > &best_chain_score, const VectorView< Anchor > &to_chain, const TracedScore &best_past_ending_score_ever)
 
pair< int, vector< size_t > > find_best_chain (const VectorView< Anchor > &to_chain, const SnarlDistanceIndex &distance_index, const HandleGraph &graph, int gap_open, int gap_extension, size_t max_lookback_bases, size_t min_lookback_items, size_t lookback_item_hard_cap, size_t initial_lookback_threshold, double lookback_scale_factor, double min_good_transition_score_per_base, int item_bonus, size_t max_indel_bases)
 
int score_best_chain (const VectorView< Anchor > &to_chain, const SnarlDistanceIndex &distance_index, const HandleGraph &graph, int gap_open, int gap_extension)
 
size_t get_graph_distance (const Anchor &from, const Anchor &to, const SnarlDistanceIndex &distance_index, const HandleGraph &graph)
 Get distance in the graph, or std::numeric_limits<size_t>::max() if unreachable. More...
 
size_t get_read_distance (const Anchor &from, const Anchor &to)
 Get distance in the read, or std::numeric_limits<size_t>::max() if unreachable. More...
 
void traverse_components (const HandleGraph &graph, function< void(void)> &on_new_comp, function< void(handle_t)> &on_node)
 
size_t num_components (const HandleGraph &graph)
 
vector< size_t > component_sizes (const HandleGraph &graph)
 
vector< unordered_set< path_handle_t > > component_paths (const PathHandleGraph &graph)
 
template<typename Int1 , typename Int2 >
void reallocate_atomic_int_vector (vector< atomic< Int1 >> *&vec1, vector< atomic< Int2 >> *&vec2)
 
vector< unordered_set< path_handle_t > > component_paths_parallel (const PathHandleGraph &graph)
 
template<typename Collection >
size_t count_covered (Collection &segments)
 
void packed_depths (const Packer &packer, const string &path_name, size_t min_coverage, ostream &out_stream)
 
pair< double, double > packed_depth_of_bin (const Packer &packer, step_handle_t start_step, step_handle_t end_plus_one_step, size_t min_coverage, bool include_deletions)
 
vector< tuple< size_t, size_t, double, double > > binned_packed_depth (const Packer &packer, const string &path_name, size_t bin_size, size_t min_coverage, bool include_deletions)
 
BinnedDepthIndex binned_packed_depth_index (const Packer &packer, const vector< string > &path_names, size_t min_bin_size, size_t max_bin_size, double exp_growth_factor, size_t min_coverage, bool include_deletions, bool std_err)
 
pair< float, float > get_depth_from_index (const BinnedDepthIndex &depth_index, const string &path_name, size_t start_offset, size_t end_offset)
 Query index created above. More...
 
pair< double, double > sample_mapping_depth (const HandleGraph &graph, const string &input_filename, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq, const string &format)
 
pair< double, double > sample_gam_depth (const HandleGraph &graph, const vector< Alignment > &alignments, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq)
 
void path_depths (const PathHandleGraph &graph, const string &path_name, size_t min_coverage, bool count_cycles, ostream &out_stream)
 
pair< double, double > path_depth_of_bin (const PathHandleGraph &graph, step_handle_t start_step, step_handle_t end_plus_one_step, size_t min_coverage, bool count_cycles)
 like packed_depth_of_bin (above), but use paths (as in path_depths) for measuring coverage More...
 
vector< tuple< size_t, size_t, double, double > > binned_path_depth (const PathHandleGraph &graph, const string &path_name, size_t bin_size, size_t min_coverage, bool count_cycles)
 
pair< double, double > sample_mapping_depth (const HandleGraph &graph, const vector< Alignment > &alignments, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq)
 As above, but read a vector instead of a stream. More...
 
void dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const function< bool(void)> &break_fn, const function< void(const edge_t &)> &edge_fn, const function< void(const edge_t &)> &tree_fn, const function< void(const edge_t &)> &edge_curr_fn, const function< void(const edge_t &)> &edge_cross_fn, const vector< handle_t > &sources, const unordered_set< handle_t > &sinks)
 
void dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const vector< handle_t > &sources, const unordered_set< handle_t > &sinks)
 
void dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const function< bool(void)> &break_fn)
 
list< bdsg::HashGraph > disjoint_components (const HandleGraph &graph)
 
bool is_head_node (handle_t h, const HandleGraph *g)
 
int32_t distance_to_head (handle_t h, int32_t limit, const HandleGraph *graph)
 
int32_t distance_to_head (handle_t h, int32_t limit, int32_t dist, unordered_set< handle_t > &seen, const HandleGraph *graph)
 
bool is_tail_node (handle_t h, const HandleGraph *g)
 
int32_t distance_to_tail (handle_t h, int32_t limit, const HandleGraph *graph)
 
int32_t distance_to_tail (handle_t h, int32_t limit, int32_t dist, unordered_set< handle_t > &seen, const HandleGraph *graph)
 
void expand_context_by_steps (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool expand_forward, bool expand_backward)
 
void expand_context_by_length (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool expand_forward, bool expand_backward)
 
void expand_context (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool use_steps, bool expand_forward, bool expand_backward)
 
void expand_context_with_paths (const PathHandleGraph *source, MutablePathMutableHandleGraph *subgraph, int64_t dist, bool use_steps, bool expand_forward, bool expand_backward)
 
unordered_map< id_t, id_textract_connecting_graph (const HandleGraph *source, DeletableHandleGraph *into, int64_t max_len, pos_t pos_1, pos_t pos_2, bool strict_max_len)
 
void extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, const vector< size_t > &forward_search_lengths, const vector< size_t > &backward_search_lengths, size_t reversing_walk_length)
 
void extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, size_t max_dist, size_t reversing_walk_length)
 
void extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, const vector< size_t > &position_max_dist, size_t reversing_walk_length)
 
unordered_map< id_t, id_textract_extending_graph (const HandleGraph *source, DeletableHandleGraph *into, int64_t max_dist, pos_t pos, bool backward, bool preserve_cycles_on_src_node)
 
const gbwt::GBWT * find_gbwt (const HandleGraph *graph)
 
const gbwt::GBWT * find_gbwt (const HandleGraph *graph, std::unique_ptr< gbwt::GBWT > &holder, const std::string &filename)
 
const gbwtgraph::GBWTGraph * find_gbwtgraph (const HandleGraph *graph)
 
const NamedNodeBackTranslationfind_translation (const HandleGraph *graph)
 
void gfa_to_handle_graph (const string &filename, MutableHandleGraph *graph, GFAIDMapInfo *translation)
 
void gfa_to_handle_graph (const string &filename, MutableHandleGraph *graph, const string &translation_filename)
 Overload which serializes its translation to a file internally. More...
 
void gfa_to_handle_graph (istream &in, MutableHandleGraph *graph, GFAIDMapInfo *translation=nullptr)
 Load a GFA from a stream (assumed not to be seekable or reopenable) into a HandleGraph. More...
 
void gfa_to_path_handle_graph (const string &filename, MutablePathMutableHandleGraph *graph, GFAIDMapInfo *translation=nullptr, int64_t max_rgfa_rank=numeric_limits< int64_t >::max(), unordered_set< PathSense > *ignore_sense=nullptr)
 Same as gfa_to_handle_graph but also adds path elements from the GFA to the graph. More...
 
void gfa_to_path_handle_graph (const string &filename, MutablePathMutableHandleGraph *graph, int64_t max_rgfa_rank, const string &translation_filename, unordered_set< PathSense > *ignore_sense=nullptr)
 Overload which serializes its translation to a file internally. More...
 
void gfa_to_path_handle_graph (istream &in, MutablePathMutableHandleGraph *graph, GFAIDMapInfo *translation=nullptr, int64_t max_rgfa_rank=numeric_limits< int64_t >::max(), unordered_set< PathSense > *ignore_sense=nullptr)
 Load a GFA from a stream (assumed not to be seekable or reopenable) into a PathHandleGraph. More...
 
vector< handle_tid_order (const HandleGraph *g)
 
bool intersect_path_offsets (const path_offset_collection_t &a_offsets, const path_offset_collection_t &b_offsets, size_t maximum_distance)
 
void sort_path_offsets (path_offset_collection_t &offsets)
 
vector< pos_tjump_along_closest_path (const PathPositionHandleGraph *graph, const pos_t &pos, int64_t jump_dist, size_t max_search_dist)
 
pair< double, vector< handle_t > > widest_dijkstra (const HandleGraph *g, handle_t source, handle_t sink, function< double(const handle_t &)> node_weight_callback, function< double(const edge_t &)> edge_weight_callback, function< bool(const handle_t &)> is_node_ignored_callback, function< bool(const edge_t &)> is_edge_ignored_callback, bool greedy_avg)
 
vector< pair< double, vector< handle_t > > > yens_k_widest_paths (const HandleGraph *g, handle_t source, handle_t sink, size_t K, function< double(const handle_t &)> node_weight_callback, function< double(const edge_t &)> edge_weight_callback, bool greedy_avg, size_t max_path_length)
 
void for_each_kmer (const HandleGraph &graph, size_t k, size_t edge_max, const std::function< void(const kmer_t &)> &lambda)
 Iterate over all the kmers in the graph, running lambda on each. More...
 
std::ostream & operator<< (std::ostream &out, const kmer_t &kmer)
 Print a kmer_t to a stream. More...
 
void locally_expand_graph (const HandleGraph &parent, MutableHandleGraph &subgraph, handle_t from, int64_t max_dist)
 
void merge (handlegraph::MutablePathDeletableHandleGraph *graph, const vector< pair< handle_t, size_t >> &start, size_t length)
 
pair< vector< unordered_set< size_t > >, size_t > kargers_min_cut (Graph graph, const int seed)
 
pair< vector< unordered_set< size_t > >, size_t > compute_min_cut (Graph graph, const int seed)
 
vector< unordered_set< size_t > > min_cut_decomposition (Graph graph, const int seed)
 
path_offset_collection_t nearest_offsets_in_paths (const PathPositionHandleGraph *graph, const pos_t &pos, int64_t max_search, const std::function< bool(const path_handle_t &)> *path_filter)
 
map< string, vector< pair< size_t, bool > > > offsets_in_paths (const PathPositionHandleGraph *graph, const pos_t &pos)
 
path_offset_collection_t simple_offsets_in_paths (const PathPositionHandleGraph *graph, pos_t pos)
 A "simple" model for path position getting for debugging. More...
 
map< pos_t, char > next_pos_chars (const PathPositionHandleGraph &graph, pos_t pos)
 
void normalize (handlegraph::MutablePathDeletableHandleGraph *graph, int max_iter, bool debug, function< bool(const handle_t &, const handle_t &)> can_merge)
 
std::function< size_t(const Alignment &, const HandleGraph &)> pad_band_random_walk_internal (double band_padding_multiplier, size_t band_padding_memo_size, size_t max_padding, const std::function< size_t(const Alignment &, const HandleGraph &)> size_function)
 
std::function< size_t(const Alignment &, const HandleGraph &)> pad_band_random_walk (double band_padding_multiplier, size_t band_padding_memo_size, size_t max_padding)
 
std::function< size_t(const Alignment &, const HandleGraph &)> pad_band_min_random_walk (double band_padding_multiplier, size_t band_padding_memo_size, size_t max_padding)
 
std::function< size_t(const Alignment &, const HandleGraph &)> pad_band_constant (size_t band_padding)
 
void append_mapping_sequence (const Mapping &m, const string &node_seq, string &seq)
 use the given oriented node sequence and the mapping to reconstruct the sequence represented by the mapping More...
 
string path_string (const HandleGraph &graph, const Path &path)
 use the given graph and the path to determine our path string More...
 
pair_hash_set< edge_tfind_edges_to_prune (const HandleGraph &graph, size_t k, size_t edge_max)
 
size_t prune_complex (DeletableHandleGraph &graph, int path_length, int edge_max)
 
size_t prune_complex_with_head_tail (DeletableHandleGraph &graph, int path_length, int edge_max)
 
size_t prune_short_subgraphs (DeletableHandleGraph &graph, int min_size)
 
size_t remove_high_degree_nodes (DeletableHandleGraph &g, int max_degree)
 
void prune_to_connecting_graph (DeletableHandleGraph &graph, const handle_t &from, const handle_t &to)
 Remove all parts of the graph that are not on some path between the two handles. More...
 
int64_t ref_path_distance (const PathPositionHandleGraph *graph, const pos_t &pos_1, const pos_t &pos_2, const unordered_set< path_handle_t > &ref_paths, int64_t max_search_dist)
 
size_t bellman_ford_shortest_cycle_length (const HandleGraph *graph, const handle_t &source, const vector< handle_t > &layout, const unordered_map< handle_t, size_t > &handle_index, const vector< pair< size_t, size_t >> &feedback_edges)
 
size_t dijkstra_shortest_cycle_length (const HandleGraph *graph, const handle_t &source)
 Simple Dijkstra implementation that computes shortest cycle. More...
 
size_t shortest_cycle_length_internal (const HandleGraph *graph, const handle_t &source, const vector< handle_t > &layout, const unordered_map< handle_t, size_t > &handle_index, const vector< pair< size_t, size_t >> &feedback_edges)
 
size_t shortest_cycle_length (const HandleGraph *graph, const handle_t &source)
 
size_t shortest_cycle_length (const HandleGraph *graph)
 
bool simplify_siblings (handlegraph::MutablePathDeletableHandleGraph *graph, function< bool(const handle_t &, const handle_t &)> can_merge)
 
vector< pair< id_t, id_t > > sorted_id_ranges (const HandleGraph *graph)
 Get a sorted list of inclusive ranges of IDs used in the given HandleGraph. More...
 
void expand_subgraph_by_steps (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &steps, bool forward_only=false)
 expand the subgraph iteratively for this many steps More...
 
void expand_subgraph_to_node_count (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &node_count, bool forward_only=false)
 expand the subgraph iteratively until its node count is at least node_count More...
 
void expand_subgraph_by_length (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &length, bool forward_only=false)
 expand the subgraph iteratively to include at least length new sequence More...
 
void expand_subgraph_to_length (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &length, bool forward_only=false)
 expand the subgraph iterativel until its total sequence length is greater than length More...
 
void extract_context (const HandleGraph &source, MutableHandleGraph &subgraph, const handle_t &handle, const uint64_t &offset, const uint64_t &length, bool fwd, bool rev)
 expand the context around a single handle position More...
 
void extract_id_range (const HandleGraph &source, const nid_t &id1, const nid_t &id2, MutableHandleGraph &subgraph)
 extract the node id range More...
 
void extract_path_range (const PathPositionHandleGraph &source, path_handle_t path_handle, int64_t start, int64_t end, MutableHandleGraph &subgraph)
 
void add_subpaths_to_subgraph (const PathPositionHandleGraph &source, MutablePathHandleGraph &subgraph)
 
void add_connecting_edges_to_subgraph (const HandleGraph &source, MutableHandleGraph &subgraph)
 
void three_edge_connected_component_merges_dense (size_t node_count, size_t first_root, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(size_t, size_t)> &same_component)
 
void three_edge_connected_components_dense (size_t node_count, size_t first_root, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(size_t)> &)> &)> &component_callback)
 
void three_edge_connected_components_dense_cactus (size_t node_count, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(size_t)> &)> &)> &component_callback)
 
template<typename TECCNode >
void three_edge_connected_components (const function< void(const function< void(TECCNode)> &)> &for_each_node, const function< void(TECCNode, const function< void(TECCNode)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(TECCNode)> &)> &)> &component_callback)
 
template<typename TECCNode >
void three_edge_connected_component_merges (const function< void(const function< void(TECCNode)> &)> &for_each_node, const function< void(TECCNode, const function< void(TECCNode)> &)> &for_each_connected_node, const function< void(TECCNode, TECCNode)> &same_component)
 
void for_each_walk (const HandleGraph &graph, size_t k, size_t edge_max, const std::function< void(const walk_t &)> &lambda)
 Iterate over all the walks in the graph, running lambda on each. More...
 
std::ostream & operator<< (std::ostream &out, const walk_t &walk)
 Print a walk_t to a stream. More...
 
uint64_t walk_haplotype_frequency (const HandleGraph &graph, const gbwt::GBWT &haplotypes, const walk_t &walk)
 
std::vector< std::string > walk_haplotype_names (const HandleGraph &graph, const gbwt::GBWT &haplotypes, const walk_t &walk)
 

Variables

std::function< size_t(const Alignment &, const HandleGraph &)> pad_band_random_walk (double band_padding_multiplier=1.0, size_t band_padding_memo_size=2000, size_t max_padding=std::numeric_limits< size_t >::max())
 
std::function< size_t(const Alignment &, const HandleGraph &)> pad_band_min_random_walk (double band_padding_multiplier=1.0, size_t band_padding_memo_size=2000, size_t max_padding=std::numeric_limits< size_t >::max())
 
std::function< size_t(const Alignment &, const HandleGraph &)> pad_band_constant (size_t band_padding)
 Get a band padding function that uses a constant value. More...
 
constexpr size_t PRUNE_THREAD_BUFFER_SIZE = 1024 * 1024
 

Typedef Documentation

◆ BinnedDepthIndex

using vg::algorithms::BinnedDepthIndex = typedef unordered_map<string, map<size_t, map<size_t, pair<float, float> >> >

Use the above function to retrieve the binned depths of a list of paths, and store them indexed by start coordinate. If std_err is true, store <mean, stderr> instead of <mean, variance> For each path, a series of indexes is computed, for bin sizes from min_bin_size, min_bin_size^(exp_growth_factor), etc.

◆ path_offset_collection_t

using vg::algorithms::path_offset_collection_t = typedef unordered_map<path_handle_t, vector<pair<size_t, bool> >>

Represents a set of positions and orientations, along a collection of paths. Positions and orientations may or may not be stored sorted.

Function Documentation

◆ a_star()

template<class DistHeuristic >
vector< handle_t > vg::algorithms::a_star ( const HandleGraph graph,
const pos_t pos_1,
const pos_t pos_2,
const DistHeuristic &  dist_heuristic,
bool  find_min = true,
int64_t  extremal_distance = numeric_limits<int64_t>::max(),
bool  monotonic_heuristic = true 
)

Implements the A* heuristic-guided search algorithm. Returns the path from pos_1 to pos_2 that is either minimal or maximal length, according to the parameters. Allows an extremal distance beyond which the algorithm will cease looking for paths (this should be a large value when looking for minimal paths and a small value when looking for maximum paths). If there is no path between the positions, or none within the extremal length, an empty vector will be returned.

◆ add_connecting_edges_to_subgraph()

void vg::algorithms::add_connecting_edges_to_subgraph ( const HandleGraph source,
MutableHandleGraph subgraph 
)

We can accumulate a subgraph without accumulating all the edges between its nodes this helper ensures that we get the full set

◆ add_subpaths_to_subgraph()

void vg::algorithms::add_subpaths_to_subgraph ( const PathPositionHandleGraph source,
MutablePathHandleGraph subgraph 
)

Add subpaths to the subgraph for all paths visiting its nodes in the source graph.

Always generates correct path metadata, and a path for each contiguous fragment of any base path. Assumes the source graph does not contain any overlapping path fragments on a given base path, and that the subgraph does not already contain any paths on a base path also present in the source graph.

◆ alignment_path_offsets()

unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::alignment_path_offsets ( const PathPositionHandleGraph graph,
const Alignment aln,
bool  just_min,
bool  nearby,
size_t  search_limit = 0,
const std::function< bool(const path_handle_t &)> *  path_filter = nullptr 
)

Gives the path positions of the alignment. If just_min is set, gives the minimum position on each path. Else, gives all Mapping start positions on each path. If nearby is set, will search for a nearby path. Will recurse with nearby set if it is not set on initial call and no positions are found. Respects search_limit in bp in that case. If search_limit is 0, read length is used.

If path_filter is set, and it returns false for a path, that path is not used to annotate the read.

◆ annotate_with_initial_path_positions() [1/2]

void vg::algorithms::annotate_with_initial_path_positions ( const PathPositionHandleGraph graph,
Alignment aln,
size_t  search_limit = 0,
const std::function< bool(const path_handle_t &)> *  path_filter = nullptr 
)

Use the graph to annotate an Alignment with the first position it touches on each reference path. Thread safe.

search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.

If path_filter is set, and it returns false for a path, that path is not used to annotate the read.

◆ annotate_with_initial_path_positions() [2/2]

void vg::algorithms::annotate_with_initial_path_positions ( const PathPositionHandleGraph graph,
vector< Alignment > &  aln,
size_t  search_limit = 0,
const std::function< bool(const path_handle_t &)> *  path_filter = nullptr 
)

Use the graph annotate Alignments with the first position they touch on each reference path. Thread safe.

search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.

If path_filter is set, and it returns false for a path, that path is not used to annotate the read.

◆ annotate_with_node_path_positions()

void vg::algorithms::annotate_with_node_path_positions ( const PathPositionHandleGraph graph,
Alignment aln,
size_t  search_limit = 0,
const std::function< bool(const path_handle_t &)> *  path_filter = nullptr 
)

Use the graph to annotate an Alignment with the first position it touches on each node it visits in each reference path. Thread safe. If no nodes on any path are visited, searches for a nearby path position to use.

search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.

If path_filter is set, and it returns false for a path, that path is not used to annotate the read.

◆ annotate_with_path_positions()

void vg::algorithms::annotate_with_path_positions ( const PathPositionHandleGraph graph,
Alignment aln,
bool  just_min,
size_t  search_limit = 0,
const std::function< bool(const path_handle_t &)> *  path_filter = nullptr 
)

Use the graph to annotate an Alignment with positions on each reference path. Thread safe.

If just_min is set, gives the minimum position on each path. Else, gives all Mapping start positions on each path. If no positions on the path are found, looks for nearby path positions in graph space. Respects search_limit in bp in that case. If search_limit is 0, read length is used.

If path_filter is set, and it returns false for a path, that path is not used to annotate the read.

◆ append_mapping_sequence()

void vg::algorithms::append_mapping_sequence ( const Mapping m,
const string &  node_seq,
string &  seq 
)

use the given oriented node sequence and the mapping to reconstruct the sequence represented by the mapping

◆ back_translate_in_place() [1/4]

void vg::algorithms::back_translate_in_place ( const NamedNodeBackTranslation translation,
Path path 
)

Translate the given Path in place from node ID space to named segment space.

◆ back_translate_in_place() [2/4]

void vg::algorithms::back_translate_in_place ( const NamedNodeBackTranslation translation,
Snarl snarl 
)

Translate the given Snarl in place from node ID space to named segment space.

◆ back_translate_in_place() [3/4]

void vg::algorithms::back_translate_in_place ( const NamedNodeBackTranslation translation,
SnarlTraversal traversal 
)

Translate the given SnarlTraversal in place from node ID space to named segment space. Visits that end up being to snarls where both boundaries are from the same orientation of the same segment will be removed. Multiple visits in a row to the same orientation of the same segment will be elided.

TODO: Work out a way to preserve traversals around cycles while not repeating ourselves for visits to diffrent pieces of the same chopped segment.

◆ back_translate_in_place() [4/4]

void vg::algorithms::back_translate_in_place ( const NamedNodeBackTranslation translation,
Visit visit 
)

Translate the given Visit in place from node ID space to named segment space.

◆ bellman_ford_shortest_cycle_length()

size_t vg::algorithms::bellman_ford_shortest_cycle_length ( const HandleGraph graph,
const handle_t source,
const vector< handle_t > &  layout,
const unordered_map< handle_t, size_t > &  handle_index,
const vector< pair< size_t, size_t >> &  feedback_edges 
)

An implementation of Bellman-Ford with Yen's ordering improvement applied to a layout ideally has a small feedback arc set

◆ binned_packed_depth()

vector< tuple< size_t, size_t, double, double > > vg::algorithms::binned_packed_depth ( const Packer packer,
const string &  path_name,
size_t  bin_size,
size_t  min_coverage,
bool  include_deletions 
)

Use all available threads to estimate the binned packed coverage of a path using above fucntion Each element is a bin's 0-based open-ended interval in the path, and its coverage mean,variance.

◆ binned_packed_depth_index()

BinnedDepthIndex vg::algorithms::binned_packed_depth_index ( const Packer packer,
const vector< string > &  path_names,
size_t  min_bin_size,
size_t  max_bin_size,
double  exp_growth_factor,
size_t  min_coverage,
bool  include_deletions,
bool  std_err 
)

◆ binned_path_depth()

vector< tuple< size_t, size_t, double, double > > vg::algorithms::binned_path_depth ( const PathHandleGraph graph,
const string &  path_name,
size_t  bin_size,
size_t  min_coverage,
bool  count_cycles 
)

◆ chain_items_dp()

TracedScore vg::algorithms::chain_items_dp ( vector< TracedScore > &  best_chain_score,
const VectorView< Anchor > &  to_chain,
const SnarlDistanceIndex &  distance_index,
const HandleGraph graph,
int  gap_open,
int  gap_extension,
size_t  max_lookback_bases = 150,
size_t  min_lookback_items = 0,
size_t  lookback_item_hard_cap = 100,
size_t  initial_lookback_threshold = 10,
double  lookback_scale_factor = 2.0,
double  min_good_transition_score_per_base = -0.1,
int  item_bonus = 0,
size_t  max_indel_bases = 100 
)

Fill in the given DP table for the best chain score ending with each item. Returns the best observed score overall from that table, with provenance to its location in the table, if tracked in the type. Assumes some items exist.

Input items must be sorted by start position in the read.

Takes the given per-item bonus for each item collected.

Uses a finite lookback in items and in read bases when checking where we can come from to reach an item. Also, once a given number of good-looking predecessor items have been found, stop looking back.

Limits transitions to those involving indels of the given size or less, to avoid very bad transitions.

◆ chain_items_traceback()

vector< size_t > vg::algorithms::chain_items_traceback ( const vector< TracedScore > &  best_chain_score,
const VectorView< Anchor > &  to_chain,
const TracedScore best_past_ending_score_ever 
)

Trace back through in the given DP table from the best chain score.

◆ component_paths()

vector< unordered_set< path_handle_t > > vg::algorithms::component_paths ( const PathHandleGraph graph)

◆ component_paths_parallel()

vector< unordered_set< path_handle_t > > vg::algorithms::component_paths_parallel ( const PathHandleGraph graph)

◆ component_sizes()

vector< size_t > vg::algorithms::component_sizes ( const HandleGraph graph)

◆ compute_min_cut()

pair< vector< unordered_set< size_t > >, size_t > vg::algorithms::compute_min_cut ( Graph  graph,
const int  seed 
)

◆ count_covered()

template<typename Collection >
size_t vg::algorithms::count_covered ( Collection &  segments)

Count, from begin to end, the number of positions covered by ranges in the given collection. The collection will be sorted in place.

The collection must be a have a begin(), end(), and random [] access (like a vector).

No boundaries are needed because no positions can be covered without segments representing them.

◆ dfs() [1/3]

void vg::algorithms::dfs ( const HandleGraph graph,
const function< void(const handle_t &)> &  handle_begin_fn,
const function< void(const handle_t &)> &  handle_end_fn,
const function< bool(void)> &  break_fn 
)

◆ dfs() [2/3]

void vg::algorithms::dfs ( const HandleGraph graph,
const function< void(const handle_t &)> &  handle_begin_fn,
const function< void(const handle_t &)> &  handle_end_fn,
const function< bool(void)> &  break_fn,
const function< void(const edge_t &)> &  edge_fn,
const function< void(const edge_t &)> &  tree_fn,
const function< void(const edge_t &)> &  edge_curr_fn,
const function< void(const edge_t &)> &  edge_cross_fn,
const vector< handle_t > &  sources,
const unordered_set< handle_t > &  sinks 
)

◆ dfs() [3/3]

void vg::algorithms::dfs ( const HandleGraph graph,
const function< void(const handle_t &)> &  handle_begin_fn,
const function< void(const handle_t &)> &  handle_end_fn,
const vector< handle_t > &  sources,
const unordered_set< handle_t > &  sinks 
)

◆ dijkstra_shortest_cycle_length()

size_t vg::algorithms::dijkstra_shortest_cycle_length ( const HandleGraph graph,
const handle_t source 
)

Simple Dijkstra implementation that computes shortest cycle.

◆ disjoint_components()

list< bdsg::HashGraph > vg::algorithms::disjoint_components ( const HandleGraph graph)

Return a list of graphs, one for each connected component in the original graph. Node IDs are preserved from the original graph.

◆ distance_to_head() [1/2]

int32_t vg::algorithms::distance_to_head ( handle_t  h,
int32_t  limit,
const HandleGraph graph 
)

◆ distance_to_head() [2/2]

int32_t vg::algorithms::distance_to_head ( handle_t  h,
int32_t  limit,
int32_t  dist,
unordered_set< handle_t > &  seen,
const HandleGraph graph 
)

Get the distance in bases from start of node to start of closest head node of graph, or -1 if that distance exceeds the limit. dist increases by the number of bases of each previous node until you reach the head node seen is a set that holds the nodes that you have already gotten the distance of, but starts off empty

◆ distance_to_tail() [1/2]

int32_t vg::algorithms::distance_to_tail ( handle_t  h,
int32_t  limit,
const HandleGraph graph 
)

◆ distance_to_tail() [2/2]

int32_t vg::algorithms::distance_to_tail ( handle_t  h,
int32_t  limit,
int32_t  dist,
unordered_set< handle_t > &  seen,
const HandleGraph graph 
)

Get the distance in bases from end of node to end of closest tail node of graph, or -1 if that distance exceeds the limit. dist increases by the number of bases of each previous node until you reach the head node seen is a set that holds the nodes that you have already gotten the distance of, but starts off empty

◆ expand_context()

void vg::algorithms::expand_context ( const HandleGraph source,
MutableHandleGraph subgraph,
int64_t  dist,
bool  use_steps,
bool  expand_forward,
bool  expand_backward 
)

◆ expand_context_by_length()

void vg::algorithms::expand_context_by_length ( const HandleGraph source,
MutableHandleGraph subgraph,
int64_t  dist,
bool  expand_forward,
bool  expand_backward 
)

◆ expand_context_by_steps()

void vg::algorithms::expand_context_by_steps ( const HandleGraph source,
MutableHandleGraph subgraph,
int64_t  dist,
bool  expand_forward,
bool  expand_backward 
)

◆ expand_context_with_paths()

void vg::algorithms::expand_context_with_paths ( const PathHandleGraph source,
MutablePathMutableHandleGraph subgraph,
int64_t  dist,
bool  use_steps,
bool  expand_forward,
bool  expand_backward 
)

◆ expand_subgraph_by_length()

void vg::algorithms::expand_subgraph_by_length ( const HandleGraph source,
MutableHandleGraph subgraph,
const uint64_t &  length,
bool  forward_only 
)

expand the subgraph iteratively to include at least length new sequence

◆ expand_subgraph_by_steps()

void vg::algorithms::expand_subgraph_by_steps ( const HandleGraph source,
MutableHandleGraph subgraph,
const uint64_t &  steps,
bool  forward_only 
)

expand the subgraph iteratively for this many steps

◆ expand_subgraph_to_length()

void vg::algorithms::expand_subgraph_to_length ( const HandleGraph source,
MutableHandleGraph subgraph,
const uint64_t &  length,
bool  forward_only 
)

expand the subgraph iterativel until its total sequence length is greater than length

◆ expand_subgraph_to_node_count()

void vg::algorithms::expand_subgraph_to_node_count ( const HandleGraph source,
MutableHandleGraph subgraph,
const uint64_t &  node_count,
bool  forward_only 
)

expand the subgraph iteratively until its node count is at least node_count

◆ extract_connecting_graph()

unordered_map< id_t, id_t > vg::algorithms::extract_connecting_graph ( const HandleGraph source,
DeletableHandleGraph into,
int64_t  max_len,
pos_t  pos_1,
pos_t  pos_2,
bool  strict_max_len = false 
)

Fills a DeletableHandleGraph with the subgraph of a HandleGraph that connects two positions. The nodes that contain the two positions will be 'cut' at the position and will be tips in the returned graph. The algorithm guarantees that 'into' contains all walks between pos_1 and pos_2 under the maximum length except walks that include a cycle involving either position. If no walk between the two positions under the maximum length exists, 'into' will be left empty. An error is thrown if 'into' is not empty when passed to function.

Args: source graph to extract subgraph from into graph to extract into max_len guarantee finding walks along which pos_1 and pos_2 are this distance apart pos_1 start position, subgraph walks begin from here in same orientation pos_2 end position, subgraph walks end here in the same orientation strict_max_len only extract nodes and edges if they fall on some walk between pos_1 and pos_2 that is under the maximum length (implies only_walks = true)

Returns: a map from node ids in the extracted graph to the node ids in the original graph

◆ extract_containing_graph() [1/3]

void vg::algorithms::extract_containing_graph ( const HandleGraph source,
MutableHandleGraph into,
const vector< pos_t > &  positions,
const vector< size_t > &  position_forward_max_dist,
const vector< size_t > &  position_backward_max_dist,
size_t  reversing_walk_length = 0 
)

Same semantics as previous except that there is a separate maximum distance for different positions in the graph and for each search direction. Each distance is associated with the position with the same index. The forward distance is in the same orientation as the position, and the backward distance is in the reverse orientation of the position. Throws an error if the position and distance vectors are not the same length.

◆ extract_containing_graph() [2/3]

void vg::algorithms::extract_containing_graph ( const HandleGraph source,
MutableHandleGraph into,
const vector< pos_t > &  positions,
const vector< size_t > &  position_max_dist,
size_t  reversing_walk_length = 0 
)

Same semantics as previous except that there is a separate maximum distance for different positions in the graph. Each distance is associated with the position with the same index. Throws an error if the position and distance vectors are not the same length.

◆ extract_containing_graph() [3/3]

void vg::algorithms::extract_containing_graph ( const HandleGraph source,
MutableHandleGraph into,
const vector< pos_t > &  positions,
size_t  max_dist,
size_t  reversing_walk_length = 0 
)

Fills graph 'into' with the subgraph of the handle graph 'source' that contains all of the positions in the positions vector and all other nodes and edges that can be reached within a maximum distance from any of these positions. Optionally also finds nodes and edges that can be reached within some distance from the previously mentioned nodes, except along non- proper bidirected walks. Node IDs in the subgraph are retained from the source graph.

Args: source graph to extract subgraph from into graph to extract into positions search outward from these positions max_dist include all nodes and edges that can be reached in at most this distance reversing_walk_length also find graph material that can be reached

◆ extract_context()

void vg::algorithms::extract_context ( const HandleGraph source,
MutableHandleGraph subgraph,
const handle_t handle,
const uint64_t &  offset,
const uint64_t &  length,
bool  fwd,
bool  rev 
)

expand the context around a single handle position

◆ extract_extending_graph()

unordered_map< id_t, id_t > vg::algorithms::extract_extending_graph ( const HandleGraph source,
DeletableHandleGraph into,
int64_t  max_dist,
pos_t  pos,
bool  backward,
bool  preserve_cycles_on_src_node 
)

Fills graph 'into' with the subgraph of the handle graph 'source' that extends in one direction from a given position, up to a maximum distance. The node containing the position will be "cut" so that only the portion that is forward in the search direction remains. Node IDs may be changed in the extracted graph, but they can be translated back to node IDs in the original graph with the returned map, although that translation procedure MUST handle the node that pos is on specially, as it may be cut. translate_node_ids from path.hpp can do this as long as you pass along what part of the node was removed. The node containing the source position may optionally be duplicated to preserve cycles on it after its cut, but no other nodes will will duplicated.

Args: source graph to extract subgraph from into graph to extract into max_dist include all nodes and edges that can be reached in at most this distance pos extend from this position backward extend in this direction preserve_cycles_on_src if necessary, duplicate starting node to preserve cycles after cutting it

◆ extract_id_range()

void vg::algorithms::extract_id_range ( const HandleGraph source,
const nid_t id1,
const nid_t id2,
MutableHandleGraph subgraph 
)

extract the node id range

◆ extract_path_range()

void vg::algorithms::extract_path_range ( const PathPositionHandleGraph source,
path_handle_t  path_handle,
int64_t  start,
int64_t  end,
MutableHandleGraph subgraph 
)

extract the path range nodes aren't cut, so the returned graph may start before start and/or end after end if end < 0, then it will walk to the end of the path

◆ find_best_chain()

pair< int, vector< size_t > > vg::algorithms::find_best_chain ( const VectorView< Anchor > &  to_chain,
const SnarlDistanceIndex &  distance_index,
const HandleGraph graph,
int  gap_open,
int  gap_extension,
size_t  max_lookback_bases = 150,
size_t  min_lookback_items = 0,
size_t  lookback_item_hard_cap = 100,
size_t  initial_lookback_threshold = 10,
double  lookback_scale_factor = 2.0,
double  min_good_transition_score_per_base = -0.1,
int  item_bonus = 0,
size_t  max_indel_bases = 100 
)

Chain up the given group of items. Determines the best score and traceback that can be obtained by chaining items together.

Input items must be sorted by start position in the read.

Returns the score and the list of indexes of items visited to achieve that score, in order.

◆ find_edges_to_prune()

pair_hash_set<edge_t> vg::algorithms::find_edges_to_prune ( const HandleGraph graph,
size_t  k,
size_t  edge_max 
)

◆ find_gbwt() [1/2]

const gbwt::GBWT * vg::algorithms::find_gbwt ( const HandleGraph graph)

Find the GBWT that is part of the given handle graph, if any exists. Works on GBWTGraphs and GBZGraphs. Returns null if no such GBWT exists.

◆ find_gbwt() [2/2]

const gbwt::GBWT * vg::algorithms::find_gbwt ( const HandleGraph graph,
std::unique_ptr< gbwt::GBWT > &  holder,
const std::string &  filename 
)

Find a GBWT either by getting it from the given graph or loading it from the given filename into the given unique_ptr.

◆ find_gbwtgraph()

const gbwtgraph::GBWTGraph * vg::algorithms::find_gbwtgraph ( const HandleGraph graph)

Find the GBWTGraph that is part of the given handle graph, if any exists. Works on GBWTGraphs and GBZGraphs. Returns null if no such GBWTGraph exists.

◆ find_translation()

const NamedNodeBackTranslation * vg::algorithms::find_translation ( const HandleGraph graph)

Find the NamedNodeBackTranslation defining e.g. GFA segment space for the given handle graph, if any exists. Works on GFAs that have been loaded and finds their path space information.

◆ for_each_kmer()

void vg::algorithms::for_each_kmer ( const HandleGraph graph,
size_t  k,
size_t  edge_max,
const std::function< void(const kmer_t &)> &  lambda 
)

Iterate over all the kmers in the graph, running lambda on each.

◆ for_each_walk()

void vg::algorithms::for_each_walk ( const HandleGraph graph,
size_t  k,
size_t  edge_max,
const std::function< void(const walk_t &)> &  lambda 
)

Iterate over all the walks in the graph, running lambda on each.

◆ get_depth_from_index()

pair< float, float > vg::algorithms::get_depth_from_index ( const BinnedDepthIndex depth_index,
const string &  path_name,
size_t  start_offset,
size_t  end_offset 
)

Query index created above.

◆ get_graph_distance()

size_t vg::algorithms::get_graph_distance ( const Anchor from,
const Anchor to,
const SnarlDistanceIndex &  distance_index,
const HandleGraph graph 
)

Get distance in the graph, or std::numeric_limits<size_t>::max() if unreachable.

◆ get_read_distance()

size_t vg::algorithms::get_read_distance ( const Anchor from,
const Anchor to 
)

Get distance in the read, or std::numeric_limits<size_t>::max() if unreachable.

◆ gfa_to_handle_graph() [1/3]

void vg::algorithms::gfa_to_handle_graph ( const string &  filename,
MutableHandleGraph graph,
const string &  translation_filename 
)

Overload which serializes its translation to a file internally.

◆ gfa_to_handle_graph() [2/3]

void vg::algorithms::gfa_to_handle_graph ( const string &  filename,
MutableHandleGraph graph,
GFAIDMapInfo translation = nullptr 
)

Read a GFA file for a blunt-ended graph into a HandleGraph. Give "-" as a filename for stdin.

Throws GFAFormatError if the GFA file is not acceptable, and std::ios_base::failure if an IO operation fails. Throws invalid_argument if otherwise misused. Does not give max ID hints, and so might be very slow when loading into an ODGI graph.

◆ gfa_to_handle_graph() [3/3]

void vg::algorithms::gfa_to_handle_graph ( istream &  in,
MutableHandleGraph graph,
GFAIDMapInfo translation 
)

Load a GFA from a stream (assumed not to be seekable or reopenable) into a HandleGraph.

◆ gfa_to_path_handle_graph() [1/3]

void vg::algorithms::gfa_to_path_handle_graph ( const string &  filename,
MutablePathMutableHandleGraph graph,
GFAIDMapInfo translation,
int64_t  max_rgfa_rank,
unordered_set< PathSense > *  ignore_sense 
)

Same as gfa_to_handle_graph but also adds path elements from the GFA to the graph.

◆ gfa_to_path_handle_graph() [2/3]

void vg::algorithms::gfa_to_path_handle_graph ( const string &  filename,
MutablePathMutableHandleGraph graph,
int64_t  max_rgfa_rank,
const string &  translation_filename,
unordered_set< PathSense > *  ignore_sense 
)

Overload which serializes its translation to a file internally.

◆ gfa_to_path_handle_graph() [3/3]

void vg::algorithms::gfa_to_path_handle_graph ( istream &  in,
MutablePathMutableHandleGraph graph,
GFAIDMapInfo translation,
int64_t  max_rgfa_rank,
unordered_set< PathSense > *  ignore_sense 
)

Load a GFA from a stream (assumed not to be seekable or reopenable) into a PathHandleGraph.

◆ id_order()

vector< handle_t > vg::algorithms::id_order ( const HandleGraph g)

Order all the handles in the graph in ID order. All orientations are forward.

◆ intersect_path_offsets()

bool vg::algorithms::intersect_path_offsets ( const path_offset_collection_t a_offsets,
const path_offset_collection_t b_offsets,
size_t  maximum_distance 
)

Given two maps from path handle to (position, orientation) pair vectors, determine if any positions in the two sets are on the same path, within the given maximum distance.

The set expected to have more visits should be passed first.

Orientation is ignored.

The first set must be sorted, for binary search. We run binary search for each item in the second set, so the first set should be the larger one.

We run in b log a time.

◆ is_head_node()

bool vg::algorithms::is_head_node ( handle_t  h,
const HandleGraph g 
)

◆ is_tail_node()

bool vg::algorithms::is_tail_node ( handle_t  h,
const HandleGraph g 
)

◆ jump_along_closest_path()

vector< pos_t > vg::algorithms::jump_along_closest_path ( const PathPositionHandleGraph graph,
const pos_t pos,
int64_t  jump_dist,
size_t  max_search_dist 
)

returns a vector of positions that are found by jumping a fixed oriented distance along path(s) from the given position. if the position is not on a path, searches from the position to a path and adds/subtracts the search distance to the jump depending on the search direction. returns an empty vector if there is no path within the max search distance or if the jump distance goes past the end of the path

◆ kargers_min_cut()

pair< vector< unordered_set< size_t > >, size_t > vg::algorithms::kargers_min_cut ( Graph  graph,
const int  seed 
)

◆ locally_expand_graph()

void vg::algorithms::locally_expand_graph ( const HandleGraph parent,
MutableHandleGraph subgraph,
handle_t  from,
int64_t  max_dist 
)

Add to a subgraph (with same node ID space as parent) by walking forward from a given node and adding all walks up to a maximum distance away. The handle provided graph should be from the subgraph, not the parent graph.

◆ merge()

void vg::algorithms::merge ( handlegraph::MutablePathDeletableHandleGraph graph,
const vector< pair< handle_t, size_t >> &  start,
size_t  length 
)

Merge the given ranges of bases on the given handles together, rewriting paths. Sequences must match. Handles to a single node may occur no more than once.

◆ min_approx_path_distance()

size_t vg::algorithms::min_approx_path_distance ( const PathPositionHandleGraph graph,
const pos_t pos1,
const pos_t pos2,
uint64_t  max_search 
)

use the embedded paths to get an estimated minimum distance between the positions

◆ min_cut_decomposition()

vector< unordered_set< size_t > > vg::algorithms::min_cut_decomposition ( Graph  graph,
const int  seed 
)

◆ multipath_alignment_path_offsets()

unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::multipath_alignment_path_offsets ( const PathPositionHandleGraph graph,
const multipath_alignment_t mp_aln,
const std::function< bool(const path_handle_t &)> *  path_filter = nullptr 
)

Find the position of a multipath alignment on paths. Returns the lowest offset position on a path for each contiguous stretch of the multipath alignment, but multiple positions on the same path may be returned if the multipath alignment is disconnected or fans out toward the sources or sinks.

If path_filter is set, and it returns false for a path, that path is not used to annotate the read.

◆ nearest_offsets_in_paths()

path_offset_collection_t vg::algorithms::nearest_offsets_in_paths ( const PathPositionHandleGraph graph,
const pos_t pos,
int64_t  max_search,
const std::function< bool(const path_handle_t &)> *  path_filter = nullptr 
)

Return, for the nearest position in a path to the given position, subject to the given max search distance, a mapping from path name to all positions on each path where that pos_t occurs. Stops search when path(s) are ancountered.

If path_filter is set, ignores paths for which it returns false.

◆ next_pos_chars()

map< pos_t, char > vg::algorithms::next_pos_chars ( const PathPositionHandleGraph graph,
pos_t  pos 
)

◆ normalize()

void vg::algorithms::normalize ( handlegraph::MutablePathDeletableHandleGraph graph,
int  max_iter = 1,
bool  debug = false,
function< bool(const handle_t &, const handle_t &)>  can_merge = nullptr 
)

Normalize a graph, performing up to the given number of iterations. Simplifies siblings and unchops runs of nodes, in a loop.

if "can_merge" specified, it must return true in order for a pair of nodes to get merged

◆ num_components()

size_t vg::algorithms::num_components ( const HandleGraph graph)

◆ offsets_in_paths()

map< string, vector< pair< size_t, bool > > > vg::algorithms::offsets_in_paths ( const PathPositionHandleGraph graph,
const pos_t pos 
)

Wrapper for the above to support some earlier code. Only looks for paths that directly touch the position, and returns the paths by name.

◆ operator<<() [1/4]

ostream & vg::algorithms::operator<< ( ostream &  out,
const Anchor anchor 
)

Explain an Anchor to the given stream.

◆ operator<<() [2/4]

ostream & vg::algorithms::operator<< ( ostream &  out,
const TracedScore value 
)

Print operator.

◆ operator<<() [3/4]

std::ostream & vg::algorithms::operator<< ( std::ostream &  out,
const kmer_t kmer 
)

Print a kmer_t to a stream.

◆ operator<<() [4/4]

std::ostream & vg::algorithms::operator<< ( std::ostream &  out,
const walk_t walk 
)

Print a walk_t to a stream.

◆ packed_depth_of_bin()

pair< double, double > vg::algorithms::packed_depth_of_bin ( const Packer packer,
step_handle_t  start_step,
step_handle_t  end_plus_one_step,
size_t  min_coverage,
bool  include_deletions 
)

Estimate the coverage along a given reference path interval [start_step, end_plus_one_step) Coverage is obtained only from positions along the path, and variation is not counted Except if "include_deletions" is true, then reference path positions covered by a deletion edge (which is contained in the bin) will get the deletion edge's coverage counted. Other types of events (such as SNPs) can throw off coverage in similar ways but deletions tend to be bigger (and easier to find), so we hope that counting them is enough. If one wants to infer deletions from the coverage, obviously this should be false, but if looking for a background coverage for genotyping, then setting it to true may be helpful

◆ packed_depths()

void vg::algorithms::packed_depths ( const Packer packer,
const string &  path_name,
size_t  min_coverage,
ostream &  out_stream 
)

print path-name offset base-coverage for every base on a path (just like samtools depth) ignoring things below min_coverage. offsets are 1-based in output stream

◆ pad_band_constant()

std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_constant ( size_t  band_padding)

◆ pad_band_min_random_walk()

std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_min_random_walk ( double  band_padding_multiplier,
size_t  band_padding_memo_size,
size_t  max_padding 
)

◆ pad_band_random_walk()

std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_random_walk ( double  band_padding_multiplier,
size_t  band_padding_memo_size,
size_t  max_padding 
)

◆ pad_band_random_walk_internal()

std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_random_walk_internal ( double  band_padding_multiplier,
size_t  band_padding_memo_size,
size_t  max_padding,
const std::function< size_t(const Alignment &, const HandleGraph &)>  size_function 
)

◆ path_depth_of_bin()

pair< double, double > vg::algorithms::path_depth_of_bin ( const PathHandleGraph graph,
step_handle_t  start_step,
step_handle_t  end_plus_one_step,
size_t  min_coverage,
bool  count_cycles 
)

like packed_depth_of_bin (above), but use paths (as in path_depths) for measuring coverage

◆ path_depths()

void vg::algorithms::path_depths ( const PathHandleGraph graph,
const string &  path_name,
size_t  min_coverage,
bool  count_cycles,
ostream &  out_stream 
)

print path-name offset base-coverage for every base on a path (just like samtools depth) ignoring things below min_coverage. offsets are 1-based in output stream coverage here is the number of steps from (unique) other paths

◆ path_string()

std::string vg::algorithms::path_string ( const HandleGraph graph,
const Path path 
)

use the given graph and the path to determine our path string

◆ prune_complex()

size_t vg::algorithms::prune_complex ( DeletableHandleGraph graph,
int  path_length,
int  edge_max 
)

Take all nodes that would introduce paths of > edge_max edge crossings, remove them, and link their neighbors to head_node or tail_node depending on which direction the path extension was stopped. For pruning graph prior to indexing with gcsa2. Returns the number of edges removed.

◆ prune_complex_with_head_tail()

size_t vg::algorithms::prune_complex_with_head_tail ( DeletableHandleGraph graph,
int  path_length,
int  edge_max 
)

Wrap the graph with heads and tails (for GCSA2 indexing) and then prune as with prune_complex. Returns the number of edges removed.

◆ prune_short_subgraphs()

size_t vg::algorithms::prune_short_subgraphs ( DeletableHandleGraph graph,
int  min_size 
)

Remove any weakly connected components that have total sequence length under the minimum size. Returns the number of nodes removed.

◆ prune_to_connecting_graph()

void vg::algorithms::prune_to_connecting_graph ( DeletableHandleGraph graph,
const handle_t from,
const handle_t to 
)

Remove all parts of the graph that are not on some path between the two handles.

◆ reallocate_atomic_int_vector()

template<typename Int1 , typename Int2 >
void vg::algorithms::reallocate_atomic_int_vector ( vector< atomic< Int1 >> *&  vec1,
vector< atomic< Int2 >> *&  vec2 
)

◆ ref_path_distance()

int64_t vg::algorithms::ref_path_distance ( const PathPositionHandleGraph graph,
const pos_t pos_1,
const pos_t pos_2,
const unordered_set< path_handle_t > &  ref_paths,
int64_t  max_search_dist 
)

Search the local region around two positions and return the longest distance between them along any paths found during this search. Returns numeric_limits<int64_t>::max() if no shared path is found.

◆ remove_high_degree_nodes()

size_t vg::algorithms::remove_high_degree_nodes ( DeletableHandleGraph graph,
int  max_degree 
)

Remove nodes with >= max_degree total edges on each side. Note that end-to-start self loops count twice. Returns the number of nodes removed.

◆ sample_gam_depth()

pair<double, double> vg::algorithms::sample_gam_depth ( const HandleGraph graph,
const vector< Alignment > &  alignments,
size_t  max_nodes,
size_t  random_seed,
size_t  min_coverage,
size_t  min_mapq 
)

◆ sample_mapping_depth() [1/2]

pair< double, double > vg::algorithms::sample_mapping_depth ( const HandleGraph graph,
const string &  input_filename,
size_t  max_nodes,
size_t  random_seed,
size_t  min_coverage,
size_t  min_mapq,
const string &  format = "GAM" 
)

Return the mean and variance of coverage of randomly sampled nodes from a mappings file Nodes with less than min_coverage are ignored The input_filename can be - for stdin The stream is scanned in parallel with all threads max_nodes is used to keep memory down valid formats are "GAM" and "GAF"

◆ sample_mapping_depth() [2/2]

pair<double, double> vg::algorithms::sample_mapping_depth ( const HandleGraph graph,
const vector< Alignment > &  alignments,
size_t  max_nodes,
size_t  random_seed,
size_t  min_coverage,
size_t  min_mapq 
)

As above, but read a vector instead of a stream.

◆ score_best_chain()

int vg::algorithms::score_best_chain ( const VectorView< Anchor > &  to_chain,
const SnarlDistanceIndex &  distance_index,
const HandleGraph graph,
int  gap_open,
int  gap_extension 
)

Score the given group of items. Determines the best score that can be obtained by chaining items together.

Input items must be sorted by start position in the read.

◆ shortest_cycle_length() [1/2]

size_t vg::algorithms::shortest_cycle_length ( const HandleGraph graph)

Returns the length of the shortest cycle in the entire graph, or numeric_limits<size_t>::max() if no cycle exists.

◆ shortest_cycle_length() [2/2]

size_t vg::algorithms::shortest_cycle_length ( const HandleGraph graph,
const handle_t source 
)

Returns the length of the shortest cycle containing the source node, or numeric_limits<size_t>::max() if no cycle exists.

◆ shortest_cycle_length_internal()

size_t vg::algorithms::shortest_cycle_length_internal ( const HandleGraph graph,
const handle_t source,
const vector< handle_t > &  layout,
const unordered_map< handle_t, size_t > &  handle_index,
const vector< pair< size_t, size_t >> &  feedback_edges 
)

◆ simple_offsets_in_paths()

path_offset_collection_t vg::algorithms::simple_offsets_in_paths ( const PathPositionHandleGraph graph,
pos_t  pos 
)

A "simple" model for path position getting for debugging.

◆ simplify_siblings()

bool vg::algorithms::simplify_siblings ( handlegraph::MutablePathDeletableHandleGraph graph,
function< bool(const handle_t &, const handle_t &)>  can_merge = nullptr 
)

Simplify siblings in the given graph.

When one base has two successors with the same base value, and those successors have the same set of predecessors, the successors will be merged.

Performs only a subset of the possible merges. Can only merge in from one side of a given node in a single invocation. Returns true if it made progress and there may be more merging to do.

Preserves paths.

Optional can_merge callback will only let nodes get merged together if this pairwise check returns true.

◆ sort_and_shadow() [1/2]

void vg::algorithms::sort_and_shadow ( const std::vector< Anchor > &  items,
std::vector< size_t > &  indexes 
)

Get rid of items that are shadowed or contained by (or are identical to) others.

Erases items that didn't survive from indexes, and sorts them by read start position.

◆ sort_and_shadow() [2/2]

void vg::algorithms::sort_and_shadow ( std::vector< Anchor > &  items)

Get rid of items that are shadowed or contained by (or are identical to) others.

Erases items that didn't survive from items, and sorts them by read start position.

◆ sort_path_offsets()

void vg::algorithms::sort_path_offsets ( path_offset_collection_t offsets)

Sort path offsets, so intersect_path_offsets() can use them as a target.

◆ sorted_id_ranges()

vector< pair< id_t, id_t > > vg::algorithms::sorted_id_ranges ( const HandleGraph graph)

Get a sorted list of inclusive ranges of IDs used in the given HandleGraph.

◆ three_edge_connected_component_merges()

template<typename TECCNode >
void vg::algorithms::three_edge_connected_component_merges ( const function< void(const function< void(TECCNode)> &)> &  for_each_node,
const function< void(TECCNode, const function< void(TECCNode)> &)> &  for_each_connected_node,
const function< void(TECCNode, TECCNode)> &  same_component 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are arbitrary value types (which may need to be hashable).

Takes a function that loops an iteratee over all nodes, and a function that, given a node, loops an iteratee over all nodes connected to it.

Calls same_component with pairs of nodes in (at least) a spanning tree of the set of nodes in each component (not restricted to the input graph). Doing merge operations on a union-find can get you the set of components. The callback MUST NOT modify the graph!

If you have a graph where you can easily rank the nodes, don't use this. Use three_edge_connected_components_dense() instead. The first thing this function does is asign nodes a dense, 0-based rank space.

◆ three_edge_connected_component_merges_dense()

void vg::algorithms::three_edge_connected_component_merges_dense ( size_t  node_count,
size_t  first_root,
const function< void(size_t, const function< void(size_t)> &)> &  for_each_connected_node,
const function< void(size_t, size_t)> &  same_component 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.

Takes a total node count, a suggested root (or 0), and a function that, given a node, loops an iteratee over all nodes connected to it.

Calls same_component with pairs of nodes in (at least) a spanning tree of the set of nodes in each component (not restricted to the input graph). Doing merge operations on a union-find can get you the set of components. The callback MUST NOT modify the graph!

This defines the data we track for each node in the graph

When in the DFS were we first visited?

When in the DFS were we last visited? Needed for finding replacement neighbors to implement path range absorption in part 1.3, when we're asked for a range to a neighbor that got eaten.

What is our "low point" in the search. This is the earliest dfs_counter for a node that this node or any node in its DFS subtree has a back-edge to.

What is the effective degree of this node in the graph with all the absorb-eject modifications applied?

What node has the continuation of this node's path? If equal to numeric_limits<number_t>::max(), the path ends after here. The node's path is the path from this node, into its DFS subtree, to (one of) the nodes in the subtree that has the back-edge that caused this node's low point to be so low. Basically a low point traceback.

Is this node actually on its own path? Nodes can be removed from their paths if those nodes don't matter any more (i.e. got absorbed) but their paths still need to be tails for other paths.

Has the node been visited yet? Must be 0. TODO: Move to its own vector to make zeroing them all free-ish with page table shenanigans.

Track the node that this stack frame represents

Track all the neighbors left to visit. When we visit a neighbor we pop it off the back.

When we look at the neighbors, we need to be able to tell the tree edge to the parent from further back edges to the parent. So we have a flag for whether we have seen the parent tree edge already, and the first neighbors entry that is our parent will get called the tree edge.

Track whether we made a recursive DFS call into the last neighbor or not. If we did, we need to do some work when we come out of it and return to this frame.

◆ three_edge_connected_components()

template<typename TECCNode >
void vg::algorithms::three_edge_connected_components ( const function< void(const function< void(TECCNode)> &)> &  for_each_node,
const function< void(TECCNode, const function< void(TECCNode)> &)> &  for_each_connected_node,
const function< void(const function< void(const function< void(TECCNode)> &)> &)> &  component_callback 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are arbitrary value types (which may need to be hashable).

Takes a function that loops an iteratee over all nodes, and a function that, given a node, loops an iteratee over all nodes connected to it.

For each component identified, calls the given callback with a function that iterates over all nodes in the component.

If you have a graph where you can easily rank the nodes, don't use this. Use three_edge_connected_components_dense() instead. The first thing this function does is asign nodes a dense, 0-based rank space.

◆ three_edge_connected_components_dense()

void vg::algorithms::three_edge_connected_components_dense ( size_t  node_count,
size_t  first_root,
const function< void(size_t, const function< void(size_t)> &)> &  for_each_connected_node,
const function< void(const function< void(const function< void(size_t)> &)> &)> &  component_callback 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.

Takes a total node count, a suggested root (or 0), and a function that, given a node, loops an iteratee over all nodes connected to it.

For each component identified, calls the given callback with a function that iterates over all nodes in the component.

◆ three_edge_connected_components_dense_cactus()

void vg::algorithms::three_edge_connected_components_dense_cactus ( size_t  node_count,
const function< void(size_t, const function< void(size_t)> &)> &  for_each_connected_node,
const function< void(const function< void(const function< void(size_t)> &)> &)> &  component_callback 
)

Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.

Wraps the known good the 3 edge connected components algorithm from the pinchesAndCacti library.

Takes a total node count, and a function that, given a node, loops an iteratee over all nodes connected to it.

For each component identified, calls the given callback with a function that iterates over all nodes in the component.

◆ traverse_components()

void vg::algorithms::traverse_components ( const HandleGraph graph,
function< void(void)> &  on_new_comp,
function< void(handle_t)> &  on_node 
)

◆ walk_haplotype_frequency()

uint64_t vg::algorithms::walk_haplotype_frequency ( const HandleGraph graph,
const gbwt::GBWT &  haplotypes,
const walk_t walk 
)

◆ walk_haplotype_names()

std::vector< std::string > vg::algorithms::walk_haplotype_names ( const HandleGraph graph,
const gbwt::GBWT &  haplotypes,
const walk_t walk 
)

◆ widest_dijkstra()

pair< double, vector< handle_t > > vg::algorithms::widest_dijkstra ( const HandleGraph g,
handle_t  source,
handle_t  sink,
function< double(const handle_t &)>  node_weight_callback,
function< double(const edge_t &)>  edge_weight_callback,
function< bool(const handle_t &)>  is_node_ignored_callback,
function< bool(const edge_t &)>  is_edge_ignored_callbback,
bool  greedy_avg = false 
)

This Dijkstra is the same underlying algorithm as the one in dijkstra.hpp but the interface is different enough that I opted to make it a seprate thing rather than add loads of optional arguments. The key differences are these generalizations: – looks for the "widest" path (maximum minimum weight) instead of shortest – counts node and edge weights (via callbakcs) – returns the path as well as the score – option for ignoring certain nodes and edges in search (required by Yen's algorithm) – greedy_avg option switches the algorithm to a heuristic (no optimal guarantee) search using the running averages support instead of min-flow support as objective function.

◆ yens_k_widest_paths()

vector< pair< double, vector< handle_t > > > vg::algorithms::yens_k_widest_paths ( const HandleGraph g,
handle_t  source,
handle_t  sink,
size_t  K,
function< double(const handle_t &)>  node_weight_callback,
function< double(const edge_t &)>  edge_weight_callback,
bool  greedy_avg = false,
size_t  max_path_length = numeric_limits< size_t >::max() 
)

Find the k widest paths Search is aborted (and current list returned) if a path longer than max_path_length is added to the results

Variable Documentation

◆ pad_band_constant

std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_constant(size_t band_padding)

Get a band padding function that uses a constant value.

◆ pad_band_min_random_walk

std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_min_random_walk(double band_padding_multiplier=1.0, size_t band_padding_memo_size=2000, size_t max_padding=std::numeric_limits< size_t >::max())

Get a band padding function that scales the expected distance of a random walk, memoized out to the given length, using the minimum of graph size and read size as the length.

◆ pad_band_random_walk

std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_random_walk(double band_padding_multiplier=1.0, size_t band_padding_memo_size=2000, size_t max_padding=std::numeric_limits< size_t >::max())

Get a band padding function that scales with the expected distance of a random walk, memoized out to the given length.

◆ PRUNE_THREAD_BUFFER_SIZE

constexpr size_t vg::algorithms::PRUNE_THREAD_BUFFER_SIZE = 1024 * 1024
constexpr