vg
tools for working with variation graphs
|
Classes | |
class | Anchor |
struct | Edge |
struct | GFADuplicatePathError |
struct | GFAFormatError |
This exception will be thrown if the GFA data is not acceptable. More... | |
struct | GFAIDMapInfo |
class | GFAParser |
struct | Graph |
struct | kmer_t |
Stores a kmer in the context of a graph. More... | |
struct | Node |
class | TracedScore |
struct | walk_t |
Record a <=k-length walk in the context of a graph. More... | |
Typedefs | |
using | BinnedDepthIndex = unordered_map< string, map< size_t, map< size_t, pair< float, float > >> > |
using | path_offset_collection_t = unordered_map< path_handle_t, vector< pair< size_t, bool > >> |
Functions | |
template<class DistHeuristic > | |
vector< handle_t > | a_star (const HandleGraph *graph, const pos_t &pos_1, const pos_t &pos_2, const DistHeuristic &dist_heuristic, bool find_min=true, int64_t extremal_distance=numeric_limits< int64_t >::max(), bool monotonic_heuristic=true) |
unordered_map< path_handle_t, vector< pair< size_t, bool > > > | alignment_path_offsets (const PathPositionHandleGraph &graph, const Alignment &aln, bool just_min, bool nearby, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter) |
unordered_map< path_handle_t, vector< pair< size_t, bool > > > | multipath_alignment_path_offsets (const PathPositionHandleGraph &graph, const multipath_alignment_t &mp_aln, const std::function< bool(const path_handle_t &)> *path_filter) |
void | annotate_with_initial_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter) |
void | annotate_with_node_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter) |
void | annotate_with_path_positions (const PathPositionHandleGraph &graph, Alignment &aln, bool just_min, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter) |
void | annotate_with_initial_path_positions (const PathPositionHandleGraph &graph, vector< Alignment > &alns, size_t search_limit, const std::function< bool(const path_handle_t &)> *path_filter) |
size_t | min_approx_path_distance (const PathPositionHandleGraph *graph, const pos_t &pos1, const pos_t &pos2, uint64_t max_search) |
use the embedded paths to get an estimated minimum distance between the positions More... | |
void | back_translate_in_place (const NamedNodeBackTranslation *translation, Path &path) |
void | back_translate_in_place (const NamedNodeBackTranslation *translation, Snarl &snarl) |
void | back_translate_in_place (const NamedNodeBackTranslation *translation, SnarlTraversal &traversal) |
void | back_translate_in_place (const NamedNodeBackTranslation *translation, Visit &visit) |
ostream & | operator<< (ostream &out, const Anchor &anchor) |
Explain an Anchor to the given stream. More... | |
ostream & | operator<< (ostream &out, const TracedScore &value) |
Print operator. More... | |
void | sort_and_shadow (const std::vector< Anchor > &items, std::vector< size_t > &indexes) |
void | sort_and_shadow (std::vector< Anchor > &items) |
TracedScore | chain_items_dp (vector< TracedScore > &best_chain_score, const VectorView< Anchor > &to_chain, const SnarlDistanceIndex &distance_index, const HandleGraph &graph, int gap_open, int gap_extension, size_t max_lookback_bases, size_t min_lookback_items, size_t lookback_item_hard_cap, size_t initial_lookback_threshold, double lookback_scale_factor, double min_good_transition_score_per_base, int item_bonus, size_t max_indel_bases) |
vector< size_t > | chain_items_traceback (const vector< TracedScore > &best_chain_score, const VectorView< Anchor > &to_chain, const TracedScore &best_past_ending_score_ever) |
pair< int, vector< size_t > > | find_best_chain (const VectorView< Anchor > &to_chain, const SnarlDistanceIndex &distance_index, const HandleGraph &graph, int gap_open, int gap_extension, size_t max_lookback_bases, size_t min_lookback_items, size_t lookback_item_hard_cap, size_t initial_lookback_threshold, double lookback_scale_factor, double min_good_transition_score_per_base, int item_bonus, size_t max_indel_bases) |
int | score_best_chain (const VectorView< Anchor > &to_chain, const SnarlDistanceIndex &distance_index, const HandleGraph &graph, int gap_open, int gap_extension) |
size_t | get_graph_distance (const Anchor &from, const Anchor &to, const SnarlDistanceIndex &distance_index, const HandleGraph &graph) |
Get distance in the graph, or std::numeric_limits<size_t>::max() if unreachable. More... | |
size_t | get_read_distance (const Anchor &from, const Anchor &to) |
Get distance in the read, or std::numeric_limits<size_t>::max() if unreachable. More... | |
void | traverse_components (const HandleGraph &graph, function< void(void)> &on_new_comp, function< void(handle_t)> &on_node) |
size_t | num_components (const HandleGraph &graph) |
vector< size_t > | component_sizes (const HandleGraph &graph) |
vector< unordered_set< path_handle_t > > | component_paths (const PathHandleGraph &graph) |
template<typename Int1 , typename Int2 > | |
void | reallocate_atomic_int_vector (vector< atomic< Int1 >> *&vec1, vector< atomic< Int2 >> *&vec2) |
vector< unordered_set< path_handle_t > > | component_paths_parallel (const PathHandleGraph &graph) |
template<typename Collection > | |
size_t | count_covered (Collection &segments) |
void | packed_depths (const Packer &packer, const string &path_name, size_t min_coverage, ostream &out_stream) |
pair< double, double > | packed_depth_of_bin (const Packer &packer, step_handle_t start_step, step_handle_t end_plus_one_step, size_t min_coverage, bool include_deletions) |
vector< tuple< size_t, size_t, double, double > > | binned_packed_depth (const Packer &packer, const string &path_name, size_t bin_size, size_t min_coverage, bool include_deletions) |
BinnedDepthIndex | binned_packed_depth_index (const Packer &packer, const vector< string > &path_names, size_t min_bin_size, size_t max_bin_size, double exp_growth_factor, size_t min_coverage, bool include_deletions, bool std_err) |
pair< float, float > | get_depth_from_index (const BinnedDepthIndex &depth_index, const string &path_name, size_t start_offset, size_t end_offset) |
Query index created above. More... | |
pair< double, double > | sample_mapping_depth (const HandleGraph &graph, const string &input_filename, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq, const string &format) |
pair< double, double > | sample_gam_depth (const HandleGraph &graph, const vector< Alignment > &alignments, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq) |
void | path_depths (const PathHandleGraph &graph, const string &path_name, size_t min_coverage, bool count_cycles, ostream &out_stream) |
pair< double, double > | path_depth_of_bin (const PathHandleGraph &graph, step_handle_t start_step, step_handle_t end_plus_one_step, size_t min_coverage, bool count_cycles) |
like packed_depth_of_bin (above), but use paths (as in path_depths) for measuring coverage More... | |
vector< tuple< size_t, size_t, double, double > > | binned_path_depth (const PathHandleGraph &graph, const string &path_name, size_t bin_size, size_t min_coverage, bool count_cycles) |
pair< double, double > | sample_mapping_depth (const HandleGraph &graph, const vector< Alignment > &alignments, size_t max_nodes, size_t random_seed, size_t min_coverage, size_t min_mapq) |
As above, but read a vector instead of a stream. More... | |
void | dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const function< bool(void)> &break_fn, const function< void(const edge_t &)> &edge_fn, const function< void(const edge_t &)> &tree_fn, const function< void(const edge_t &)> &edge_curr_fn, const function< void(const edge_t &)> &edge_cross_fn, const vector< handle_t > &sources, const unordered_set< handle_t > &sinks) |
void | dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const vector< handle_t > &sources, const unordered_set< handle_t > &sinks) |
void | dfs (const HandleGraph &graph, const function< void(const handle_t &)> &handle_begin_fn, const function< void(const handle_t &)> &handle_end_fn, const function< bool(void)> &break_fn) |
list< bdsg::HashGraph > | disjoint_components (const HandleGraph &graph) |
bool | is_head_node (handle_t h, const HandleGraph *g) |
int32_t | distance_to_head (handle_t h, int32_t limit, const HandleGraph *graph) |
int32_t | distance_to_head (handle_t h, int32_t limit, int32_t dist, unordered_set< handle_t > &seen, const HandleGraph *graph) |
bool | is_tail_node (handle_t h, const HandleGraph *g) |
int32_t | distance_to_tail (handle_t h, int32_t limit, const HandleGraph *graph) |
int32_t | distance_to_tail (handle_t h, int32_t limit, int32_t dist, unordered_set< handle_t > &seen, const HandleGraph *graph) |
void | expand_context_by_steps (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool expand_forward, bool expand_backward) |
void | expand_context_by_length (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool expand_forward, bool expand_backward) |
void | expand_context (const HandleGraph *source, MutableHandleGraph *subgraph, int64_t dist, bool use_steps, bool expand_forward, bool expand_backward) |
void | expand_context_with_paths (const PathHandleGraph *source, MutablePathMutableHandleGraph *subgraph, int64_t dist, bool use_steps, bool expand_forward, bool expand_backward) |
unordered_map< id_t, id_t > | extract_connecting_graph (const HandleGraph *source, DeletableHandleGraph *into, int64_t max_len, pos_t pos_1, pos_t pos_2, bool strict_max_len) |
void | extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, const vector< size_t > &forward_search_lengths, const vector< size_t > &backward_search_lengths, size_t reversing_walk_length) |
void | extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, size_t max_dist, size_t reversing_walk_length) |
void | extract_containing_graph (const HandleGraph *source, MutableHandleGraph *into, const vector< pos_t > &positions, const vector< size_t > &position_max_dist, size_t reversing_walk_length) |
unordered_map< id_t, id_t > | extract_extending_graph (const HandleGraph *source, DeletableHandleGraph *into, int64_t max_dist, pos_t pos, bool backward, bool preserve_cycles_on_src_node) |
const gbwt::GBWT * | find_gbwt (const HandleGraph *graph) |
const gbwt::GBWT * | find_gbwt (const HandleGraph *graph, std::unique_ptr< gbwt::GBWT > &holder, const std::string &filename) |
const gbwtgraph::GBWTGraph * | find_gbwtgraph (const HandleGraph *graph) |
const NamedNodeBackTranslation * | find_translation (const HandleGraph *graph) |
void | gfa_to_handle_graph (const string &filename, MutableHandleGraph *graph, GFAIDMapInfo *translation) |
void | gfa_to_handle_graph (const string &filename, MutableHandleGraph *graph, const string &translation_filename) |
Overload which serializes its translation to a file internally. More... | |
void | gfa_to_handle_graph (istream &in, MutableHandleGraph *graph, GFAIDMapInfo *translation=nullptr) |
Load a GFA from a stream (assumed not to be seekable or reopenable) into a HandleGraph. More... | |
void | gfa_to_path_handle_graph (const string &filename, MutablePathMutableHandleGraph *graph, GFAIDMapInfo *translation=nullptr, int64_t max_rgfa_rank=numeric_limits< int64_t >::max(), unordered_set< PathSense > *ignore_sense=nullptr) |
Same as gfa_to_handle_graph but also adds path elements from the GFA to the graph. More... | |
void | gfa_to_path_handle_graph (const string &filename, MutablePathMutableHandleGraph *graph, int64_t max_rgfa_rank, const string &translation_filename, unordered_set< PathSense > *ignore_sense=nullptr) |
Overload which serializes its translation to a file internally. More... | |
void | gfa_to_path_handle_graph (istream &in, MutablePathMutableHandleGraph *graph, GFAIDMapInfo *translation=nullptr, int64_t max_rgfa_rank=numeric_limits< int64_t >::max(), unordered_set< PathSense > *ignore_sense=nullptr) |
Load a GFA from a stream (assumed not to be seekable or reopenable) into a PathHandleGraph. More... | |
vector< handle_t > | id_order (const HandleGraph *g) |
bool | intersect_path_offsets (const path_offset_collection_t &a_offsets, const path_offset_collection_t &b_offsets, size_t maximum_distance) |
void | sort_path_offsets (path_offset_collection_t &offsets) |
vector< pos_t > | jump_along_closest_path (const PathPositionHandleGraph *graph, const pos_t &pos, int64_t jump_dist, size_t max_search_dist) |
pair< double, vector< handle_t > > | widest_dijkstra (const HandleGraph *g, handle_t source, handle_t sink, function< double(const handle_t &)> node_weight_callback, function< double(const edge_t &)> edge_weight_callback, function< bool(const handle_t &)> is_node_ignored_callback, function< bool(const edge_t &)> is_edge_ignored_callback, bool greedy_avg) |
vector< pair< double, vector< handle_t > > > | yens_k_widest_paths (const HandleGraph *g, handle_t source, handle_t sink, size_t K, function< double(const handle_t &)> node_weight_callback, function< double(const edge_t &)> edge_weight_callback, bool greedy_avg, size_t max_path_length) |
void | for_each_kmer (const HandleGraph &graph, size_t k, size_t edge_max, const std::function< void(const kmer_t &)> &lambda) |
Iterate over all the kmers in the graph, running lambda on each. More... | |
std::ostream & | operator<< (std::ostream &out, const kmer_t &kmer) |
Print a kmer_t to a stream. More... | |
void | locally_expand_graph (const HandleGraph &parent, MutableHandleGraph &subgraph, handle_t from, int64_t max_dist) |
void | merge (handlegraph::MutablePathDeletableHandleGraph *graph, const vector< pair< handle_t, size_t >> &start, size_t length) |
pair< vector< unordered_set< size_t > >, size_t > | kargers_min_cut (Graph graph, const int seed) |
pair< vector< unordered_set< size_t > >, size_t > | compute_min_cut (Graph graph, const int seed) |
vector< unordered_set< size_t > > | min_cut_decomposition (Graph graph, const int seed) |
path_offset_collection_t | nearest_offsets_in_paths (const PathPositionHandleGraph *graph, const pos_t &pos, int64_t max_search, const std::function< bool(const path_handle_t &)> *path_filter) |
map< string, vector< pair< size_t, bool > > > | offsets_in_paths (const PathPositionHandleGraph *graph, const pos_t &pos) |
path_offset_collection_t | simple_offsets_in_paths (const PathPositionHandleGraph *graph, pos_t pos) |
A "simple" model for path position getting for debugging. More... | |
map< pos_t, char > | next_pos_chars (const PathPositionHandleGraph &graph, pos_t pos) |
void | normalize (handlegraph::MutablePathDeletableHandleGraph *graph, int max_iter, bool debug, function< bool(const handle_t &, const handle_t &)> can_merge) |
std::function< size_t(const Alignment &, const HandleGraph &)> | pad_band_random_walk_internal (double band_padding_multiplier, size_t band_padding_memo_size, size_t max_padding, const std::function< size_t(const Alignment &, const HandleGraph &)> size_function) |
std::function< size_t(const Alignment &, const HandleGraph &)> | pad_band_random_walk (double band_padding_multiplier, size_t band_padding_memo_size, size_t max_padding) |
std::function< size_t(const Alignment &, const HandleGraph &)> | pad_band_min_random_walk (double band_padding_multiplier, size_t band_padding_memo_size, size_t max_padding) |
std::function< size_t(const Alignment &, const HandleGraph &)> | pad_band_constant (size_t band_padding) |
void | append_mapping_sequence (const Mapping &m, const string &node_seq, string &seq) |
use the given oriented node sequence and the mapping to reconstruct the sequence represented by the mapping More... | |
string | path_string (const HandleGraph &graph, const Path &path) |
use the given graph and the path to determine our path string More... | |
pair_hash_set< edge_t > | find_edges_to_prune (const HandleGraph &graph, size_t k, size_t edge_max) |
size_t | prune_complex (DeletableHandleGraph &graph, int path_length, int edge_max) |
size_t | prune_complex_with_head_tail (DeletableHandleGraph &graph, int path_length, int edge_max) |
size_t | prune_short_subgraphs (DeletableHandleGraph &graph, int min_size) |
size_t | remove_high_degree_nodes (DeletableHandleGraph &g, int max_degree) |
void | prune_to_connecting_graph (DeletableHandleGraph &graph, const handle_t &from, const handle_t &to) |
Remove all parts of the graph that are not on some path between the two handles. More... | |
int64_t | ref_path_distance (const PathPositionHandleGraph *graph, const pos_t &pos_1, const pos_t &pos_2, const unordered_set< path_handle_t > &ref_paths, int64_t max_search_dist) |
size_t | bellman_ford_shortest_cycle_length (const HandleGraph *graph, const handle_t &source, const vector< handle_t > &layout, const unordered_map< handle_t, size_t > &handle_index, const vector< pair< size_t, size_t >> &feedback_edges) |
size_t | dijkstra_shortest_cycle_length (const HandleGraph *graph, const handle_t &source) |
Simple Dijkstra implementation that computes shortest cycle. More... | |
size_t | shortest_cycle_length_internal (const HandleGraph *graph, const handle_t &source, const vector< handle_t > &layout, const unordered_map< handle_t, size_t > &handle_index, const vector< pair< size_t, size_t >> &feedback_edges) |
size_t | shortest_cycle_length (const HandleGraph *graph, const handle_t &source) |
size_t | shortest_cycle_length (const HandleGraph *graph) |
bool | simplify_siblings (handlegraph::MutablePathDeletableHandleGraph *graph, function< bool(const handle_t &, const handle_t &)> can_merge) |
vector< pair< id_t, id_t > > | sorted_id_ranges (const HandleGraph *graph) |
Get a sorted list of inclusive ranges of IDs used in the given HandleGraph. More... | |
void | expand_subgraph_by_steps (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &steps, bool forward_only=false) |
expand the subgraph iteratively for this many steps More... | |
void | expand_subgraph_to_node_count (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &node_count, bool forward_only=false) |
expand the subgraph iteratively until its node count is at least node_count More... | |
void | expand_subgraph_by_length (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &length, bool forward_only=false) |
expand the subgraph iteratively to include at least length new sequence More... | |
void | expand_subgraph_to_length (const HandleGraph &source, MutableHandleGraph &subgraph, const uint64_t &length, bool forward_only=false) |
expand the subgraph iterativel until its total sequence length is greater than length More... | |
void | extract_context (const HandleGraph &source, MutableHandleGraph &subgraph, const handle_t &handle, const uint64_t &offset, const uint64_t &length, bool fwd, bool rev) |
expand the context around a single handle position More... | |
void | extract_id_range (const HandleGraph &source, const nid_t &id1, const nid_t &id2, MutableHandleGraph &subgraph) |
extract the node id range More... | |
void | extract_path_range (const PathPositionHandleGraph &source, path_handle_t path_handle, int64_t start, int64_t end, MutableHandleGraph &subgraph) |
void | add_subpaths_to_subgraph (const PathPositionHandleGraph &source, MutablePathHandleGraph &subgraph) |
void | add_connecting_edges_to_subgraph (const HandleGraph &source, MutableHandleGraph &subgraph) |
void | three_edge_connected_component_merges_dense (size_t node_count, size_t first_root, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(size_t, size_t)> &same_component) |
void | three_edge_connected_components_dense (size_t node_count, size_t first_root, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(size_t)> &)> &)> &component_callback) |
void | three_edge_connected_components_dense_cactus (size_t node_count, const function< void(size_t, const function< void(size_t)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(size_t)> &)> &)> &component_callback) |
template<typename TECCNode > | |
void | three_edge_connected_components (const function< void(const function< void(TECCNode)> &)> &for_each_node, const function< void(TECCNode, const function< void(TECCNode)> &)> &for_each_connected_node, const function< void(const function< void(const function< void(TECCNode)> &)> &)> &component_callback) |
template<typename TECCNode > | |
void | three_edge_connected_component_merges (const function< void(const function< void(TECCNode)> &)> &for_each_node, const function< void(TECCNode, const function< void(TECCNode)> &)> &for_each_connected_node, const function< void(TECCNode, TECCNode)> &same_component) |
void | for_each_walk (const HandleGraph &graph, size_t k, size_t edge_max, const std::function< void(const walk_t &)> &lambda) |
Iterate over all the walks in the graph, running lambda on each. More... | |
std::ostream & | operator<< (std::ostream &out, const walk_t &walk) |
Print a walk_t to a stream. More... | |
uint64_t | walk_haplotype_frequency (const HandleGraph &graph, const gbwt::GBWT &haplotypes, const walk_t &walk) |
std::vector< std::string > | walk_haplotype_names (const HandleGraph &graph, const gbwt::GBWT &haplotypes, const walk_t &walk) |
Variables | |
std::function< size_t(const Alignment &, const HandleGraph &)> | pad_band_random_walk (double band_padding_multiplier=1.0, size_t band_padding_memo_size=2000, size_t max_padding=std::numeric_limits< size_t >::max()) |
std::function< size_t(const Alignment &, const HandleGraph &)> | pad_band_min_random_walk (double band_padding_multiplier=1.0, size_t band_padding_memo_size=2000, size_t max_padding=std::numeric_limits< size_t >::max()) |
std::function< size_t(const Alignment &, const HandleGraph &)> | pad_band_constant (size_t band_padding) |
Get a band padding function that uses a constant value. More... | |
constexpr size_t | PRUNE_THREAD_BUFFER_SIZE = 1024 * 1024 |
using vg::algorithms::BinnedDepthIndex = typedef unordered_map<string, map<size_t, map<size_t, pair<float, float> >> > |
Use the above function to retrieve the binned depths of a list of paths, and store them indexed by start coordinate. If std_err is true, store <mean, stderr> instead of <mean, variance> For each path, a series of indexes is computed, for bin sizes from min_bin_size, min_bin_size^(exp_growth_factor), etc.
using vg::algorithms::path_offset_collection_t = typedef unordered_map<path_handle_t, vector<pair<size_t, bool> >> |
Represents a set of positions and orientations, along a collection of paths. Positions and orientations may or may not be stored sorted.
vector< handle_t > vg::algorithms::a_star | ( | const HandleGraph * | graph, |
const pos_t & | pos_1, | ||
const pos_t & | pos_2, | ||
const DistHeuristic & | dist_heuristic, | ||
bool | find_min = true , |
||
int64_t | extremal_distance = numeric_limits<int64_t>::max() , |
||
bool | monotonic_heuristic = true |
||
) |
Implements the A* heuristic-guided search algorithm. Returns the path from pos_1 to pos_2 that is either minimal or maximal length, according to the parameters. Allows an extremal distance beyond which the algorithm will cease looking for paths (this should be a large value when looking for minimal paths and a small value when looking for maximum paths). If there is no path between the positions, or none within the extremal length, an empty vector will be returned.
void vg::algorithms::add_connecting_edges_to_subgraph | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph | ||
) |
We can accumulate a subgraph without accumulating all the edges between its nodes this helper ensures that we get the full set
void vg::algorithms::add_subpaths_to_subgraph | ( | const PathPositionHandleGraph & | source, |
MutablePathHandleGraph & | subgraph | ||
) |
Add subpaths to the subgraph for all paths visiting its nodes in the source graph.
Always generates correct path metadata, and a path for each contiguous fragment of any base path. Assumes the source graph does not contain any overlapping path fragments on a given base path, and that the subgraph does not already contain any paths on a base path also present in the source graph.
unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::alignment_path_offsets | ( | const PathPositionHandleGraph & | graph, |
const Alignment & | aln, | ||
bool | just_min, | ||
bool | nearby, | ||
size_t | search_limit = 0 , |
||
const std::function< bool(const path_handle_t &)> * | path_filter = nullptr |
||
) |
Gives the path positions of the alignment. If just_min is set, gives the minimum position on each path. Else, gives all Mapping start positions on each path. If nearby is set, will search for a nearby path. Will recurse with nearby set if it is not set on initial call and no positions are found. Respects search_limit in bp in that case. If search_limit is 0, read length is used.
If path_filter is set, and it returns false for a path, that path is not used to annotate the read.
void vg::algorithms::annotate_with_initial_path_positions | ( | const PathPositionHandleGraph & | graph, |
Alignment & | aln, | ||
size_t | search_limit = 0 , |
||
const std::function< bool(const path_handle_t &)> * | path_filter = nullptr |
||
) |
Use the graph to annotate an Alignment with the first position it touches on each reference path. Thread safe.
search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.
If path_filter is set, and it returns false for a path, that path is not used to annotate the read.
void vg::algorithms::annotate_with_initial_path_positions | ( | const PathPositionHandleGraph & | graph, |
vector< Alignment > & | aln, | ||
size_t | search_limit = 0 , |
||
const std::function< bool(const path_handle_t &)> * | path_filter = nullptr |
||
) |
Use the graph annotate Alignments with the first position they touch on each reference path. Thread safe.
search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.
If path_filter is set, and it returns false for a path, that path is not used to annotate the read.
void vg::algorithms::annotate_with_node_path_positions | ( | const PathPositionHandleGraph & | graph, |
Alignment & | aln, | ||
size_t | search_limit = 0 , |
||
const std::function< bool(const path_handle_t &)> * | path_filter = nullptr |
||
) |
Use the graph to annotate an Alignment with the first position it touches on each node it visits in each reference path. Thread safe. If no nodes on any path are visited, searches for a nearby path position to use.
search_limit gives the maximum distance to search for a path if the alignment does not actually touch any paths. If 0, the alignment's sequence length is used.
If path_filter is set, and it returns false for a path, that path is not used to annotate the read.
void vg::algorithms::annotate_with_path_positions | ( | const PathPositionHandleGraph & | graph, |
Alignment & | aln, | ||
bool | just_min, | ||
size_t | search_limit = 0 , |
||
const std::function< bool(const path_handle_t &)> * | path_filter = nullptr |
||
) |
Use the graph to annotate an Alignment with positions on each reference path. Thread safe.
If just_min is set, gives the minimum position on each path. Else, gives all Mapping start positions on each path. If no positions on the path are found, looks for nearby path positions in graph space. Respects search_limit in bp in that case. If search_limit is 0, read length is used.
If path_filter is set, and it returns false for a path, that path is not used to annotate the read.
void vg::algorithms::append_mapping_sequence | ( | const Mapping & | m, |
const string & | node_seq, | ||
string & | seq | ||
) |
use the given oriented node sequence and the mapping to reconstruct the sequence represented by the mapping
void vg::algorithms::back_translate_in_place | ( | const NamedNodeBackTranslation * | translation, |
Path & | path | ||
) |
Translate the given Path in place from node ID space to named segment space.
void vg::algorithms::back_translate_in_place | ( | const NamedNodeBackTranslation * | translation, |
Snarl & | snarl | ||
) |
Translate the given Snarl in place from node ID space to named segment space.
void vg::algorithms::back_translate_in_place | ( | const NamedNodeBackTranslation * | translation, |
SnarlTraversal & | traversal | ||
) |
Translate the given SnarlTraversal in place from node ID space to named segment space. Visits that end up being to snarls where both boundaries are from the same orientation of the same segment will be removed. Multiple visits in a row to the same orientation of the same segment will be elided.
TODO: Work out a way to preserve traversals around cycles while not repeating ourselves for visits to diffrent pieces of the same chopped segment.
void vg::algorithms::back_translate_in_place | ( | const NamedNodeBackTranslation * | translation, |
Visit & | visit | ||
) |
Translate the given Visit in place from node ID space to named segment space.
size_t vg::algorithms::bellman_ford_shortest_cycle_length | ( | const HandleGraph * | graph, |
const handle_t & | source, | ||
const vector< handle_t > & | layout, | ||
const unordered_map< handle_t, size_t > & | handle_index, | ||
const vector< pair< size_t, size_t >> & | feedback_edges | ||
) |
An implementation of Bellman-Ford with Yen's ordering improvement applied to a layout ideally has a small feedback arc set
vector< tuple< size_t, size_t, double, double > > vg::algorithms::binned_packed_depth | ( | const Packer & | packer, |
const string & | path_name, | ||
size_t | bin_size, | ||
size_t | min_coverage, | ||
bool | include_deletions | ||
) |
Use all available threads to estimate the binned packed coverage of a path using above fucntion Each element is a bin's 0-based open-ended interval in the path, and its coverage mean,variance.
BinnedDepthIndex vg::algorithms::binned_packed_depth_index | ( | const Packer & | packer, |
const vector< string > & | path_names, | ||
size_t | min_bin_size, | ||
size_t | max_bin_size, | ||
double | exp_growth_factor, | ||
size_t | min_coverage, | ||
bool | include_deletions, | ||
bool | std_err | ||
) |
vector< tuple< size_t, size_t, double, double > > vg::algorithms::binned_path_depth | ( | const PathHandleGraph & | graph, |
const string & | path_name, | ||
size_t | bin_size, | ||
size_t | min_coverage, | ||
bool | count_cycles | ||
) |
TracedScore vg::algorithms::chain_items_dp | ( | vector< TracedScore > & | best_chain_score, |
const VectorView< Anchor > & | to_chain, | ||
const SnarlDistanceIndex & | distance_index, | ||
const HandleGraph & | graph, | ||
int | gap_open, | ||
int | gap_extension, | ||
size_t | max_lookback_bases = 150 , |
||
size_t | min_lookback_items = 0 , |
||
size_t | lookback_item_hard_cap = 100 , |
||
size_t | initial_lookback_threshold = 10 , |
||
double | lookback_scale_factor = 2.0 , |
||
double | min_good_transition_score_per_base = -0.1 , |
||
int | item_bonus = 0 , |
||
size_t | max_indel_bases = 100 |
||
) |
Fill in the given DP table for the best chain score ending with each item. Returns the best observed score overall from that table, with provenance to its location in the table, if tracked in the type. Assumes some items exist.
Input items must be sorted by start position in the read.
Takes the given per-item bonus for each item collected.
Uses a finite lookback in items and in read bases when checking where we can come from to reach an item. Also, once a given number of good-looking predecessor items have been found, stop looking back.
Limits transitions to those involving indels of the given size or less, to avoid very bad transitions.
vector< size_t > vg::algorithms::chain_items_traceback | ( | const vector< TracedScore > & | best_chain_score, |
const VectorView< Anchor > & | to_chain, | ||
const TracedScore & | best_past_ending_score_ever | ||
) |
Trace back through in the given DP table from the best chain score.
vector< unordered_set< path_handle_t > > vg::algorithms::component_paths | ( | const PathHandleGraph & | graph | ) |
vector< unordered_set< path_handle_t > > vg::algorithms::component_paths_parallel | ( | const PathHandleGraph & | graph | ) |
vector< size_t > vg::algorithms::component_sizes | ( | const HandleGraph & | graph | ) |
pair< vector< unordered_set< size_t > >, size_t > vg::algorithms::compute_min_cut | ( | Graph | graph, |
const int | seed | ||
) |
size_t vg::algorithms::count_covered | ( | Collection & | segments | ) |
Count, from begin to end, the number of positions covered by ranges in the given collection. The collection will be sorted in place.
The collection must be a have a begin(), end(), and random [] access (like a vector).
No boundaries are needed because no positions can be covered without segments representing them.
void vg::algorithms::dfs | ( | const HandleGraph & | graph, |
const function< void(const handle_t &)> & | handle_begin_fn, | ||
const function< void(const handle_t &)> & | handle_end_fn, | ||
const function< bool(void)> & | break_fn | ||
) |
void vg::algorithms::dfs | ( | const HandleGraph & | graph, |
const function< void(const handle_t &)> & | handle_begin_fn, | ||
const function< void(const handle_t &)> & | handle_end_fn, | ||
const function< bool(void)> & | break_fn, | ||
const function< void(const edge_t &)> & | edge_fn, | ||
const function< void(const edge_t &)> & | tree_fn, | ||
const function< void(const edge_t &)> & | edge_curr_fn, | ||
const function< void(const edge_t &)> & | edge_cross_fn, | ||
const vector< handle_t > & | sources, | ||
const unordered_set< handle_t > & | sinks | ||
) |
void vg::algorithms::dfs | ( | const HandleGraph & | graph, |
const function< void(const handle_t &)> & | handle_begin_fn, | ||
const function< void(const handle_t &)> & | handle_end_fn, | ||
const vector< handle_t > & | sources, | ||
const unordered_set< handle_t > & | sinks | ||
) |
size_t vg::algorithms::dijkstra_shortest_cycle_length | ( | const HandleGraph * | graph, |
const handle_t & | source | ||
) |
Simple Dijkstra implementation that computes shortest cycle.
list< bdsg::HashGraph > vg::algorithms::disjoint_components | ( | const HandleGraph & | graph | ) |
Return a list of graphs, one for each connected component in the original graph. Node IDs are preserved from the original graph.
int32_t vg::algorithms::distance_to_head | ( | handle_t | h, |
int32_t | limit, | ||
const HandleGraph * | graph | ||
) |
int32_t vg::algorithms::distance_to_head | ( | handle_t | h, |
int32_t | limit, | ||
int32_t | dist, | ||
unordered_set< handle_t > & | seen, | ||
const HandleGraph * | graph | ||
) |
Get the distance in bases from start of node to start of closest head node of graph, or -1 if that distance exceeds the limit. dist increases by the number of bases of each previous node until you reach the head node seen is a set that holds the nodes that you have already gotten the distance of, but starts off empty
int32_t vg::algorithms::distance_to_tail | ( | handle_t | h, |
int32_t | limit, | ||
const HandleGraph * | graph | ||
) |
int32_t vg::algorithms::distance_to_tail | ( | handle_t | h, |
int32_t | limit, | ||
int32_t | dist, | ||
unordered_set< handle_t > & | seen, | ||
const HandleGraph * | graph | ||
) |
Get the distance in bases from end of node to end of closest tail node of graph, or -1 if that distance exceeds the limit. dist increases by the number of bases of each previous node until you reach the head node seen is a set that holds the nodes that you have already gotten the distance of, but starts off empty
void vg::algorithms::expand_context | ( | const HandleGraph * | source, |
MutableHandleGraph * | subgraph, | ||
int64_t | dist, | ||
bool | use_steps, | ||
bool | expand_forward, | ||
bool | expand_backward | ||
) |
void vg::algorithms::expand_context_by_length | ( | const HandleGraph * | source, |
MutableHandleGraph * | subgraph, | ||
int64_t | dist, | ||
bool | expand_forward, | ||
bool | expand_backward | ||
) |
void vg::algorithms::expand_context_by_steps | ( | const HandleGraph * | source, |
MutableHandleGraph * | subgraph, | ||
int64_t | dist, | ||
bool | expand_forward, | ||
bool | expand_backward | ||
) |
void vg::algorithms::expand_context_with_paths | ( | const PathHandleGraph * | source, |
MutablePathMutableHandleGraph * | subgraph, | ||
int64_t | dist, | ||
bool | use_steps, | ||
bool | expand_forward, | ||
bool | expand_backward | ||
) |
void vg::algorithms::expand_subgraph_by_length | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const uint64_t & | length, | ||
bool | forward_only | ||
) |
expand the subgraph iteratively to include at least length new sequence
void vg::algorithms::expand_subgraph_by_steps | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const uint64_t & | steps, | ||
bool | forward_only | ||
) |
expand the subgraph iteratively for this many steps
void vg::algorithms::expand_subgraph_to_length | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const uint64_t & | length, | ||
bool | forward_only | ||
) |
expand the subgraph iterativel until its total sequence length is greater than length
void vg::algorithms::expand_subgraph_to_node_count | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const uint64_t & | node_count, | ||
bool | forward_only | ||
) |
expand the subgraph iteratively until its node count is at least node_count
unordered_map< id_t, id_t > vg::algorithms::extract_connecting_graph | ( | const HandleGraph * | source, |
DeletableHandleGraph * | into, | ||
int64_t | max_len, | ||
pos_t | pos_1, | ||
pos_t | pos_2, | ||
bool | strict_max_len = false |
||
) |
Fills a DeletableHandleGraph with the subgraph of a HandleGraph that connects two positions. The nodes that contain the two positions will be 'cut' at the position and will be tips in the returned graph. The algorithm guarantees that 'into' contains all walks between pos_1 and pos_2 under the maximum length except walks that include a cycle involving either position. If no walk between the two positions under the maximum length exists, 'into' will be left empty. An error is thrown if 'into' is not empty when passed to function.
Args: source graph to extract subgraph from into graph to extract into max_len guarantee finding walks along which pos_1 and pos_2 are this distance apart pos_1 start position, subgraph walks begin from here in same orientation pos_2 end position, subgraph walks end here in the same orientation strict_max_len only extract nodes and edges if they fall on some walk between pos_1 and pos_2 that is under the maximum length (implies only_walks = true)
Returns: a map from node ids in the extracted graph to the node ids in the original graph
void vg::algorithms::extract_containing_graph | ( | const HandleGraph * | source, |
MutableHandleGraph * | into, | ||
const vector< pos_t > & | positions, | ||
const vector< size_t > & | position_forward_max_dist, | ||
const vector< size_t > & | position_backward_max_dist, | ||
size_t | reversing_walk_length = 0 |
||
) |
Same semantics as previous except that there is a separate maximum distance for different positions in the graph and for each search direction. Each distance is associated with the position with the same index. The forward distance is in the same orientation as the position, and the backward distance is in the reverse orientation of the position. Throws an error if the position and distance vectors are not the same length.
void vg::algorithms::extract_containing_graph | ( | const HandleGraph * | source, |
MutableHandleGraph * | into, | ||
const vector< pos_t > & | positions, | ||
const vector< size_t > & | position_max_dist, | ||
size_t | reversing_walk_length = 0 |
||
) |
Same semantics as previous except that there is a separate maximum distance for different positions in the graph. Each distance is associated with the position with the same index. Throws an error if the position and distance vectors are not the same length.
void vg::algorithms::extract_containing_graph | ( | const HandleGraph * | source, |
MutableHandleGraph * | into, | ||
const vector< pos_t > & | positions, | ||
size_t | max_dist, | ||
size_t | reversing_walk_length = 0 |
||
) |
Fills graph 'into' with the subgraph of the handle graph 'source' that contains all of the positions in the positions vector and all other nodes and edges that can be reached within a maximum distance from any of these positions. Optionally also finds nodes and edges that can be reached within some distance from the previously mentioned nodes, except along non- proper bidirected walks. Node IDs in the subgraph are retained from the source graph.
Args: source graph to extract subgraph from into graph to extract into positions search outward from these positions max_dist include all nodes and edges that can be reached in at most this distance reversing_walk_length also find graph material that can be reached
void vg::algorithms::extract_context | ( | const HandleGraph & | source, |
MutableHandleGraph & | subgraph, | ||
const handle_t & | handle, | ||
const uint64_t & | offset, | ||
const uint64_t & | length, | ||
bool | fwd, | ||
bool | rev | ||
) |
expand the context around a single handle position
unordered_map< id_t, id_t > vg::algorithms::extract_extending_graph | ( | const HandleGraph * | source, |
DeletableHandleGraph * | into, | ||
int64_t | max_dist, | ||
pos_t | pos, | ||
bool | backward, | ||
bool | preserve_cycles_on_src_node | ||
) |
Fills graph 'into' with the subgraph of the handle graph 'source' that extends in one direction from a given position, up to a maximum distance. The node containing the position will be "cut" so that only the portion that is forward in the search direction remains. Node IDs may be changed in the extracted graph, but they can be translated back to node IDs in the original graph with the returned map, although that translation procedure MUST handle the node that pos is on specially, as it may be cut. translate_node_ids from path.hpp can do this as long as you pass along what part of the node was removed. The node containing the source position may optionally be duplicated to preserve cycles on it after its cut, but no other nodes will will duplicated.
Args: source graph to extract subgraph from into graph to extract into max_dist include all nodes and edges that can be reached in at most this distance pos extend from this position backward extend in this direction preserve_cycles_on_src if necessary, duplicate starting node to preserve cycles after cutting it
void vg::algorithms::extract_id_range | ( | const HandleGraph & | source, |
const nid_t & | id1, | ||
const nid_t & | id2, | ||
MutableHandleGraph & | subgraph | ||
) |
extract the node id range
void vg::algorithms::extract_path_range | ( | const PathPositionHandleGraph & | source, |
path_handle_t | path_handle, | ||
int64_t | start, | ||
int64_t | end, | ||
MutableHandleGraph & | subgraph | ||
) |
extract the path range nodes aren't cut, so the returned graph may start before start and/or end after end if end < 0, then it will walk to the end of the path
pair< int, vector< size_t > > vg::algorithms::find_best_chain | ( | const VectorView< Anchor > & | to_chain, |
const SnarlDistanceIndex & | distance_index, | ||
const HandleGraph & | graph, | ||
int | gap_open, | ||
int | gap_extension, | ||
size_t | max_lookback_bases = 150 , |
||
size_t | min_lookback_items = 0 , |
||
size_t | lookback_item_hard_cap = 100 , |
||
size_t | initial_lookback_threshold = 10 , |
||
double | lookback_scale_factor = 2.0 , |
||
double | min_good_transition_score_per_base = -0.1 , |
||
int | item_bonus = 0 , |
||
size_t | max_indel_bases = 100 |
||
) |
Chain up the given group of items. Determines the best score and traceback that can be obtained by chaining items together.
Input items must be sorted by start position in the read.
Returns the score and the list of indexes of items visited to achieve that score, in order.
pair_hash_set<edge_t> vg::algorithms::find_edges_to_prune | ( | const HandleGraph & | graph, |
size_t | k, | ||
size_t | edge_max | ||
) |
const gbwt::GBWT * vg::algorithms::find_gbwt | ( | const HandleGraph * | graph | ) |
Find the GBWT that is part of the given handle graph, if any exists. Works on GBWTGraphs and GBZGraphs. Returns null if no such GBWT exists.
const gbwt::GBWT * vg::algorithms::find_gbwt | ( | const HandleGraph * | graph, |
std::unique_ptr< gbwt::GBWT > & | holder, | ||
const std::string & | filename | ||
) |
Find a GBWT either by getting it from the given graph or loading it from the given filename into the given unique_ptr.
const gbwtgraph::GBWTGraph * vg::algorithms::find_gbwtgraph | ( | const HandleGraph * | graph | ) |
Find the GBWTGraph that is part of the given handle graph, if any exists. Works on GBWTGraphs and GBZGraphs. Returns null if no such GBWTGraph exists.
const NamedNodeBackTranslation * vg::algorithms::find_translation | ( | const HandleGraph * | graph | ) |
Find the NamedNodeBackTranslation defining e.g. GFA segment space for the given handle graph, if any exists. Works on GFAs that have been loaded and finds their path space information.
void vg::algorithms::for_each_kmer | ( | const HandleGraph & | graph, |
size_t | k, | ||
size_t | edge_max, | ||
const std::function< void(const kmer_t &)> & | lambda | ||
) |
Iterate over all the kmers in the graph, running lambda on each.
void vg::algorithms::for_each_walk | ( | const HandleGraph & | graph, |
size_t | k, | ||
size_t | edge_max, | ||
const std::function< void(const walk_t &)> & | lambda | ||
) |
Iterate over all the walks in the graph, running lambda on each.
pair< float, float > vg::algorithms::get_depth_from_index | ( | const BinnedDepthIndex & | depth_index, |
const string & | path_name, | ||
size_t | start_offset, | ||
size_t | end_offset | ||
) |
Query index created above.
size_t vg::algorithms::get_graph_distance | ( | const Anchor & | from, |
const Anchor & | to, | ||
const SnarlDistanceIndex & | distance_index, | ||
const HandleGraph & | graph | ||
) |
Get distance in the graph, or std::numeric_limits<size_t>::max() if unreachable.
Get distance in the read, or std::numeric_limits<size_t>::max() if unreachable.
void vg::algorithms::gfa_to_handle_graph | ( | const string & | filename, |
MutableHandleGraph * | graph, | ||
const string & | translation_filename | ||
) |
Overload which serializes its translation to a file internally.
void vg::algorithms::gfa_to_handle_graph | ( | const string & | filename, |
MutableHandleGraph * | graph, | ||
GFAIDMapInfo * | translation = nullptr |
||
) |
Read a GFA file for a blunt-ended graph into a HandleGraph. Give "-" as a filename for stdin.
Throws GFAFormatError if the GFA file is not acceptable, and std::ios_base::failure if an IO operation fails. Throws invalid_argument if otherwise misused. Does not give max ID hints, and so might be very slow when loading into an ODGI graph.
void vg::algorithms::gfa_to_handle_graph | ( | istream & | in, |
MutableHandleGraph * | graph, | ||
GFAIDMapInfo * | translation | ||
) |
Load a GFA from a stream (assumed not to be seekable or reopenable) into a HandleGraph.
void vg::algorithms::gfa_to_path_handle_graph | ( | const string & | filename, |
MutablePathMutableHandleGraph * | graph, | ||
GFAIDMapInfo * | translation, | ||
int64_t | max_rgfa_rank, | ||
unordered_set< PathSense > * | ignore_sense | ||
) |
Same as gfa_to_handle_graph but also adds path elements from the GFA to the graph.
void vg::algorithms::gfa_to_path_handle_graph | ( | const string & | filename, |
MutablePathMutableHandleGraph * | graph, | ||
int64_t | max_rgfa_rank, | ||
const string & | translation_filename, | ||
unordered_set< PathSense > * | ignore_sense | ||
) |
Overload which serializes its translation to a file internally.
void vg::algorithms::gfa_to_path_handle_graph | ( | istream & | in, |
MutablePathMutableHandleGraph * | graph, | ||
GFAIDMapInfo * | translation, | ||
int64_t | max_rgfa_rank, | ||
unordered_set< PathSense > * | ignore_sense | ||
) |
Load a GFA from a stream (assumed not to be seekable or reopenable) into a PathHandleGraph.
vector< handle_t > vg::algorithms::id_order | ( | const HandleGraph * | g | ) |
Order all the handles in the graph in ID order. All orientations are forward.
bool vg::algorithms::intersect_path_offsets | ( | const path_offset_collection_t & | a_offsets, |
const path_offset_collection_t & | b_offsets, | ||
size_t | maximum_distance | ||
) |
Given two maps from path handle to (position, orientation) pair vectors, determine if any positions in the two sets are on the same path, within the given maximum distance.
The set expected to have more visits should be passed first.
Orientation is ignored.
The first set must be sorted, for binary search. We run binary search for each item in the second set, so the first set should be the larger one.
We run in b log a time.
bool vg::algorithms::is_head_node | ( | handle_t | h, |
const HandleGraph * | g | ||
) |
bool vg::algorithms::is_tail_node | ( | handle_t | h, |
const HandleGraph * | g | ||
) |
vector< pos_t > vg::algorithms::jump_along_closest_path | ( | const PathPositionHandleGraph * | graph, |
const pos_t & | pos, | ||
int64_t | jump_dist, | ||
size_t | max_search_dist | ||
) |
returns a vector of positions that are found by jumping a fixed oriented distance along path(s) from the given position. if the position is not on a path, searches from the position to a path and adds/subtracts the search distance to the jump depending on the search direction. returns an empty vector if there is no path within the max search distance or if the jump distance goes past the end of the path
pair< vector< unordered_set< size_t > >, size_t > vg::algorithms::kargers_min_cut | ( | Graph | graph, |
const int | seed | ||
) |
void vg::algorithms::locally_expand_graph | ( | const HandleGraph & | parent, |
MutableHandleGraph & | subgraph, | ||
handle_t | from, | ||
int64_t | max_dist | ||
) |
Add to a subgraph (with same node ID space as parent) by walking forward from a given node and adding all walks up to a maximum distance away. The handle provided graph should be from the subgraph, not the parent graph.
void vg::algorithms::merge | ( | handlegraph::MutablePathDeletableHandleGraph * | graph, |
const vector< pair< handle_t, size_t >> & | start, | ||
size_t | length | ||
) |
Merge the given ranges of bases on the given handles together, rewriting paths. Sequences must match. Handles to a single node may occur no more than once.
size_t vg::algorithms::min_approx_path_distance | ( | const PathPositionHandleGraph * | graph, |
const pos_t & | pos1, | ||
const pos_t & | pos2, | ||
uint64_t | max_search | ||
) |
use the embedded paths to get an estimated minimum distance between the positions
vector< unordered_set< size_t > > vg::algorithms::min_cut_decomposition | ( | Graph | graph, |
const int | seed | ||
) |
unordered_map< path_handle_t, vector< pair< size_t, bool > > > vg::algorithms::multipath_alignment_path_offsets | ( | const PathPositionHandleGraph & | graph, |
const multipath_alignment_t & | mp_aln, | ||
const std::function< bool(const path_handle_t &)> * | path_filter = nullptr |
||
) |
Find the position of a multipath alignment on paths. Returns the lowest offset position on a path for each contiguous stretch of the multipath alignment, but multiple positions on the same path may be returned if the multipath alignment is disconnected or fans out toward the sources or sinks.
If path_filter is set, and it returns false for a path, that path is not used to annotate the read.
path_offset_collection_t vg::algorithms::nearest_offsets_in_paths | ( | const PathPositionHandleGraph * | graph, |
const pos_t & | pos, | ||
int64_t | max_search, | ||
const std::function< bool(const path_handle_t &)> * | path_filter = nullptr |
||
) |
Return, for the nearest position in a path to the given position, subject to the given max search distance, a mapping from path name to all positions on each path where that pos_t occurs. Stops search when path(s) are ancountered.
If path_filter is set, ignores paths for which it returns false.
map< pos_t, char > vg::algorithms::next_pos_chars | ( | const PathPositionHandleGraph & | graph, |
pos_t | pos | ||
) |
void vg::algorithms::normalize | ( | handlegraph::MutablePathDeletableHandleGraph * | graph, |
int | max_iter = 1 , |
||
bool | debug = false , |
||
function< bool(const handle_t &, const handle_t &)> | can_merge = nullptr |
||
) |
Normalize a graph, performing up to the given number of iterations. Simplifies siblings and unchops runs of nodes, in a loop.
if "can_merge" specified, it must return true in order for a pair of nodes to get merged
size_t vg::algorithms::num_components | ( | const HandleGraph & | graph | ) |
map< string, vector< pair< size_t, bool > > > vg::algorithms::offsets_in_paths | ( | const PathPositionHandleGraph * | graph, |
const pos_t & | pos | ||
) |
Wrapper for the above to support some earlier code. Only looks for paths that directly touch the position, and returns the paths by name.
ostream & vg::algorithms::operator<< | ( | ostream & | out, |
const Anchor & | anchor | ||
) |
Explain an Anchor to the given stream.
ostream & vg::algorithms::operator<< | ( | ostream & | out, |
const TracedScore & | value | ||
) |
Print operator.
std::ostream & vg::algorithms::operator<< | ( | std::ostream & | out, |
const kmer_t & | kmer | ||
) |
Print a kmer_t to a stream.
std::ostream & vg::algorithms::operator<< | ( | std::ostream & | out, |
const walk_t & | walk | ||
) |
Print a walk_t to a stream.
pair< double, double > vg::algorithms::packed_depth_of_bin | ( | const Packer & | packer, |
step_handle_t | start_step, | ||
step_handle_t | end_plus_one_step, | ||
size_t | min_coverage, | ||
bool | include_deletions | ||
) |
Estimate the coverage along a given reference path interval [start_step, end_plus_one_step) Coverage is obtained only from positions along the path, and variation is not counted Except if "include_deletions" is true, then reference path positions covered by a deletion edge (which is contained in the bin) will get the deletion edge's coverage counted. Other types of events (such as SNPs) can throw off coverage in similar ways but deletions tend to be bigger (and easier to find), so we hope that counting them is enough. If one wants to infer deletions from the coverage, obviously this should be false, but if looking for a background coverage for genotyping, then setting it to true may be helpful
void vg::algorithms::packed_depths | ( | const Packer & | packer, |
const string & | path_name, | ||
size_t | min_coverage, | ||
ostream & | out_stream | ||
) |
print path-name offset base-coverage for every base on a path (just like samtools depth) ignoring things below min_coverage. offsets are 1-based in output stream
std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_constant | ( | size_t | band_padding | ) |
std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_min_random_walk | ( | double | band_padding_multiplier, |
size_t | band_padding_memo_size, | ||
size_t | max_padding | ||
) |
std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_random_walk | ( | double | band_padding_multiplier, |
size_t | band_padding_memo_size, | ||
size_t | max_padding | ||
) |
std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_random_walk_internal | ( | double | band_padding_multiplier, |
size_t | band_padding_memo_size, | ||
size_t | max_padding, | ||
const std::function< size_t(const Alignment &, const HandleGraph &)> | size_function | ||
) |
pair< double, double > vg::algorithms::path_depth_of_bin | ( | const PathHandleGraph & | graph, |
step_handle_t | start_step, | ||
step_handle_t | end_plus_one_step, | ||
size_t | min_coverage, | ||
bool | count_cycles | ||
) |
like packed_depth_of_bin (above), but use paths (as in path_depths) for measuring coverage
void vg::algorithms::path_depths | ( | const PathHandleGraph & | graph, |
const string & | path_name, | ||
size_t | min_coverage, | ||
bool | count_cycles, | ||
ostream & | out_stream | ||
) |
print path-name offset base-coverage for every base on a path (just like samtools depth) ignoring things below min_coverage. offsets are 1-based in output stream coverage here is the number of steps from (unique) other paths
std::string vg::algorithms::path_string | ( | const HandleGraph & | graph, |
const Path & | path | ||
) |
use the given graph and the path to determine our path string
size_t vg::algorithms::prune_complex | ( | DeletableHandleGraph & | graph, |
int | path_length, | ||
int | edge_max | ||
) |
Take all nodes that would introduce paths of > edge_max edge crossings, remove them, and link their neighbors to head_node or tail_node depending on which direction the path extension was stopped. For pruning graph prior to indexing with gcsa2. Returns the number of edges removed.
size_t vg::algorithms::prune_complex_with_head_tail | ( | DeletableHandleGraph & | graph, |
int | path_length, | ||
int | edge_max | ||
) |
Wrap the graph with heads and tails (for GCSA2 indexing) and then prune as with prune_complex. Returns the number of edges removed.
size_t vg::algorithms::prune_short_subgraphs | ( | DeletableHandleGraph & | graph, |
int | min_size | ||
) |
Remove any weakly connected components that have total sequence length under the minimum size. Returns the number of nodes removed.
void vg::algorithms::prune_to_connecting_graph | ( | DeletableHandleGraph & | graph, |
const handle_t & | from, | ||
const handle_t & | to | ||
) |
Remove all parts of the graph that are not on some path between the two handles.
void vg::algorithms::reallocate_atomic_int_vector | ( | vector< atomic< Int1 >> *& | vec1, |
vector< atomic< Int2 >> *& | vec2 | ||
) |
int64_t vg::algorithms::ref_path_distance | ( | const PathPositionHandleGraph * | graph, |
const pos_t & | pos_1, | ||
const pos_t & | pos_2, | ||
const unordered_set< path_handle_t > & | ref_paths, | ||
int64_t | max_search_dist | ||
) |
Search the local region around two positions and return the longest distance between them along any paths found during this search. Returns numeric_limits<int64_t>::max() if no shared path is found.
size_t vg::algorithms::remove_high_degree_nodes | ( | DeletableHandleGraph & | graph, |
int | max_degree | ||
) |
Remove nodes with >= max_degree total edges on each side. Note that end-to-start self loops count twice. Returns the number of nodes removed.
pair<double, double> vg::algorithms::sample_gam_depth | ( | const HandleGraph & | graph, |
const vector< Alignment > & | alignments, | ||
size_t | max_nodes, | ||
size_t | random_seed, | ||
size_t | min_coverage, | ||
size_t | min_mapq | ||
) |
pair< double, double > vg::algorithms::sample_mapping_depth | ( | const HandleGraph & | graph, |
const string & | input_filename, | ||
size_t | max_nodes, | ||
size_t | random_seed, | ||
size_t | min_coverage, | ||
size_t | min_mapq, | ||
const string & | format = "GAM" |
||
) |
Return the mean and variance of coverage of randomly sampled nodes from a mappings file Nodes with less than min_coverage are ignored The input_filename can be - for stdin The stream is scanned in parallel with all threads max_nodes is used to keep memory down valid formats are "GAM" and "GAF"
pair<double, double> vg::algorithms::sample_mapping_depth | ( | const HandleGraph & | graph, |
const vector< Alignment > & | alignments, | ||
size_t | max_nodes, | ||
size_t | random_seed, | ||
size_t | min_coverage, | ||
size_t | min_mapq | ||
) |
As above, but read a vector instead of a stream.
int vg::algorithms::score_best_chain | ( | const VectorView< Anchor > & | to_chain, |
const SnarlDistanceIndex & | distance_index, | ||
const HandleGraph & | graph, | ||
int | gap_open, | ||
int | gap_extension | ||
) |
Score the given group of items. Determines the best score that can be obtained by chaining items together.
Input items must be sorted by start position in the read.
size_t vg::algorithms::shortest_cycle_length | ( | const HandleGraph * | graph | ) |
Returns the length of the shortest cycle in the entire graph, or numeric_limits<size_t>::max() if no cycle exists.
size_t vg::algorithms::shortest_cycle_length | ( | const HandleGraph * | graph, |
const handle_t & | source | ||
) |
Returns the length of the shortest cycle containing the source node, or numeric_limits<size_t>::max() if no cycle exists.
size_t vg::algorithms::shortest_cycle_length_internal | ( | const HandleGraph * | graph, |
const handle_t & | source, | ||
const vector< handle_t > & | layout, | ||
const unordered_map< handle_t, size_t > & | handle_index, | ||
const vector< pair< size_t, size_t >> & | feedback_edges | ||
) |
path_offset_collection_t vg::algorithms::simple_offsets_in_paths | ( | const PathPositionHandleGraph * | graph, |
pos_t | pos | ||
) |
A "simple" model for path position getting for debugging.
bool vg::algorithms::simplify_siblings | ( | handlegraph::MutablePathDeletableHandleGraph * | graph, |
function< bool(const handle_t &, const handle_t &)> | can_merge = nullptr |
||
) |
Simplify siblings in the given graph.
When one base has two successors with the same base value, and those successors have the same set of predecessors, the successors will be merged.
Performs only a subset of the possible merges. Can only merge in from one side of a given node in a single invocation. Returns true if it made progress and there may be more merging to do.
Preserves paths.
Optional can_merge callback will only let nodes get merged together if this pairwise check returns true.
void vg::algorithms::sort_and_shadow | ( | const std::vector< Anchor > & | items, |
std::vector< size_t > & | indexes | ||
) |
Get rid of items that are shadowed or contained by (or are identical to) others.
Erases items that didn't survive from indexes, and sorts them by read start position.
void vg::algorithms::sort_and_shadow | ( | std::vector< Anchor > & | items | ) |
Get rid of items that are shadowed or contained by (or are identical to) others.
Erases items that didn't survive from items, and sorts them by read start position.
void vg::algorithms::sort_path_offsets | ( | path_offset_collection_t & | offsets | ) |
Sort path offsets, so intersect_path_offsets() can use them as a target.
vector< pair< id_t, id_t > > vg::algorithms::sorted_id_ranges | ( | const HandleGraph * | graph | ) |
Get a sorted list of inclusive ranges of IDs used in the given HandleGraph.
void vg::algorithms::three_edge_connected_component_merges | ( | const function< void(const function< void(TECCNode)> &)> & | for_each_node, |
const function< void(TECCNode, const function< void(TECCNode)> &)> & | for_each_connected_node, | ||
const function< void(TECCNode, TECCNode)> & | same_component | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are arbitrary value types (which may need to be hashable).
Takes a function that loops an iteratee over all nodes, and a function that, given a node, loops an iteratee over all nodes connected to it.
Calls same_component with pairs of nodes in (at least) a spanning tree of the set of nodes in each component (not restricted to the input graph). Doing merge operations on a union-find can get you the set of components. The callback MUST NOT modify the graph!
If you have a graph where you can easily rank the nodes, don't use this. Use three_edge_connected_components_dense() instead. The first thing this function does is asign nodes a dense, 0-based rank space.
void vg::algorithms::three_edge_connected_component_merges_dense | ( | size_t | node_count, |
size_t | first_root, | ||
const function< void(size_t, const function< void(size_t)> &)> & | for_each_connected_node, | ||
const function< void(size_t, size_t)> & | same_component | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.
Takes a total node count, a suggested root (or 0), and a function that, given a node, loops an iteratee over all nodes connected to it.
Calls same_component with pairs of nodes in (at least) a spanning tree of the set of nodes in each component (not restricted to the input graph). Doing merge operations on a union-find can get you the set of components. The callback MUST NOT modify the graph!
This defines the data we track for each node in the graph
When in the DFS were we first visited?
When in the DFS were we last visited? Needed for finding replacement neighbors to implement path range absorption in part 1.3, when we're asked for a range to a neighbor that got eaten.
What is our "low point" in the search. This is the earliest dfs_counter for a node that this node or any node in its DFS subtree has a back-edge to.
What is the effective degree of this node in the graph with all the absorb-eject modifications applied?
What node has the continuation of this node's path? If equal to numeric_limits<number_t>::max(), the path ends after here. The node's path is the path from this node, into its DFS subtree, to (one of) the nodes in the subtree that has the back-edge that caused this node's low point to be so low. Basically a low point traceback.
Is this node actually on its own path? Nodes can be removed from their paths if those nodes don't matter any more (i.e. got absorbed) but their paths still need to be tails for other paths.
Has the node been visited yet? Must be 0. TODO: Move to its own vector to make zeroing them all free-ish with page table shenanigans.
Track the node that this stack frame represents
Track all the neighbors left to visit. When we visit a neighbor we pop it off the back.
When we look at the neighbors, we need to be able to tell the tree edge to the parent from further back edges to the parent. So we have a flag for whether we have seen the parent tree edge already, and the first neighbors entry that is our parent will get called the tree edge.
Track whether we made a recursive DFS call into the last neighbor or not. If we did, we need to do some work when we come out of it and return to this frame.
void vg::algorithms::three_edge_connected_components | ( | const function< void(const function< void(TECCNode)> &)> & | for_each_node, |
const function< void(TECCNode, const function< void(TECCNode)> &)> & | for_each_connected_node, | ||
const function< void(const function< void(const function< void(TECCNode)> &)> &)> & | component_callback | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are arbitrary value types (which may need to be hashable).
Takes a function that loops an iteratee over all nodes, and a function that, given a node, loops an iteratee over all nodes connected to it.
For each component identified, calls the given callback with a function that iterates over all nodes in the component.
If you have a graph where you can easily rank the nodes, don't use this. Use three_edge_connected_components_dense() instead. The first thing this function does is asign nodes a dense, 0-based rank space.
void vg::algorithms::three_edge_connected_components_dense | ( | size_t | node_count, |
size_t | first_root, | ||
const function< void(size_t, const function< void(size_t)> &)> & | for_each_connected_node, | ||
const function< void(const function< void(const function< void(size_t)> &)> &)> & | component_callback | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.
Takes a total node count, a suggested root (or 0), and a function that, given a node, loops an iteratee over all nodes connected to it.
For each component identified, calls the given callback with a function that iterates over all nodes in the component.
void vg::algorithms::three_edge_connected_components_dense_cactus | ( | size_t | node_count, |
const function< void(size_t, const function< void(size_t)> &)> & | for_each_connected_node, | ||
const function< void(const function< void(const function< void(size_t)> &)> &)> & | component_callback | ||
) |
Get the three-edge-connected components of an arbitrary graph (not necessarily a handle graph). Only recognizes one kind of edge and one kind of node. Nodes are dense positive integers starting with 0.
Wraps the known good the 3 edge connected components algorithm from the pinchesAndCacti library.
Takes a total node count, and a function that, given a node, loops an iteratee over all nodes connected to it.
For each component identified, calls the given callback with a function that iterates over all nodes in the component.
void vg::algorithms::traverse_components | ( | const HandleGraph & | graph, |
function< void(void)> & | on_new_comp, | ||
function< void(handle_t)> & | on_node | ||
) |
uint64_t vg::algorithms::walk_haplotype_frequency | ( | const HandleGraph & | graph, |
const gbwt::GBWT & | haplotypes, | ||
const walk_t & | walk | ||
) |
std::vector< std::string > vg::algorithms::walk_haplotype_names | ( | const HandleGraph & | graph, |
const gbwt::GBWT & | haplotypes, | ||
const walk_t & | walk | ||
) |
pair< double, vector< handle_t > > vg::algorithms::widest_dijkstra | ( | const HandleGraph * | g, |
handle_t | source, | ||
handle_t | sink, | ||
function< double(const handle_t &)> | node_weight_callback, | ||
function< double(const edge_t &)> | edge_weight_callback, | ||
function< bool(const handle_t &)> | is_node_ignored_callback, | ||
function< bool(const edge_t &)> | is_edge_ignored_callbback, | ||
bool | greedy_avg = false |
||
) |
This Dijkstra is the same underlying algorithm as the one in dijkstra.hpp but the interface is different enough that I opted to make it a seprate thing rather than add loads of optional arguments. The key differences are these generalizations: – looks for the "widest" path (maximum minimum weight) instead of shortest – counts node and edge weights (via callbakcs) – returns the path as well as the score – option for ignoring certain nodes and edges in search (required by Yen's algorithm) – greedy_avg option switches the algorithm to a heuristic (no optimal guarantee) search using the running averages support instead of min-flow support as objective function.
vector< pair< double, vector< handle_t > > > vg::algorithms::yens_k_widest_paths | ( | const HandleGraph * | g, |
handle_t | source, | ||
handle_t | sink, | ||
size_t | K, | ||
function< double(const handle_t &)> | node_weight_callback, | ||
function< double(const edge_t &)> | edge_weight_callback, | ||
bool | greedy_avg = false , |
||
size_t | max_path_length = numeric_limits< size_t >::max() |
||
) |
Find the k widest paths Search is aborted (and current list returned) if a path longer than max_path_length is added to the results
std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_constant(size_t band_padding) |
Get a band padding function that uses a constant value.
std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_min_random_walk(double band_padding_multiplier=1.0, size_t band_padding_memo_size=2000, size_t max_padding=std::numeric_limits< size_t >::max()) |
Get a band padding function that scales the expected distance of a random walk, memoized out to the given length, using the minimum of graph size and read size as the length.
std::function<size_t(const Alignment&, const HandleGraph&)> vg::algorithms::pad_band_random_walk(double band_padding_multiplier=1.0, size_t band_padding_memo_size=2000, size_t max_padding=std::numeric_limits< size_t >::max()) |
Get a band padding function that scales with the expected distance of a random walk, memoized out to the given length.
|
constexpr |