|
| OrientedDistanceClusterer (OrientedDistanceMeasurer &distance_measurer, size_t max_expected_dist_approx_error=8) |
| Constructor. More...
|
|
vector< pair< pair< size_t, size_t >, int64_t > > | pair_clusters (const Alignment &alignment_1, const Alignment &alignment_2, const vector< cluster_t * > &left_clusters, const vector< cluster_t * > &right_clusters, const vector< pair< size_t, size_t >> &left_alt_cluster_anchors, const vector< pair< size_t, size_t >> &right_alt_cluster_anchors, int64_t optimal_separation, int64_t max_deviation) |
| Concrete implementation of virtual method from MEMClusterer. More...
|
|
| MEMClusterer ()=default |
|
virtual | ~MEMClusterer ()=default |
|
vector< cluster_t > | clusters (const Alignment &alignment, const vector< MaximalExactMatch > &mems, const GSSWAligner *Aligner, size_t min_mem_length=1, int32_t max_qual_score=60, int32_t log_likelihood_approx_factor=0, size_t min_median_mem_coverage_for_split=0, double suboptimal_edge_pruning_factor=.75, double cluster_multiplicity_diff=10.0, const match_fanouts_t *fanouts=nullptr) |
|
|
unordered_map< pair< size_t, size_t >, int64_t > | get_on_strand_distance_tree (size_t num_items, const function< pos_t(size_t)> &get_position, const function< int64_t(size_t)> &get_offset) |
|
void | extend_dist_tree_by_permutations (const function< pos_t(size_t)> &get_position, const function< int64_t(size_t)> &get_offset, size_t num_items, int64_t max_failed_distance_probes, size_t decrement_frequency, unordered_map< pair< size_t, size_t >, int64_t > &recorded_finite_dists, map< pair< size_t, size_t >, size_t > &num_infinite_dists, UnionFind &component_union_find, size_t &num_possible_merges_remaining) |
|
void | extend_dist_tree_by_buckets (const function< pos_t(size_t)> &get_position, const function< int64_t(size_t)> &get_offset, size_t num_items, unordered_map< pair< size_t, size_t >, int64_t > &recorded_finite_dists, UnionFind &component_union_find, size_t &num_possible_merges_remaining) |
|
void | exclude_dist_tree_merges (const function< pos_t(size_t)> &get_position, map< pair< size_t, size_t >, size_t > &num_infinite_dists, UnionFind &component_union_find, size_t &num_possible_merges_remaining, int64_t max_failed_distance_probes) |
|
vector< unordered_map< size_t, int64_t > > | flatten_distance_tree (size_t num_items, const unordered_map< pair< size_t, size_t >, int64_t > &recorded_finite_dists) |
|
vector< pair< size_t, size_t > > | compute_tail_mem_coverage (const Alignment &alignment, const vector< MaximalExactMatch > &mems) |
|
HitGraph | make_hit_graph (const Alignment &alignment, const vector< MaximalExactMatch > &mems, const GSSWAligner *aligner, size_t min_mem_length, const match_fanouts_t *fanouts) |
| Concrete implementation of virtual method from MEMClusterer. More...
|
|
int32_t | estimate_edge_score (const MaximalExactMatch *mem_1, const MaximalExactMatch *mem_2, int64_t graph_dist, const GSSWAligner *aligner) const |
|
void | deduplicate_cluster_pairs (vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, int64_t optimal_separation) |
|
vector< unordered_map< size_t, int64_t > > vg::OrientedDistanceClusterer::flatten_distance_tree (size_t num_items, const unordered_map< pair< size_t, size_t >, int64_t > &recorded_finite_dists)
|
protected |
Given a number of nodes, and a map from node pair to signed relative distance on a consistent strand (defining a forest of trees, as generated by get_on_strand_distance_tree()), flatten all the trees.
Returns a vector of maps from node ID to relative position in linear space, one map per input tree.
Assumes all the distances are transitive, even though this isn't quite true in graph space.
unordered_map< pair< size_t, size_t >, int64_t > vg::OrientedDistanceClusterer::get_on_strand_distance_tree (size_t num_items, const function< pos_t(size_t)> &get_position, const function< int64_t(size_t)> &get_offset)
|
protected |
Given a certain number of items, a callback to get each item's position, and a callback to get a fixed offset from that position, build a distance forest with trees for items that we can verify are on the same strand of the same molecule.
We use the distance approximation to cluster the MEM hits according to the strand they fall on using the oriented distance estimation function.
Returns a map from item pair (lower number first) to distance (which may be negative) from the first to the second along the items' forward strand.