|
| ZipCodeForest () |
| Constructor. More...
|
|
template<typename Minimizer > |
void | fill_in_forest (const vector< Seed > &seeds, const VectorView< Minimizer > &minimizers, const SnarlDistanceIndex &distance_index, size_t gap_distance_limit, size_t distance_limit=std::numeric_limits< size_t >::max()) |
|
template<typename Minimizer > |
void | print_self (const vector< Seed > *seeds, const VectorView< Minimizer > *minimizers) const |
|
void | validate_zip_forest (const SnarlDistanceIndex &distance_index, const vector< Seed > *seeds, size_t distance_limit=std::numeric_limits< size_t >::max()) const |
|
template<typename Minimizer > |
void | get_cyclic_snarl_intervals (forest_growing_state_t &forest_state, const VectorView< Minimizer > &minimizers, const ZipCodeForest::interval_state_t &snarl_interval, const ZipCodeForest::interval_state_t &parent_interval, const forward_list< ZipCodeForest::interval_state_t > &child_intervals, forward_list< ZipCodeForest::interval_state_t > &next_intervals) const |
|
|
void | sort_one_interval (forest_growing_state_t &forest_state, const interval_state_t &interval) const |
|
void | radix_sort_zipcodes (vector< size_t > &zipcode_sort_order, const vector< sort_value_t > &sort_values_by_seed, const interval_state_t &interval, bool reverse_order, size_t min_value, size_t max_value, bool sort_by_chain_component=false) const |
|
void | default_sort_zipcodes (vector< size_t > &zipcode_sort_order, const vector< sort_value_t > &sort_values_by_seed, const interval_state_t &interval, bool reverse_order) const |
|
void | get_next_intervals (forest_growing_state_t &forest_state, const interval_state_t &interval, std::forward_list< interval_state_t > &next_intervals) const |
|
template<typename Minimizer > |
void | get_cyclic_snarl_intervals (forest_growing_state_t &forest_state, const VectorView< Minimizer > &minimizers, const interval_state_t &snarl_interval, const interval_state_t &parent_interval, const forward_list< interval_state_t > &child_intervals, forward_list< interval_state_t > &next_intervals) const |
|
void | open_chain (forest_growing_state_t &forest_state, const size_t &depth, size_t seed_index, bool chain_is_reversed) |
|
void | close_chain (forest_growing_state_t &forest_state, const size_t &depth, const Seed &last_seed, bool chain_is_reversed) |
|
void | add_child_to_chain (forest_growing_state_t &forest_state, const size_t &depth, const size_t &seed_index, bool child_is_reversed, bool chain_is_reversed) |
|
void | open_snarl (forest_growing_state_t &forest_state, const size_t &depth) |
|
void | close_snarl (forest_growing_state_t &forest_state, const size_t &depth, const Seed &last_seed, bool last_is_reversed, bool is_cyclic_snarl) |
|
void | add_snarl_distances (forest_growing_state_t &forest_state, const size_t &depth, const Seed &seed, bool child_is_reversed, bool snarl_is_reversed, bool to_snarl_end, bool is_cyclic_snarl) |
|
A collection of ZipCodeTrees. The ZipCodeForest takes a set of seeds and makes ZipCodeTrees. There will be a separate tree for each connected component or slice of a chain that is too far from anything else on both sides, using the given distance limit.
template<typename Minimizer >
void vg::ZipCodeForest::fill_in_forest |
( |
const vector< Seed > & |
seeds, |
|
|
const VectorView< Minimizer > & |
minimizers, |
|
|
const SnarlDistanceIndex & |
distance_index, |
|
|
size_t |
gap_distance_limit, |
|
|
size_t |
distance_limit = std::numeric_limits<size_t>::max() |
|
) |
| |
Populate the zip forest. If a distance limit is given, then also partition the tree into subtrees that are farther than the distance_limit from each other. Otherwise, the forest will just be connected components. The gap_distance_limit is the limit for making runs of seeds in a cyclic snarl; it should be roughly the distance that the dynamic programming is willing to jump to connect two consecutive minimizers. TODO: I think the distance_limit should just be the same as the gap_distance_limit. If a distance_limit is given, then distances larger than the distance limit are not guaranteed to be accurate, but will be greater than the distance_limit.
template<typename Minimizer >
Given intervals representing child chains on a cyclic snarl, re-partition them and get new intervals representing runs of seeds that are "close" in each chain. Like in get_next_intervals, new intervals are added to next_intervals in their sort order. Two seeds are close to each other if: (1) the distance between them on the read is <= t, where t is a given distance limit, (2) the minimum distance between them on the chain is <= t, and (3) they are on the same strand in the read. Runs are sorted by their latest position in the read, and oriented according to the orientation of the read through the snarl. The orientation of the read in the snarl's parent chain and in the snarl children are estimated by finding the Spearman correlation of the seeds. If the orientation of a run is unclear, then it is duplicated to be oriented in each direction.