vg
tools for working with variation graphs
|
#include <minimizer_mapper.hpp>
Classes | |
struct | aligner_stats_t |
Struct to represent per-DP-method stats. More... | |
struct | Minimizer |
Public Types | |
enum | RescueAlgorithm { rescue_none, rescue_dozeu, rescue_gssw } |
Implemented rescue algorithms: no rescue, dozeu, GSSW. More... | |
typedef SnarlDistanceIndexClusterer::Seed | Seed |
The information we store for each seed. More... | |
Public Member Functions | |
MinimizerMapper (const gbwtgraph::GBWTGraph &graph, const gbwtgraph::DefaultMinimizerIndex &minimizer_index, SnarlDistanceIndex *distance_index, const ZipCodeCollection *zipcodes, const PathPositionHandleGraph *path_graph=nullptr) | |
virtual void | set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
void | map (Alignment &aln, AlignmentEmitter &alignment_emitter) |
vector< Alignment > | map (Alignment &aln) |
vector< Alignment > | map_from_chains (Alignment &aln) |
vector< Alignment > | map_from_extensions (Alignment &aln) |
pair< vector< Alignment >, vector< Alignment > > | map_paired (Alignment &aln1, Alignment &aln2, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer) |
pair< vector< Alignment >, vector< Alignment > > | map_paired (Alignment &aln1, Alignment &aln2) |
bool | fragment_distr_is_finalized () |
void | finalize_fragment_length_distr () |
void | force_fragment_length_distr (double mean, double stdev) |
double | get_fragment_length_mean () const |
double | get_fragment_length_stdev () const |
size_t | get_fragment_length_sample_size () const |
size_t | get_distance_limit (size_t read_length) const |
virtual void | set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Set all the aligner scoring parameters and create the stored aligner instances. More... | |
virtual void | set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
virtual void | set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Public Member Functions inherited from vg::AlignerClient | |
virtual void | set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Set all the aligner scoring parameters and create the stored aligner instances. More... | |
virtual void | set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Static Public Attributes | |
static constexpr size_t | default_hit_cap = 10 |
Use all minimizers with at most hit_cap hits. More... | |
static constexpr size_t | default_hard_hit_cap = 500 |
Ignore all minimizers with more than hard_hit_cap hits. More... | |
static constexpr double | default_minimizer_score_fraction = 0.9 |
static constexpr size_t | default_minimizer_downsampling_window_count = 0 |
Window count for minimizer downsampling. More... | |
static constexpr size_t | default_minimizer_downsampling_max_window_length = std::numeric_limits<size_t>::max() |
static constexpr size_t | default_minimizer_coverage_flank = 250 |
static constexpr size_t | default_max_unique_min = 500 |
Maximum number of distinct minimizers to take. More... | |
static constexpr size_t | default_num_bp_per_min = 1000 |
Number of minimizers to select based on read_len/num_bp_per_min. More... | |
static constexpr bool | default_exclude_overlapping_min = false |
If set, exclude overlapping minimizers. More... | |
static constexpr size_t | default_min_extensions = 2 |
Accept at least this many clusters for gapless extension. More... | |
static constexpr size_t | default_max_extensions = 800 |
How many clusters should we produce gapless extensions for, max? More... | |
static constexpr double | default_cluster_score_threshold = 50 |
If a cluster's score is smaller than the best score of any cluster by more than this much, then don't extend it. More... | |
static constexpr double | default_pad_cluster_score_threshold = 20 |
static constexpr double | default_cluster_coverage_threshold = 0.3 |
static constexpr double | default_extension_set_score_threshold = 20 |
static constexpr int | default_extension_score_threshold = 1 |
static constexpr int | default_min_extension_sets = 2 |
static constexpr int | default_extension_set_min_score = 20 |
static constexpr size_t | default_max_local_extensions = numeric_limits<size_t>::max() |
How many extensions should we try as seeds within a mapping location? More... | |
static constexpr size_t | default_max_alignments = 8 |
How many alignments should we make, max? More... | |
static constexpr size_t | default_max_extension_mismatches = GaplessExtender::MAX_MISMATCHES |
static constexpr bool | default_align_from_chains = false |
static constexpr double | default_zipcode_tree_scale = 2.0 |
static constexpr double | default_zipcode_tree_score_threshold = 50 |
How far do we want to go down looking at zip code trees to make fragments? More... | |
static constexpr double | default_pad_zipcode_tree_score_threshold = 20 |
static constexpr double | default_zipcode_tree_coverage_threshold = 0.3 |
static constexpr size_t | default_min_to_fragment = 4 |
How many things should we produce fragments for, min? More... | |
static constexpr size_t | default_max_to_fragment = 10 |
How many things should we produce fragments for, max? More... | |
static constexpr size_t | default_gapless_extension_limit = 0 |
static constexpr size_t | default_fragment_max_lookback_bases = 300 |
How many bases should we look back when making fragments? More... | |
static constexpr double | default_fragment_max_lookback_bases_per_base = 0.03 |
How many bases should we look back when making fragments, per base of read length? More... | |
static constexpr size_t | default_max_fragments = std::numeric_limits<size_t>::max() |
How many fragments should we try and make when fragmenting something? More... | |
static constexpr double | default_fragment_gap_scale = 1.0 |
static constexpr double | default_fragment_points_per_possible_match = 0 |
static constexpr size_t | default_fragment_max_indel_bases = 2000 |
How many bases of indel should we allow in fragments? More... | |
static constexpr double | default_fragment_max_indel_bases_per_base = 0.2 |
How many bases of indel should we allow in fragments per base of read length? More... | |
static constexpr size_t | default_max_chain_connection = 100 |
static constexpr size_t | default_max_tail_length = 100 |
Similarly, what is the maximum tail length we will try to WFA align? More... | |
static constexpr double | default_fragment_score_fraction = 0.1 |
static constexpr double | default_fragment_max_min_score = std::numeric_limits<double>::max() |
How high should we let the score threshold based on the best fragment's score get? More... | |
static constexpr double | default_fragment_min_score = 60 |
static constexpr double | default_fragment_set_score_threshold = 0 |
static constexpr int | default_min_chaining_problems = 1 |
static constexpr int | default_max_chaining_problems = std::numeric_limits<int>::max() |
Do no more than this many chaining problems. More... | |
static constexpr size_t | default_max_direct_to_chain = 0 |
static constexpr size_t | default_max_lookback_bases = 3000 |
How many bases should we look back when chaining? More... | |
static constexpr double | default_max_lookback_bases_per_base = 0.3 |
How many bases should we look back when chaining, per base of read length? More... | |
static constexpr int | default_item_bonus = 0 |
static constexpr double | default_item_scale = 1.0 |
static constexpr double | default_gap_scale = 1.0 |
static constexpr double | default_points_per_possible_match = 0 |
static constexpr size_t | default_max_indel_bases = 2000 |
How many bases of indel should we allow in chaining? More... | |
static constexpr double | default_max_indel_bases_per_base = 0.2 |
How many bases of indel should we allow in chaining, per base of read length? More... | |
static constexpr double | default_chain_score_threshold = 100 |
static constexpr int | default_min_chains = 4 |
static constexpr size_t | default_max_chains_per_tree = 1 |
Allow up to this many chains per tree. More... | |
static constexpr double | default_min_chain_score_per_base = 0.01 |
static constexpr int | default_max_min_chain_score = 200 |
Limit the min chain score to no more than this. More... | |
static constexpr size_t | default_max_skipped_bases = 0 |
static constexpr size_t | default_max_tail_dp_length = 30000 |
static constexpr size_t | default_max_middle_dp_length = std::numeric_limits<int32_t>::max() |
How long of a DP can we do before something might go wrong with BandedGlobalAligner or the GBWT-based WFA? More... | |
static constexpr size_t | default_max_dp_cells = std::numeric_limits<size_t>::max() |
static constexpr size_t | default_max_tail_gap = std::numeric_limits<size_t>::max() |
How many gap bases should we allow in a Dozeu tail alignment, max? More... | |
static constexpr size_t | default_max_middle_gap = std::numeric_limits<size_t>::max() |
How many gap bases should we allow in a between-seed alignment, max? More... | |
static constexpr int | default_wfa_max_mismatches = 2 |
How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails? More... | |
static constexpr double | default_wfa_max_mismatches_per_base = 0.1 |
How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails per base of read sequence? More... | |
static constexpr int | default_wfa_max_max_mismatches = 20 |
How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails maximum, at any read length? More... | |
static constexpr int | default_wfa_distance = WFAExtender::ErrorModel::default_distance().min |
How far behind the leader should the WFA be allowed to get? More... | |
static constexpr double | default_wfa_distance_per_base = WFAExtender::ErrorModel::default_distance().per_base |
How far behind the leader should the WFA be allowed to get, per base of read sequence? More... | |
static constexpr int | default_wfa_max_distance = WFAExtender::ErrorModel::default_distance().max |
How far behind the leader should the WFA be allowed to get, at any read length? More... | |
static constexpr bool | default_sort_by_chain_score = false |
Should alignments be ranked by chain score instead of base-level score? More... | |
static constexpr double | default_min_unique_node_fraction = 0.0 |
How much of an alignment needs to be from distinct nodes to be a distinct alignment? More... | |
static constexpr bool | default_use_explored_cap = false |
static constexpr size_t | default_mapq_score_window = 0 |
What number of bp should we re-scale scores to for MAPQ, for calibration? 0 for off. More... | |
static constexpr double | default_mapq_score_scale = 1.0 |
How should we scale scores before MAPQ, for calibration? More... | |
static constexpr size_t | default_max_multimaps = 1 |
static constexpr size_t | default_distance_limit = 200 |
static constexpr bool | default_do_dp = true |
If false, skip computing base-level alignments. More... | |
static constexpr bool | default_set_refpos = false |
Set refpos field of alignments to positions on nodes they visit. More... | |
static constexpr bool | default_track_provenance = false |
static constexpr bool | default_track_correctness = false |
static constexpr bool | default_track_position = false |
Track linear reference position for placements in log output. More... | |
static constexpr bool | default_show_work = false |
If set, log what the mapper is thinking in its mapping of each read. More... | |
static constexpr double | default_paired_distance_stdevs = 2.0 |
static constexpr double | default_paired_rescue_score_limit = 0.9 |
How close does an alignment have to be to the best alignment for us to rescue on it. More... | |
static constexpr double | default_rescue_subgraph_stdevs = 4.0 |
How many stdevs from the mean do we extract a subgraph from? More... | |
static constexpr size_t | default_rescue_seed_limit = 100 |
Do not attempt rescue if there are more seeds in the rescue subgraph. More... | |
static constexpr size_t | default_max_rescue_attempts = 15 |
For paired end mapping, how many times should we attempt rescue (per read)? More... | |
static constexpr size_t | default_max_dozeu_cells = (size_t)(1.5 * 1024 * 1024) |
static constexpr size_t | default_max_fragment_length = 2000 |
What is the maximum fragment length that we accept as valid for paired-end reads? More... | |
Protected Types | |
typedef SnarlDistanceIndexClusterer::Cluster | Cluster |
The information we store for each cluster. More... | |
using | ImmutablePath = structures::ImmutableList< Mapping > |
Protected Member Functions | |
double | distance_to_annotation (int64_t distance) const |
std::vector< algorithms::Anchor > | to_anchors (const Alignment &aln, const VectorView< Minimizer > &minimizers, std::vector< Seed > &seeds) const |
Convert a collection of seeds to a collection of chaining anchors. More... | |
WFAAlignment | to_wfa_alignment (const algorithms::Anchor &anchor, const Alignment &aln, const Aligner *aligner) const |
std::vector< Minimizer > | find_minimizers (const std::string &sequence, Funnel &funnel) const |
void | flag_repetitive_minimizers (std::vector< Minimizer > &minimizers_in_read_order) const |
std::vector< size_t > | sort_minimizers_by_score (const std::vector< Minimizer > &minimizers_in_read_order, LazyRNG &rng) const |
std::vector< Seed > | find_seeds (const std::vector< Minimizer > &minimizers_in_read_order, const VectorView< Minimizer > &minimizers, const Alignment &aln, Funnel &funnel) const |
void | tag_seeds (const Alignment &aln, const std::vector< Seed >::const_iterator &begin, const std::vector< Seed >::const_iterator &end, const VectorView< Minimizer > &minimizers, size_t funnel_offset, Funnel &funnel) const |
void | score_cluster (Cluster &cluster, size_t i, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, size_t seq_length, Funnel &funnel) const |
std::pair< double, double > | score_tree (const ZipCodeForest &zip_code_forest, size_t i, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, size_t seq_length, Funnel &funnel) const |
vector< GaplessExtension > | extend_seed_group (const std::vector< size_t > &seed_group, size_t source_num, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, const string &sequence, size_t max_mismatches, vector< vector< size_t >> *minimizer_kept_count=nullptr, Funnel *funnel=nullptr, std::vector< std::vector< size_t >> *seeds_used=nullptr) const |
std::vector< int > | score_extensions (const std::vector< std::vector< GaplessExtension >> &extensions, const Alignment &aln, Funnel &funnel) const |
std::vector< int > | score_extensions (const std::vector< std::pair< std::vector< GaplessExtension >, size_t >> &extensions, const Alignment &aln, Funnel &funnel) const |
double | get_read_coverage (const Alignment &aln, const VectorView< std::vector< size_t >> &seed_sets, const std::vector< Seed > &seeds, const VectorView< Minimizer > &minimizers) const |
void | do_fragmenting_on_trees (Alignment &aln, const ZipCodeForest &zip_code_forest, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const vector< algorithms::Anchor > &seed_anchors, std::vector< std::vector< size_t >> &fragments, std::vector< double > &fragment_scores, std::vector< algorithms::Anchor > &fragment_anchors, std::vector< size_t > &fragment_source_tree, std::vector< std::vector< size_t >> &minimizer_kept_fragment_count, std::vector< double > &multiplicity_by_fragment, std::vector< Alignment > &alignments, SmallBitset &minimizer_explored, vector< double > &multiplicity_by_alignment, LazyRNG &rng, Funnel &funnel) const |
void | do_chaining_on_fragments (Alignment &aln, const ZipCodeForest &zip_code_forest, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const std::vector< std::vector< size_t >> &fragments, const std::vector< double > &fragment_scores, const std::vector< algorithms::Anchor > &fragment_anchors, const std::vector< size_t > &fragment_source_tree, const std::vector< std::vector< size_t >> &minimizer_kept_fragment_count, const std::vector< double > &multiplicity_by_fragment, std::vector< std::vector< size_t >> &chains, std::vector< size_t > &chain_source_tree, std::vector< int > &chain_score_estimates, std::vector< std::vector< size_t >> &minimizer_kept_chain_count, std::vector< double > &multiplicity_by_chain, vector< double > &multiplicity_by_tree, std::unordered_map< size_t, std::vector< size_t >> &good_fragments_in, LazyRNG &rng, Funnel &funnel) const |
void | get_best_chain_stats (Alignment &aln, const ZipCodeForest &zip_code_forest, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const std::vector< std::vector< size_t >> &fragments, const std::unordered_map< size_t, std::vector< size_t >> &good_fragments_in, const std::vector< std::vector< size_t >> &chains, const std::vector< size_t > &chain_source_tree, const vector< algorithms::Anchor > &seed_anchors, const std::vector< int > &chain_score_estimates, bool &best_chain_correct, double &best_chain_coverage, size_t &best_chain_longest_jump, double &best_chain_average_jump, size_t &best_chain_anchors, size_t &best_chain_anchor_length, Funnel &funnel) const |
void | do_alignment_on_chains (Alignment &aln, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const vector< algorithms::Anchor > &seed_anchors, const std::vector< std::vector< size_t >> &chains, const std::vector< size_t > &chain_source_tree, const std::vector< double > &multiplicity_by_chain, const std::vector< int > &chain_score_estimates, const std::vector< std::vector< size_t >> &minimizer_kept_chain_count, vector< Alignment > &alignments, vector< double > &multiplicity_by_alignment, vector< size_t > &alignments_to_source, SmallBitset &minimizer_explored, aligner_stats_t &stats, bool &funnel_depleted, LazyRNG &rng, Funnel &funnel) const |
void | pick_mappings_from_alignments (Alignment &aln, const std::vector< Alignment > &alignments, const std::vector< double > &multiplicity_by_alignment, const std::vector< size_t > &alignments_to_source, const std::vector< int > &chain_score_estimates, std::vector< Alignment > &mappings, std::vector< double > &scores, std::vector< double > &multiplicity_by_mapping, bool &funnel_depleted, LazyRNG &rng, Funnel &funnel) const |
Alignment | find_chain_alignment (const Alignment &aln, const VectorView< algorithms::Anchor > &to_chain, const std::vector< size_t > &chain, aligner_stats_t *stats=nullptr) const |
void | find_optimal_tail_alignments (const Alignment &aln, const vector< GaplessExtension > &extended_seeds, LazyRNG &rng, Alignment &best, Alignment &second_best) const |
void | attempt_rescue (const Alignment &aligned_read, Alignment &rescued_alignment, const VectorView< Minimizer > &minimizers, bool rescue_forward) |
GaplessExtender::cluster_type | seeds_in_subgraph (const VectorView< Minimizer > &minimizers, const std::unordered_set< nid_t > &subgraph) const |
void | fix_dozeu_score (Alignment &rescued_alignment, const HandleGraph &rescue_graph, const std::vector< handle_t > &topological_order) const |
void | fix_dozeu_end_deletions (Alignment &rescued_alignment) const |
int64_t | distance_between (const pos_t &pos1, const pos_t &pos2) |
int64_t | distance_between (const Alignment &aln1, const Alignment &aln2) |
int64_t | unoriented_distance_between (const pos_t &pos1, const pos_t &pos2) const |
void | extension_to_alignment (const GaplessExtension &extension, Alignment &alignment) const |
void | wfa_alignment_to_alignment (const WFAAlignment &wfa_alignment, Alignment &alignment) const |
void | pair_all (std::array< vector< Alignment >, 2 > &mappings) const |
void | annotate_with_minimizer_statistics (Alignment &target, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, size_t old_seed_count, size_t new_seed_offset, const Funnel &funnel) const |
double | compute_mapq_caps (const Alignment &aln, const VectorView< Minimizer > &minimizers, const SmallBitset &explored) |
vector< TreeSubgraph > | get_tail_forest (const GaplessExtension &extended_seed, size_t read_length, bool left_tails, size_t *longest_detectable_gap=nullptr) const |
pair< Path, size_t > | get_best_alignment_against_any_tree (const vector< TreeSubgraph > &trees, const string &sequence, const Position &default_position, bool pin_left, size_t longest_detectable_gap, LazyRNG &rng) const |
void | dfs_gbwt (const Position &from, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const |
void | dfs_gbwt (handle_t from_handle, size_t from_offset, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const |
void | dfs_gbwt (const gbwt::SearchState &start_state, size_t from_offset, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const |
double | score_alignment_pair (Alignment &aln1, Alignment &aln2, int64_t fragment_distance) |
template<typename Score = double> | |
void | process_until_threshold_a (size_t items, const function< Score(size_t)> &get_score, double threshold, size_t min_count, size_t max_count, LazyRNG &rng, const function< bool(size_t, size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const |
template<typename Score = double> | |
void | process_until_threshold_b (const vector< Score > &scores, double threshold, size_t min_count, size_t max_count, LazyRNG &rng, const function< bool(size_t, size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const |
template<typename Score = double> | |
void | process_until_threshold_c (size_t items, const function< Score(size_t)> &get_score, const function< bool(size_t, size_t)> &comparator, double threshold, size_t min_count, size_t max_count, LazyRNG &get_seed, const function< bool(size_t, size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const |
bool | validate_clusters (const std::vector< std::vector< Cluster >> &clusters, const std::vector< std::vector< Seed >> &seeds, size_t read_limit, size_t fragment_limit) const |
Do a brute-force check of the clusters. Print errors to stderr. More... | |
Protected Member Functions inherited from vg::AlignerClient | |
AlignerClient (double gc_content_estimate=vg::default_gc_content) | |
const GSSWAligner * | get_aligner (bool have_qualities=true) const |
const QualAdjAligner * | get_qual_adj_aligner () const |
const Aligner * | get_regular_aligner () const |
Static Protected Member Functions | |
static gbwtgraph::Payload | no_chain_info () |
How should we initialize chain info when it's not stored in the minimizer index? More... | |
static Seed | chain_info_to_seed (const pos_t &hit, size_t minimizer, const ZipCode &zip) |
static algorithms::Anchor | to_anchor (const Alignment &aln, const VectorView< Minimizer > &minimizers, std::vector< Seed > &seeds, size_t seed_number, const HandleGraph &graph, const Aligner *aligner) |
Convert a single seed to a single chaining anchor. More... | |
static algorithms::Anchor | to_anchor (const Alignment &aln, size_t read_start, size_t read_end, const std::vector< size_t > &sorted_seeds, const std::vector< algorithms::Anchor > &seed_anchors, const std::vector< size_t >::const_iterator &mismatch_begin, const std::vector< size_t >::const_iterator &mismatch_end, const HandleGraph &graph, const Aligner *aligner) |
static int | score_extension_group (const Alignment &aln, const vector< GaplessExtension > &extended_seeds, int gap_open_penalty, int gap_extend_penalty) |
static void | with_dagified_local_graph (const pos_t &left_anchor, const pos_t &right_anchor, size_t max_path_length, const HandleGraph &graph, const std::function< void(DeletableHandleGraph &, const std::function< std::pair< nid_t, bool >(const handle_t &)> &)> &callback) |
static size_t | longest_detectable_gap_in_range (const Alignment &aln, const std::string::const_iterator &sequence_begin, const std::string::const_iterator &sequence_end, const GSSWAligner *aligner) |
static std::pair< size_t, size_t > | align_sequence_between (const pos_t &left_anchor, const pos_t &right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph *graph, const GSSWAligner *aligner, Alignment &alignment, const std::string *alignment_name=nullptr, size_t max_dp_cells=std::numeric_limits< size_t >::max(), const std::function< size_t(const Alignment &, const HandleGraph &)> &choose_band_padding=algorithms::pad_band_random_walk()) |
static std::pair< size_t, size_t > | align_sequence_between_consistently (const pos_t &left_anchor, const pos_t &right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph *graph, const GSSWAligner *aligner, Alignment &alignment, const std::string *alignment_name=nullptr, size_t max_dp_cells=std::numeric_limits< size_t >::max(), const std::function< size_t(const Alignment &, const HandleGraph &)> &choose_band_padding=algorithms::pad_band_random_walk()) |
static WFAAlignment | connect_consistently (const std::string &sequence, const pos_t &left_anchor, const pos_t &right_anchor, const WFAExtender &wfa_extender) |
static double | window_breaking_quality (const VectorView< Minimizer > &minimizers, vector< size_t > &broken, const string &sequence, const string &quality_bytes) |
static double | faster_cap (const VectorView< Minimizer > &minimizers, vector< size_t > &minimizers_explored, const string &sequence, const string &quality_bytes) |
static void | for_each_agglomeration_interval (const VectorView< Minimizer > &minimizers, const string &sequence, const string &quality_bytes, const vector< size_t > &minimizer_indices, const function< void(size_t, size_t, size_t, size_t)> &iteratee) |
static double | get_log10_prob_of_disruption_in_interval (const VectorView< Minimizer > &minimizers, const string &sequence, const string &quality_bytes, const vector< size_t >::iterator &disrupt_begin, const vector< size_t >::iterator &disrupt_end, size_t left, size_t right) |
static double | get_prob_of_disruption_in_column (const VectorView< Minimizer > &minimizers, const string &sequence, const string &quality_bytes, const vector< size_t >::iterator &disrupt_begin, const vector< size_t >::iterator &disrupt_end, size_t index) |
static size_t | immutable_path_from_length (const ImmutablePath &path) |
static Path | to_path (const ImmutablePath &path) |
static string | log_name () |
Get the thread identifier prefix for logging. More... | |
static string | log_alignment (const Alignment &aln) |
Turn an Alignment into a conveniently-sized string for logging. More... | |
static string | log_alignment (const Path &path, bool force_condensed=false) |
Turn a Path from an alignment into a conveniently-sized string for logging. More... | |
static string | log_bits (const std::vector< bool > &bits) |
Turn a list of bit flags into a compact representation. More... | |
static void | dump_chaining_problem (const std::vector< algorithms::Anchor > &anchors, const std::vector< size_t > &cluster_seeds_sorted, const HandleGraph &graph) |
Dump a whole chaining problem. More... | |
static void | dump_debug_minimizers (const VectorView< Minimizer > &minimizers, const string &sequence, const vector< size_t > *to_include=nullptr, size_t start_offset=0, size_t length_limit=std::numeric_limits< size_t >::max()) |
Dump all the given minimizers, with optional subset restriction. More... | |
static void | dump_debug_extension_set (const HandleGraph &graph, const Alignment &aln, const vector< GaplessExtension > &extended_seeds) |
Dump all the extensions in an extension set. More... | |
static void | dump_debug_sequence (ostream &out, const string &sequence, size_t start_offset=0, size_t length_limit=std::numeric_limits< size_t >::max()) |
Print a sequence with base numbering. More... | |
static void | dump_debug_clustering (const Cluster &cluster, size_t cluster_number, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds) |
Print the seed content of a cluster. More... | |
static void | dump_debug_seeds (const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, const std::vector< size_t > &selected_seeds) |
Print information about a selected set of seeds. More... | |
static void | dump_debug_query (const Alignment &aln) |
Print information about a read to be aligned. More... | |
static void | dump_debug_query (const Alignment &aln1, const Alignment &aln2) |
Print information about a read pair to be aligned. More... | |
static void | dump_debug_dotplot (const std::string &name, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, const std::vector< std::pair< std::string, std::vector< std::vector< size_t >>>> &seed_sets, const PathPositionHandleGraph *path_graph) |
static void | dump_debug_graph (const HandleGraph &graph) |
Dump a graph. More... | |
Protected Attributes | |
const PathPositionHandleGraph * | path_graph |
const gbwtgraph::DefaultMinimizerIndex & | minimizer_index |
SnarlDistanceIndex * | distance_index |
const ZipCodeCollection * | zipcodes |
const gbwtgraph::GBWTGraph & | gbwt_graph |
This is our primary graph. More... | |
std::unique_ptr< GaplessExtender > | extender |
SnarlDistanceIndexClusterer | clusterer |
We have a clusterer. More... | |
ZipCodeForest | zip_forest |
We have a zip code tree for finding distances between seeds. More... | |
std::function< size_t(const Alignment &, const HandleGraph &)> | choose_band_padding |
FragmentLengthDistribution | fragment_length_distr |
atomic_flag | warned_about_bad_distribution = ATOMIC_FLAG_INIT |
We may need to complain exactly once that the distribution is bad. More... | |
Static Protected Attributes | |
const static size_t | LONG_LIMIT = 256 |
Length at which we cut over to long-alignment logging. More... | |
const static size_t | MANY_LIMIT = 10 |
Count at which we cut over to summary logging. More... | |
Friends | |
class | TestMinimizerMapper |
Additional Inherited Members | |
Static Member Functions inherited from vg::AlignerClient | |
static int8_t * | parse_matrix (std::istream &matrix_stream) |
Allocates an array to hold a 4x4 substitution matrix and returns it. More... | |
|
protected |
The information we store for each cluster.
|
protected |
We define a type for shared-tail lists of Mappings, to avoid constantly copying Path objects.
The information we store for each seed.
vg::MinimizerMapper::MinimizerMapper | ( | const gbwtgraph::GBWTGraph & | graph, |
const gbwtgraph::DefaultMinimizerIndex & | minimizer_index, | ||
SnarlDistanceIndex * | distance_index, | ||
const ZipCodeCollection * | zipcodes, | ||
const PathPositionHandleGraph * | path_graph = nullptr |
||
) |
Construct a new MinimizerMapper using the given indexes. The PathPositionHandleGraph can be nullptr, as we only use it for correctness tracking.
|
staticprotected |
Clip out the part of the graph between the given positions and global-align the sequence of the given Alignment to it. Populate the Alignment's path and score.
Finds an alignment against a graph path if it is <= max_path_length.
If one of the anchor positions is empty, does pinned alignment against the other position.
For pinned alignment, restricts the alignment to have gaps no longer than max_gap_length, and to use <= max_dp_cells cells. If too many DP cells would be used, produces a softclip alignment.
For connecting alignment, restricts the alignment to use <= max_dp_cells cells. If too many DP cells would be used, produces an Alignment with an empty path.
Returns the number of nodes and bases in the graph aligned against.
|
staticprotected |
Version of align_sequence_between() that guarantees that you get the same answer (modulo reverse-complementation) no matter whether the sequence and anchors are reverse-complemented or not.
|
protected |
Add annotations to an Alignment with statistics about the minimizers.
old_seed_count is the number of seeds in the seed vector actually created at the "seed" stage of the alignment process. new_seed_offset is where the first of those seeds appears in the funnel at the reseed stage.
|
protected |
Given an aligned read, extract a subgraph of the graph within a distance range based on the fragment length distribution and attempt to align the unaligned read to it. Rescue_forward is true if the aligned read is the first and false otherwise. Assumes that both reads are facing the same direction. TODO: This should be const, but some of the function calls are not.
|
inlinestaticprotected |
How do we convert chain info to an actual seed of the type we are using? Also needs to know the hit position, and the minimizer number.
|
protected |
Compute MAPQ caps based on all minimizers that are explored, for some definition of explored.
Needs access to the input alignment for sequence and quality information.
Returns only an "extended" cap at the moment.
|
staticprotected |
Produce a WFAAlignment of the given sequence between the given points that will be the same (modulo reverse-complementation) no matter whether the sequence and anchors are reverse-complemented or not.
|
protected |
The same as dfs_gbwt on a handle and an offset, but takes a gbwt::SearchState that defines only some haplotypes on a handle to start with.
|
protected |
Run a DFS on valid haplotypes in the GBWT starting from the given Position, and continuing up to the given number of bases.
Calls enter_handle when the DFS enters a haplotype visit to a particular handle, and exit_handle when it exits a visit. These let the caller maintain a stack and track the traversals.
The starting node is only entered if its offset isn't equal to its length (i.e. bases remain to be visited).
Stopping early is not permitted.
|
protected |
The same as dfs_gbwt on a Position, but takes a handle in the backing gbwt_graph and an offset from the start of the handle instead.
|
protected |
Get the distance between a pair of read alignments, or std::numeric_limits<int64_t>::max() if unreachable.
Get the distance between a pair of positions, or std::numeric_limits<int64_t>::max() if unreachable.
|
protected |
Convert an integer distance, with limits standing for no distance, to a double annotation that can safely be parsed back from JSON into an integer if it is integral.
|
protected |
|
protected |
Given a collection of fragments, filter down to the good ones and do chaining on them
|
protected |
Given a collection of zipcode trees, score the trees and do fragmenting on the best trees.
This will fill in the given vectors of fragments, fragment scores, etc.
If we do gapless extension, turn good full-length gapless extensions into alignments and return them in alignments. Gapless extensions are considered good enough if they have fewer than default_max_extension_mismatches mismatches.
|
staticprotected |
Dump a whole chaining problem.
|
staticprotected |
Print the seed content of a cluster.
|
staticprotected |
Dump dotplot information for seeds. Displays one or more named collections of runs of seeds.
|
staticprotected |
Dump all the extensions in an extension set.
|
staticprotected |
Dump a graph.
|
staticprotected |
Dump all the given minimizers, with optional subset restriction.
|
staticprotected |
Print information about a read to be aligned.
|
staticprotected |
Print information about a read pair to be aligned.
|
staticprotected |
Print information about a selected set of seeds.
|
staticprotected |
Print a sequence with base numbering.
|
protected |
Extends the seeds in a cluster or other grouping into a collection of GaplessExtension objects.
If funnel is set, the group is intended to come from the previous funnel stage and will be introduced in this one.
If seeds_used is not null, it should be an empty vector that gets filled with, for each gapless extension, the numbers of the seeds in seeds that are subsumed into the extension. They will be sorted by the stapled base (first base for forward strand, last base for reverse strand) in the read.
Note that multiple gapless extensions might cover each seed position or use each seed.
|
protected |
Convert the GaplessExtension into an alignment. This assumes that the extension is a full-length alignment and that the sequence field of the alignment has been set.
|
staticprotected |
Compute a bound on the Phred score probability of a mapping being wrong due to base errors and unlocated minimizer hits that prevented us from finding the true alignment.
Algorithm uses a "sweep line" dynamic programming approach. For a read with minimizers aligned to it:
             000000000011111111112222222222
             012345678901234567890123456789
Read:        ******************************
Minimizer 1:    *****
Minimizer 2:       *****
Minimizer 3:                   *****
Minimizer 4:                      *****
For each distinct read interval of overlapping minimizers, e.g. in the example the intervals 3,4,5; 6,7; 8,9,10; 18,19,20; 21,22; and 23,24,25 we consider base errors that would result in the minimizers in the interval being incorrect.
We use dynamic programming sweeping left-to-right over the intervals to compute the probability of the minimum number of base errors needed to disrupt all the minimizers.
Will sort minimizers_explored (which is indices into minimizers) by minimizer start position.
|
inline |
|
protected |
|
protected |
Find the minimizers in the sequence using the minimizer index, and return them sorted in read order.
|
protected |
Operating on the given input alignment, align the tails dangling off the given extended perfect-match seeds and produce an optimal alignment into the given output Alignment object, best, and the second best alignment into second_best.
Uses the given RNG to break ties.
|
protected |
Find seeds for all minimizers passing the filters. Takes in minimizers sorted in read order, and a view of them sorted in score order.
|
protected |
When dozeu doesn't have any seeds, its scan heuristic can lead to inaccurate anchoring with the end result that one end of the alignment has a deletion that doesn't connect to an aligned base. This function removes those deletions.
|
protected |
When we use dozeu for rescue, the reported alignment score is incorrect. 1) Dozeu only gives the full-length bonus once. 2) There is no penalty for a softclip at the edge of the subgraph. This function calculates the score correctly. If the score is <= 0, we realign the read using GSSW. TODO: This should be unnecessary.
|
protected |
Flag minimizers as being in repetitive regions of the read
|
staticprotected |
Given a collection of minimizers, and a list of the minimizers we actually care about (as indices into the collection), iterate over common intervals of overlapping minimizer agglomerations.
Calls the given callback with (left, right, bottom, top), where left is the first base of the agglomeration interval (inclusive), right is the last base of the agglomeration interval (exclusive), bottom is the index of the first minimizer with an agglomeration in the interval and top is the index of the last minimizer with an agglomeration in the interval (exclusive).
minimizer_indices must be sorted by agglomeration end, and then by agglomeration start, so they can be decomposed into nice rectangles.
Note that bottom and top are offsets into minimizer_indices, NOT minimizers itself. Only contiguous ranges in minimizer_indices actually make sense.
|
inline |
|
inline |
|
protected |
Find the best alignment of the given sequence against any of the trees provided in trees, where each tree is a TreeSubgraph over the GBWT graph. Each tree subgraph is rooted at the left in its own local coordinate space, even if we are pinning on the right.
If no mapping is possible (for example, because there are no trees), produce a pure insert at default_position.
Alignment is always pinned.
If pin_left is true, pin the alignment on the left to the root of each tree. Otherwise pin it on the right to the root of each tree.
Limits the length of the longest gap to longest_detectable_gap.
Returns alignments in gbwt_graph space.
|
protected |
Collect stats about the best chains for annotating the final alignment
|
inline |
Get the distance limit for the given read length
|
inline |
|
inline |
|
inline |
|
staticprotected |
Gives the log10 prob of a base error in the given interval of the read, accounting for the disruption of specified minimizers.
minimizers is the collection of all minimizers
disrupt_begin and disrupt_end are iterators defining a sequence of indices of minimizers in minimizers that are disrupted.
left and right are the inclusive and exclusive bounds of the interval of the read where the disruption occurs.
|
staticprotected |
Gives the raw probability of a base error in the given column of the read, accounting for the disruption of specified minimizers.
minimizers is the collection of all minimizers
disrupt_begin and disrupt_end are iterators defining a sequence of indices of minimizers in minimizers that are disrupted.
index is the position in the read where the disruption occurs.
|
protected |
Get the fraction of read bases covered by the given chains/fragments of seeds. A base is covered if it is between the first and last endpoints in the read of any of the given lists of seeds. The lists of seeds are each assumed to be colinear in the read.
|
protected |
Get all the trees defining tails off the specified side of the specified gapless extension. Should only be called if a tail on that side exists, or this is a waste of time.
If the gapless extension starts or ends at a node boundary, there may be multiple trees produced, each with a distinct root.
If the gapless extension abuts the edge of the read, an empty forest will be produced.
Each tree is represented as a TreeSubgraph over our gbwt_graph.
If left_tails is true, the trees read out of the left sides of the gapless extension. Otherwise they read out of the right side.
As a side effect, saves the length of the longest detectable gap in an alignment of a tail to the forest into the provided location, if set.
|
staticprotected |
Get the from length of an ImmutablePath.
Can't be called path_from_length or it will shadow the one for Paths instead of overloading.
|
staticprotected |
Turn an Alignment into a conveniently-sized string for logging.
|
staticprotected |
Turn a Path from an alignment into a conveniently-sized string for logging.
|
staticprotected |
Turn a list of bit flags into a compact representation.
|
staticprotected |
Get the thread identifier prefix for logging.
|
staticprotected |
Determine the gap limit to use when aligning the given range of sequence bases for the given Alignment.
Accounts for the longest gap that could be detected anywhere in the range, not just at the very beginning or the very end, or at a single point like GSSWAligner::longest_detectable_gap().
Map the given read. Return a vector of alignments that it maps to, winner first.
void vg::MinimizerMapper::map | ( | Alignment & | aln, |
AlignmentEmitter & | alignment_emitter | ||
) |
Map the given read, and send output to the given AlignmentEmitter. May be run from any thread. TODO: Can't be const because the clusterer's cluster_seeds isn't const.
Map the given read using chaining of seeds. Return a vector of alignments that it maps to, winner first.
Map the given read using gapless extensions. Return a vector of alignments that it maps to, winner first.
pair< vector< Alignment >, vector< Alignment > > vg::MinimizerMapper::map_paired | ( | Alignment & | aln1, |
Alignment & | aln2 | ||
) |
Map the given pair of reads, where aln1 is upstream of aln2 and they are oriented towards each other in the graph.
If the fragment length distribution is not yet fixed, reads will be mapped independently. Otherwise, they will be mapped according to the fragment length distribution.
pair< vector< Alignment >, vector< Alignment > > vg::MinimizerMapper::map_paired | ( | Alignment & | aln1, |
Alignment & | aln2, | ||
vector< pair< Alignment, Alignment >> & | ambiguous_pair_buffer | ||
) |
Map the given pair of reads, where aln1 is upstream of aln2 and they are oriented towards each other in the graph.
If the reads are ambiguous and there's no fragment length distribution fixed yet, they will be dropped into ambiguous_pair_buffer.
Otherwise, at least one result will be returned for them (although it may be the unmapped alignment).
|
inlinestaticprotected |
How should we initialize chain info when it's not stored in the minimizer index?
|
protected |
Set pair partner references for paired mapping results.
|
protected |
|
protected |
Given a count of items, a function to get the score of each, a score-difference-from-the-best cutoff, a min and max processed item count, and a function to get a sort-shuffling seed for breaking ties, process items in descending score order by calling process_item with the item's number and the number of other items with the same or better score, until min_count items are processed and either max_count items are processed or the score difference threshold is hit (or we run out of items).
If process_item returns false, the item is skipped and does not count against min_count or max_count.
Call discard_item_by_count with the item's number for all remaining items that would pass the score threshold.
Call discard_item_by_score with the item's number for all remaining items that would fail the score threshold.
|
protected |
Same as the other process_until_threshold functions, except using a vector to supply scores.
|
protected |
Same as the other process_until_threshold functions, except user supplies comparator to sort the items (must still be sorted by score).
|
protected |
Score a pair of alignments given the distance between them
|
protected |
Determine cluster score, read coverage, and a vector of flags for the minimizers present in the cluster. Score is the sum of the scores of distinct minimizers in the cluster, while read coverage is the fraction of the read covered by seeds in the cluster.
Puts the cluster in the funnel as coming from its seeds.
|
staticprotected |
Score the given group of gapless extensions. Determines the best score that can be obtained by chaining extensions together, using the given gap open and gap extend penalties to charge for either overlaps or gaps in coverage of the read.
Enforces that overlaps cannot result in containment.
Input extended seeds must be sorted by start position.
|
protected |
Score the set of extensions for each cluster using score_extension_group(). Return the scores in the same order as the extensions.
This version allows the collections of extensions to be scored to come with annotating read numbers, which are ignored.
|
protected |
Score the set of extensions for each cluster using score_extension_group(). Return the scores in the same order as the extension groups.
|
protected |
Determine score and read coverage for a zip code tree. Score is the sum of the scores of distinct minimizers in the tree, while read coverage is the fraction of the read covered by seeds in the tree.
Puts the tree in the funnel as coming from its seeds.
|
protected |
Return all the non-redundant seeds in the subgraph, including those from minimizers not used for mapping.
void vg::AlignerClient::set_alignment_scores |
Set the aligner scoring parameters and create the stored aligner instances. The score matrix should be a 4 x 4 array in the order (ACGT). Other overloads of set_alignment_scores all call this one.
|
virtual |
Set the aligner scoring parameters and create the stored aligner instances. The score matrix should be a 4 x 4 array in the order (ACGT). Other overloads of set_alignment_scores all call this one.
Reimplemented from vg::AlignerClient.
void vg::AlignerClient::set_alignment_scores |
Set all the aligner scoring parameters and create the stored aligner instances.
void vg::AlignerClient::set_alignment_scores |
Set the aligner scoring parameters and create the stored aligner instances. The stream should contain a 4 x 4 whitespace-separated substitution matrix (in the order ACGT).
|
protected |
Return the indices of all the minimizers, sorted in descending order by their minimizers' scores.
|
protected |
If tracking correctness, mark seeds that are correctly mapped as correct in the funnel, based on proximity along paths to the input read's refpos. Otherwise, tag just as placed, with the seed's read interval. Assumes we are tracking provenance.
|
staticprotected |
Convert a single seed to a single chaining anchor.
|
staticprotected |
Convert a read region, and the seeds that that region covers the stapled bases of (sorted by stapled base), into a single chaining anchor. Takes an iterator range of positions within the base range that are mismatches.
|
protected |
Convert a collection of seeds to a collection of chaining anchors.
|
staticprotected |
Convert an ImmutablePath to a Path.
|
protected |
Convert an Anchor to a WFAAlignment, given the input read it is from and the Aligner to use for scoring. Accounts for full-length bonuses if the anchor abuts the end of the read.
|
protected |
Get the unoriented distance between a pair of positions
|
protected |
Do a brute check of the clusters. Print errors to stderr.
|
protected |
Convert a WFAAlignment into a vg Alignment. This assumes that the WFAAlignment is a full-length alignment and that the sequence field of the vg Alignment has been set.
|
staticprotected |
Compute a bound on the Phred score probability of having created the agglomerations of the specified minimizers by base errors from the given sequence, which was sequenced with the given qualities.
No limit is imposed if broken is empty.
Takes the collection of all minimizers found, and a vector of the indices of minimizers we are interested in the agglomerations of. May modify the order of that index vector.
Also takes the sequence of the read (to avoid Ns) and the quality string (interpreted as a byte array).
Currently computes a lower-score-bound, upper-probability-bound, suitable for use as a mapping quality cap, by assuming the easiest-to-disrupt possible layout of the windows, and the lowest possible qualities for the disrupting bases.
|
staticprotected |
Clip out the part of the graph between the given positions, and dagify it from the perspective of the anchors. If a left anchor is set, all heads should correspond to the left anchor, and if a right anchor is set, all tails should correspond to the right anchor. At least one anchor must be set. Both anchors may be on the same node.
Calls the callback with an extracted, strand-split, dagified graph, and a function that translates from handle in the dagified graph to node ID and orientation in the base graph.
|
friend |
bool vg::MinimizerMapper::align_from_chains = default_align_from_chains |
double vg::MinimizerMapper::chain_score_threshold = default_chain_score_threshold |
|
protected |
We have a function for determining band padding for banded alignment when aligning from chains.
double vg::MinimizerMapper::cluster_coverage_threshold = default_cluster_coverage_threshold |
double vg::MinimizerMapper::cluster_score_threshold = default_cluster_score_threshold |
|
protected |
We have a clusterer.
|
staticconstexpr |
If true, produce alignments from extension sets by chaining gapless extensions up and aligning the sequences between them. If false, produce alignments by aligning the tails off of individual gapless extensions.
|
staticconstexpr |
If a chain's score is smaller than the best chain's score by more than this much, don't align it
|
staticconstexpr |
If the read coverage of a cluster is less than the best coverage of any tree by more than this much, don't extend it
|
staticconstexpr |
If a cluster's score is smaller than the best score of any cluster by more than this much, then don't extend it
|
staticconstexpr |
|
staticconstexpr |
If false, skip computing base-level alignments.
|
staticconstexpr |
If set, exclude overlapping minimizers.
|
staticconstexpr |
|
staticconstexpr |
Even if we would have fewer than min_extension_sets results, don't process anything with a score smaller than this.
|
staticconstexpr |
|
staticconstexpr |
How much of a multiple should we apply to each transition's gap penalty at fragmenting?
|
staticconstexpr |
How many bases of indel should we allow in fragments?
|
staticconstexpr |
How many bases of indel should we allow in fragments per base of read length?
|
staticconstexpr |
How many bases should we look back when making fragments?
|
staticconstexpr |
How many bases should we look back when making fragments, per base of read length?
|
staticconstexpr |
How high should we let the score threshold based on the best fragment's score get?
|
staticconstexpr |
What minimum score in points should a fragment have in order to keep it? Needs to be set to some kind of significance threshold.
|
staticconstexpr |
|
staticconstexpr |
How good should a fragment be in order to keep it? Fragments with scores less than this fraction of the best fragment's score will not be used.
|
staticconstexpr |
If a fragment set's score is smaller than the best fragment set's score by more than this much, don't align it
|
staticconstexpr |
How much of a multiple should we apply to each transition's gap penalty at chaining?
|
staticconstexpr |
Do gapless extension to the seeds in each tree before fragmenting the tree if the read length is less than the limit.
|
staticconstexpr |
Ignore all minimizers with more than hard_hit_cap hits.
|
staticconstexpr |
Use all minimizers with at most hit_cap hits.
|
staticconstexpr |
How much of a bonus should we give to each item in fragmenting/chaining?
|
staticconstexpr |
How much of a multiple should we apply to each item's non-bonus score in fragmenting/chaining?
|
staticconstexpr |
How should we scale scores before mapq, for calibration.
|
staticconstexpr |
What number of bp should we re-scale scores to for MAPQ, for calibration? 0 for off.
|
staticconstexpr |
How many alignments should we make, max?
|
staticconstexpr |
When converting chains to alignments, what's the longest gap between items we will try to WFA align? Passing strings longer than ~100bp can cause WFAAligner to run for a pathologically long amount of time. May not be 0.
|
staticconstexpr |
Do no more than this many chaining problems.
|
staticconstexpr |
Allow up to this many chains per tree.
|
staticconstexpr |
Sometimes we don't do chaining but instead turn fragments directly into chains. If this is 0, then do chaining. Otherwise take up to this many fragments and turn them into chains.
|
staticconstexpr |
How big of an alignment in POA cells should we ever try to do with Dozeu? TODO: Lift this when Dozeu's allocator is able to work with >4 MB of memory. Each cell is 16 bits in Dozeu, and we leave some room for the query and padding to full SSE registers. Note that a very chopped graph might still break this!
|
staticconstexpr |
How many DP cells should we be willing to do for an end-pinned alignment? If we want to do more than this, just leave tail unaligned.
|
staticconstexpr |
How many mismatches should we allow in gapless extension (except for start node where the limit doesn't count)?
|
staticconstexpr |
How many clusters should we produce gapless extensions for, max?
|
staticconstexpr |
What is the maximum fragment length that we accept as valid for paired-end reads?
|
staticconstexpr |
How many fragments should we try and make when fragmenting something?
|
staticconstexpr |
How many bases of indel should we allow in chaining?
|
staticconstexpr |
How many bases of indel should we allow in chaining, per base of read length?
|
staticconstexpr |
How many extensions should we try as seeds within a mapping location?
|
staticconstexpr |
How many bases should we look back when chaining?
|
staticconstexpr |
How many bases should we look back when chaining, per base of read length?
|
staticconstexpr |
How long of a DP can we do before something might go wrong with BandedGlobalAligner or the GBWT-based WFA?
|
staticconstexpr |
How many gap bases should we allow in a between-seed alignment, max?
|
staticconstexpr |
Limit the min chain score to no more than this.
|
staticconstexpr |
|
staticconstexpr |
For paired end mapping, how many times should we attempt rescue (per read)?
|
staticconstexpr |
When turning chains into alignments, we can skip seeds to create gaps up to this length in the graph
|
staticconstexpr |
How long of a DP can we do before Dozeu gets lost at traceback due to 16-bit score overflow?
|
staticconstexpr |
How many gap bases should we allow in a Dozeu tail alignment, max?
|
staticconstexpr |
Similarly, what is the maximum tail length we will try to WFA align?
|
staticconstexpr |
How many things should we produce fragments for, max?
|
staticconstexpr |
Maximum number of distinct minimizers to take.
|
staticconstexpr |
Even if we would have fewer than min_chains results, don't process anything with a score smaller than this, per read base.
|
staticconstexpr |
Disregard the fragment set score thresholds when they would give us fewer than this many chaining problems done.
|
staticconstexpr |
Disregard the chain score thresholds when they would give us fewer than this many chains aligned.
|
staticconstexpr |
Disregard the extension set score thresholds when they would give us fewer than this many extension sets.
|
staticconstexpr |
Accept at least this many clusters for gapless extension.
|
staticconstexpr |
How many things should we produce fragments for, min?
|
staticconstexpr |
How much of an alignment needs to be from distinct nodes to be a distinct alignment?
|
staticconstexpr |
|
staticconstexpr |
|
staticconstexpr |
Window count for minimizer downsampling.
|
staticconstexpr |
Take minimizers between hit_cap and hard_hit_cap hits until this fraction of total score
|
staticconstexpr |
Number of minimizers to select based on read_len/num_min_per_bp.
|
staticconstexpr |
If the second best cluster's score is no more than this many points below the cutoff set by cluster_score_threshold, snap that cutoff down to the second best cluster's score, to avoid throwing away promising secondaries.
|
staticconstexpr |
If the second best tree's score is no more than this many points below the cutoff set by zipcode_tree_score_threshold, snap that cutoff down to the second best tree's score, to avoid throwing away promising secondaries.
|
staticconstexpr |
|
staticconstexpr |
How close does an alignment have to be to the best alignment for us to rescue on it.
|
staticconstexpr |
|
staticconstexpr |
Do not attempt rescue if there are more seeds in the rescue subgraph.
|
staticconstexpr |
How many stdevs from the mean do we extract a subgraph from?
|
staticconstexpr |
Set refpos field of alignments to positions on nodes they visit.
|
staticconstexpr |
If set, log what the mapper is thinking in its mapping of each read.
|
staticconstexpr |
Should alignments be ranked by chain score instead of base-level score?
|
staticconstexpr |
Guess which seed hits are correct by location in the linear reference and track if/when their descendants make it through stages of the algorithm. Only works if track_provenance is true.
|
staticconstexpr |
Track linear reference position for placements in log output.
|
staticconstexpr |
Track which internal work items came from which others during each stage of the mapping algorithm.
|
staticconstexpr |
If set, cap mapping quality based on minimizer layout in the read. Only really likely to help for short reads.
|
staticconstexpr |
How far behind the leader should the WFA be allowed to get?
|
staticconstexpr |
How far behind the leader should the WFA be allowed to get, per base of read sequence?
|
staticconstexpr |
How far behind the leader should the WFA be allowed to get, at any read length?
|
staticconstexpr |
How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails maximum, at any read length?
|
staticconstexpr |
How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails?
|
staticconstexpr |
How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails per base of read sequence?
|
staticconstexpr |
If the read coverage of a tree is less than the best coverage of any tree by more than this much, don't extend it
|
staticconstexpr |
When making zipcode trees, at what multiple of the read length should the trees be split?
|
staticconstexpr |
How far do we want to go down looking at zip code trees to make fragments?
|
protected |
size_t vg::MinimizerMapper::distance_limit = default_distance_limit |
bool vg::MinimizerMapper::do_dp = default_do_dp |
bool vg::MinimizerMapper::exclude_overlapping_min = default_exclude_overlapping_min |
|
protected |
We have a gapless extender to extend seed hits in haplotype space. Because this needs a reference to an Aligner, and because changing the scoring parameters deletes all the aligners, we need to keep this somewhere we can clear out.
int vg::MinimizerMapper::extension_score_threshold = default_extension_score_threshold |
int vg::MinimizerMapper::extension_set_min_score = default_extension_set_min_score |
double vg::MinimizerMapper::extension_set_score_threshold = default_extension_set_score_threshold |
double vg::MinimizerMapper::fragment_gap_scale = default_fragment_gap_scale |
|
protected |
We have a distribution for read fragment lengths that takes care of knowing when we've observed enough good ones to learn a good distribution.
size_t vg::MinimizerMapper::fragment_max_indel_bases = default_fragment_max_indel_bases |
double vg::MinimizerMapper::fragment_max_indel_bases_per_base = default_fragment_max_indel_bases_per_base |
size_t vg::MinimizerMapper::fragment_max_lookback_bases = default_fragment_max_lookback_bases |
double vg::MinimizerMapper::fragment_max_lookback_bases_per_base = default_fragment_max_lookback_bases_per_base |
double vg::MinimizerMapper::fragment_max_min_score = default_fragment_max_min_score |
double vg::MinimizerMapper::fragment_min_score = default_fragment_min_score |
double vg::MinimizerMapper::fragment_points_per_possible_match = default_fragment_points_per_possible_match |
double vg::MinimizerMapper::fragment_score_fraction = default_fragment_score_fraction |
double vg::MinimizerMapper::fragment_set_score_threshold = default_fragment_set_score_threshold |
double vg::MinimizerMapper::gap_scale = default_gap_scale |
size_t vg::MinimizerMapper::gapless_extension_limit = default_gapless_extension_limit |
|
protected |
This is our primary graph.
size_t vg::MinimizerMapper::hard_hit_cap = default_hard_hit_cap |
size_t vg::MinimizerMapper::hit_cap = default_hit_cap |
int vg::MinimizerMapper::item_bonus = default_item_bonus |
double vg::MinimizerMapper::item_scale = default_item_scale |
|
staticprotected |
Length at which we cut over to long-alignment logging.
|
staticprotected |
Count at which we cut over to summary logging.
double vg::MinimizerMapper::mapq_score_scale = default_mapq_score_scale |
size_t vg::MinimizerMapper::mapq_score_window = default_mapq_score_window |
size_t vg::MinimizerMapper::max_alignments = default_max_alignments |
size_t vg::MinimizerMapper::max_chain_connection = default_max_chain_connection |
int vg::MinimizerMapper::max_chaining_problems = default_max_chaining_problems |
size_t vg::MinimizerMapper::max_chains_per_tree = default_max_chains_per_tree |
size_t vg::MinimizerMapper::max_direct_to_chain = default_max_direct_to_chain |
size_t vg::MinimizerMapper::max_dozeu_cells = default_max_dozeu_cells |
size_t vg::MinimizerMapper::max_dp_cells = default_max_dp_cells |
size_t vg::MinimizerMapper::max_extension_mismatches = default_max_extension_mismatches |
size_t vg::MinimizerMapper::max_extensions = default_max_extensions |
size_t vg::MinimizerMapper::max_fragment_length = default_max_fragment_length |
size_t vg::MinimizerMapper::max_fragments = default_max_fragments |
size_t vg::MinimizerMapper::max_indel_bases = default_max_indel_bases |
double vg::MinimizerMapper::max_indel_bases_per_base = default_max_indel_bases_per_base |
size_t vg::MinimizerMapper::max_local_extensions = default_max_local_extensions |
size_t vg::MinimizerMapper::max_lookback_bases = default_max_lookback_bases |
double vg::MinimizerMapper::max_lookback_bases_per_base = default_max_lookback_bases_per_base |
size_t vg::MinimizerMapper::max_middle_dp_length = default_max_middle_dp_length |
size_t vg::MinimizerMapper::max_middle_gap = default_max_middle_gap |
int vg::MinimizerMapper::max_min_chain_score = default_max_min_chain_score |
size_t vg::MinimizerMapper::max_multimaps = default_max_multimaps |
size_t vg::MinimizerMapper::max_rescue_attempts = default_max_rescue_attempts |
size_t vg::MinimizerMapper::max_skipped_bases = default_max_skipped_bases |
size_t vg::MinimizerMapper::max_tail_dp_length = default_max_tail_dp_length |
size_t vg::MinimizerMapper::max_tail_gap = default_max_tail_gap |
size_t vg::MinimizerMapper::max_tail_length = default_max_tail_length |
size_t vg::MinimizerMapper::max_to_fragment = default_max_to_fragment |
size_t vg::MinimizerMapper::max_unique_min = default_max_unique_min |
double vg::MinimizerMapper::min_chain_score_per_base = default_min_chain_score_per_base |
int vg::MinimizerMapper::min_chaining_problems = default_min_chaining_problems |
int vg::MinimizerMapper::min_chains = default_min_chains |
int vg::MinimizerMapper::min_extension_sets = default_min_extension_sets |
size_t vg::MinimizerMapper::min_extensions = default_min_extensions |
size_t vg::MinimizerMapper::min_to_fragment = default_min_to_fragment |
double vg::MinimizerMapper::min_unique_node_fraction = default_min_unique_node_fraction |
size_t vg::MinimizerMapper::minimizer_coverage_flank = default_minimizer_coverage_flank |
size_t vg::MinimizerMapper::minimizer_downsampling_max_window_length = default_minimizer_downsampling_max_window_length |
size_t vg::MinimizerMapper::minimizer_downsampling_window_count = default_minimizer_downsampling_window_count |
|
protected |
double vg::MinimizerMapper::minimizer_score_fraction = default_minimizer_score_fraction |
size_t vg::MinimizerMapper::num_bp_per_min = default_num_bp_per_min |
double vg::MinimizerMapper::pad_cluster_score_threshold = default_pad_cluster_score_threshold |
double vg::MinimizerMapper::pad_zipcode_tree_score_threshold = default_pad_zipcode_tree_score_threshold |
double vg::MinimizerMapper::paired_distance_stdevs = default_paired_distance_stdevs |
double vg::MinimizerMapper::paired_rescue_score_limit = default_paired_rescue_score_limit |
|
protected |
double vg::MinimizerMapper::points_per_possible_match = default_points_per_possible_match |
string vg::MinimizerMapper::read_group |
Apply this read group name.
RescueAlgorithm vg::MinimizerMapper::rescue_algorithm = rescue_dozeu |
The algorithm used for rescue.
size_t vg::MinimizerMapper::rescue_seed_limit = default_rescue_seed_limit |
double vg::MinimizerMapper::rescue_subgraph_stdevs = default_rescue_subgraph_stdevs |
string vg::MinimizerMapper::sample_name |
Apply this sample name.
bool vg::MinimizerMapper::set_refpos = default_set_refpos |
bool vg::MinimizerMapper::show_work = default_show_work |
bool vg::MinimizerMapper::sort_by_chain_score = default_sort_by_chain_score |
bool vg::MinimizerMapper::track_correctness = default_track_correctness |
bool vg::MinimizerMapper::track_position = default_track_position |
bool vg::MinimizerMapper::track_provenance = default_track_provenance |
bool vg::MinimizerMapper::use_explored_cap = default_use_explored_cap |
|
protected |
We may need to complain exactly once that the distribution is bad.
atomic_flag vg::MinimizerMapper::warned_about_rescue_size = ATOMIC_FLAG_INIT |
Have we complained about hitting the size limit for rescue?
|
mutable |
Have we complained about hitting the size limit for tails?
int vg::MinimizerMapper::wfa_distance = default_wfa_distance |
double vg::MinimizerMapper::wfa_distance_per_base = default_wfa_distance_per_base |
int vg::MinimizerMapper::wfa_max_distance = default_wfa_max_distance |
int vg::MinimizerMapper::wfa_max_max_mismatches = default_wfa_max_max_mismatches |
int vg::MinimizerMapper::wfa_max_mismatches = default_wfa_max_mismatches |
double vg::MinimizerMapper::wfa_max_mismatches_per_base = default_wfa_max_mismatches_per_base |
|
protected |
We have a zip code tree for finding distances between seeds.
double vg::MinimizerMapper::zipcode_tree_coverage_threshold = default_zipcode_tree_coverage_threshold |
double vg::MinimizerMapper::zipcode_tree_scale = default_zipcode_tree_scale |
double vg::MinimizerMapper::zipcode_tree_score_threshold = default_zipcode_tree_score_threshold |
|
protected |