vg
tools for working with variation graphs
Classes | Public Types | Public Member Functions | Public Attributes | Static Public Attributes | Protected Types | Protected Member Functions | Static Protected Member Functions | Protected Attributes | Static Protected Attributes | Friends | List of all members
vg::MinimizerMapper Class Reference

#include <minimizer_mapper.hpp>

Inheritance diagram for vg::MinimizerMapper:
vg::AlignerClient

Classes

struct  aligner_stats_t
 Struct to represent per-DP-method stats. More...
 
struct  Minimizer
 

Public Types

enum  RescueAlgorithm { rescue_none, rescue_dozeu, rescue_gssw }
 Implemented rescue algorithms: no rescue, dozeu, GSSW. More...
 
typedef SnarlDistanceIndexClusterer::Seed Seed
 The information we store for each seed. More...
 

Public Member Functions

 MinimizerMapper (const gbwtgraph::GBWTGraph &graph, const gbwtgraph::DefaultMinimizerIndex &minimizer_index, SnarlDistanceIndex *distance_index, const ZipCodeCollection *zipcodes, const PathPositionHandleGraph *path_graph=nullptr)
 
virtual void set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 
void map (Alignment &aln, AlignmentEmitter &alignment_emitter)
 
vector< Alignment > map (Alignment &aln)
 
vector< Alignment > map_from_chains (Alignment &aln)
 
vector< Alignment > map_from_extensions (Alignment &aln)
 
pair< vector< Alignment >, vector< Alignment > > map_paired (Alignment &aln1, Alignment &aln2, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer)
 
pair< vector< Alignment >, vector< Alignment > > map_paired (Alignment &aln1, Alignment &aln2)
 
bool fragment_distr_is_finalized ()
 
void finalize_fragment_length_distr ()
 
void force_fragment_length_distr (double mean, double stdev)
 
double get_fragment_length_mean () const
 
double get_fragment_length_stdev () const
 
size_t get_fragment_length_sample_size () const
 
size_t get_distance_limit (size_t read_length) const
 
virtual void set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 Set all the aligner scoring parameters and create the stored aligner instances. More...
 
virtual void set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 
virtual void set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 
- Public Member Functions inherited from vg::AlignerClient
virtual void set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 Set all the aligner scoring parameters and create the stored aligner instances. More...
 
virtual void set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
 

Public Attributes

size_t hit_cap = default_hit_cap
 
size_t hard_hit_cap = default_hard_hit_cap
 
double minimizer_score_fraction = default_minimizer_score_fraction
 
size_t minimizer_downsampling_window_count = default_minimizer_downsampling_window_count
 
size_t minimizer_downsampling_max_window_length = default_minimizer_downsampling_max_window_length
 
size_t minimizer_coverage_flank = default_minimizer_coverage_flank
 
size_t max_unique_min = default_max_unique_min
 
size_t num_bp_per_min = default_num_bp_per_min
 
bool exclude_overlapping_min = default_exclude_overlapping_min
 
size_t min_extensions = default_min_extensions
 
size_t max_extensions = default_max_extensions
 
double cluster_score_threshold = default_cluster_score_threshold
 
double pad_cluster_score_threshold = default_pad_cluster_score_threshold
 
double cluster_coverage_threshold = default_cluster_coverage_threshold
 
double extension_set_score_threshold = default_extension_set_score_threshold
 
int extension_score_threshold = default_extension_score_threshold
 
int min_extension_sets = default_min_extension_sets
 
int extension_set_min_score = default_extension_set_min_score
 
size_t max_local_extensions = default_max_local_extensions
 
size_t max_alignments = default_max_alignments
 
size_t max_extension_mismatches = default_max_extension_mismatches
 
bool align_from_chains = default_align_from_chains
 
double zipcode_tree_scale = default_zipcode_tree_scale
 
double zipcode_tree_score_threshold = default_zipcode_tree_score_threshold
 
double pad_zipcode_tree_score_threshold = default_pad_zipcode_tree_score_threshold
 
double zipcode_tree_coverage_threshold = default_zipcode_tree_coverage_threshold
 
size_t min_to_fragment = default_min_to_fragment
 
size_t max_to_fragment = default_max_to_fragment
 
size_t gapless_extension_limit = default_gapless_extension_limit
 
size_t fragment_max_lookback_bases = default_fragment_max_lookback_bases
 
double fragment_max_lookback_bases_per_base = default_fragment_max_lookback_bases_per_base
 
size_t max_fragments = default_max_fragments
 
double fragment_gap_scale = default_fragment_gap_scale
 
double fragment_points_per_possible_match = default_fragment_points_per_possible_match
 
size_t fragment_max_indel_bases = default_fragment_max_indel_bases
 
double fragment_max_indel_bases_per_base = default_fragment_max_indel_bases_per_base
 
size_t max_chain_connection = default_max_chain_connection
 
size_t max_tail_length = default_max_tail_length
 
double fragment_score_fraction = default_fragment_score_fraction
 
double fragment_max_min_score = default_fragment_max_min_score
 
double fragment_min_score = default_fragment_min_score
 
double fragment_set_score_threshold = default_fragment_set_score_threshold
 
int min_chaining_problems = default_min_chaining_problems
 
int max_chaining_problems = default_max_chaining_problems
 
size_t max_direct_to_chain = default_max_direct_to_chain
 
size_t max_lookback_bases = default_max_lookback_bases
 
double max_lookback_bases_per_base = default_max_lookback_bases_per_base
 
int item_bonus = default_item_bonus
 
double item_scale = default_item_scale
 
double gap_scale = default_gap_scale
 
double points_per_possible_match = default_points_per_possible_match
 
size_t max_indel_bases = default_max_indel_bases
 
double max_indel_bases_per_base = default_max_indel_bases_per_base
 
double chain_score_threshold = default_chain_score_threshold
 
int min_chains = default_min_chains
 
size_t max_chains_per_tree = default_max_chains_per_tree
 
double min_chain_score_per_base = default_min_chain_score_per_base
 
int max_min_chain_score = default_max_min_chain_score
 
size_t max_skipped_bases = default_max_skipped_bases
 
size_t max_tail_dp_length = default_max_tail_dp_length
 
size_t max_middle_dp_length = default_max_middle_dp_length
 
size_t max_dp_cells = default_max_dp_cells
 
size_t max_tail_gap = default_max_tail_gap
 
size_t max_middle_gap = default_max_middle_gap
 
int wfa_max_mismatches = default_wfa_max_mismatches
 
double wfa_max_mismatches_per_base = default_wfa_max_mismatches_per_base
 
int wfa_max_max_mismatches = default_wfa_max_max_mismatches
 
int wfa_distance = default_wfa_distance
 
double wfa_distance_per_base = default_wfa_distance_per_base
 
int wfa_max_distance = default_wfa_max_distance
 
bool sort_by_chain_score = default_sort_by_chain_score
 
double min_unique_node_fraction = default_min_unique_node_fraction
 
bool use_explored_cap = default_use_explored_cap
 
size_t mapq_score_window = default_mapq_score_window
 
double mapq_score_scale = default_mapq_score_scale
 
size_t max_multimaps = default_max_multimaps
 
size_t distance_limit = default_distance_limit
 
bool do_dp = default_do_dp
 
bool set_refpos = default_set_refpos
 
bool track_provenance = default_track_provenance
 
bool track_correctness = default_track_correctness
 
bool track_position = default_track_position
 
bool show_work = default_show_work
 
double paired_distance_stdevs = default_paired_distance_stdevs
 
double paired_rescue_score_limit = default_paired_rescue_score_limit
 
double rescue_subgraph_stdevs = default_rescue_subgraph_stdevs
 
size_t rescue_seed_limit = default_rescue_seed_limit
 
size_t max_rescue_attempts = default_max_rescue_attempts
 
size_t max_dozeu_cells = default_max_dozeu_cells
 
size_t max_fragment_length = default_max_fragment_length
 
RescueAlgorithm rescue_algorithm = rescue_dozeu
 The algorithm used for rescue. More...
 
string sample_name
 Apply this sample name. More...
 
string read_group
 Apply this read group name. More...
 
atomic_flag warned_about_rescue_size = ATOMIC_FLAG_INIT
 Have we complained about hitting the size limit for rescue? More...
 
atomic_flag warned_about_tail_size = ATOMIC_FLAG_INIT
 Have we complained about hitting the size limit for tails? More...
 
- Public Attributes inherited from vg::AlignerClient
bool adjust_alignments_for_base_quality = false
 

Static Public Attributes

static constexpr size_t default_hit_cap = 10
 Use all minimizers with at most hit_cap hits. More...
 
static constexpr size_t default_hard_hit_cap = 500
 Ignore all minimizers with more than hard_hit_cap hits. More...
 
static constexpr double default_minimizer_score_fraction = 0.9
 
static constexpr size_t default_minimizer_downsampling_window_count = 0
 Window count for minimizer downsampling. More...
 
static constexpr size_t default_minimizer_downsampling_max_window_length = std::numeric_limits<size_t>::max()
 
static constexpr size_t default_minimizer_coverage_flank = 250
 
static constexpr size_t default_max_unique_min = 500
 Maximum number of distinct minimizers to take. More...
 
static constexpr size_t default_num_bp_per_min = 1000
 Number of minimizers to select based on read_len/num_bp_per_min. More...
 
static constexpr bool default_exclude_overlapping_min = false
 If set, exclude overlapping minimizers. More...
 
static constexpr size_t default_min_extensions = 2
 Accept at least this many clusters for gapless extension. More...
 
static constexpr size_t default_max_extensions = 800
 How many clusters should we produce gapless extensions for, max? More...
 
static constexpr double default_cluster_score_threshold = 50
 If a cluster's score is smaller than the best score of any cluster by more than this much, then don't extend it. More...
 
static constexpr double default_pad_cluster_score_threshold = 20
 
static constexpr double default_cluster_coverage_threshold = 0.3
 
static constexpr double default_extension_set_score_threshold = 20
 
static constexpr int default_extension_score_threshold = 1
 
static constexpr int default_min_extension_sets = 2
 
static constexpr int default_extension_set_min_score = 20
 
static constexpr size_t default_max_local_extensions = numeric_limits<size_t>::max()
 How many extensions should we try as seeds within a mapping location? More...
 
static constexpr size_t default_max_alignments = 8
 How many alignments should we make, max? More...
 
static constexpr size_t default_max_extension_mismatches = GaplessExtender::MAX_MISMATCHES
 
static constexpr bool default_align_from_chains = false
 
static constexpr double default_zipcode_tree_scale = 2.0
 
static constexpr double default_zipcode_tree_score_threshold = 50
 How far do we want to go down looking at zip code trees to make fragments? More...
 
static constexpr double default_pad_zipcode_tree_score_threshold = 20
 
static constexpr double default_zipcode_tree_coverage_threshold = 0.3
 
static constexpr size_t default_min_to_fragment = 4
 How many things should we produce fragments for, min? More...
 
static constexpr size_t default_max_to_fragment = 10
 How many things should we produce fragments for, max? More...
 
static constexpr size_t default_gapless_extension_limit = 0
 
static constexpr size_t default_fragment_max_lookback_bases = 300
 How many bases should we look back when making fragments? More...
 
static constexpr double default_fragment_max_lookback_bases_per_base = 0.03
 How many bases should we look back when making fragments, per base of read length? More...
 
static constexpr size_t default_max_fragments = std::numeric_limits<size_t>::max()
 How many fragments should we try and make when fragmenting something? More...
 
static constexpr double default_fragment_gap_scale = 1.0
 
static constexpr double default_fragment_points_per_possible_match = 0
 
static constexpr size_t default_fragment_max_indel_bases = 2000
 How many bases of indel should we allow in fragments? More...
 
static constexpr double default_fragment_max_indel_bases_per_base = 0.2
 How many bases of indel should we allow in fragments per base of read length? More...
 
static constexpr size_t default_max_chain_connection = 100
 
static constexpr size_t default_max_tail_length = 100
 Similarly, what is the maximum tail length we will try to WFA align? More...
 
static constexpr double default_fragment_score_fraction = 0.1
 
static constexpr double default_fragment_max_min_score = std::numeric_limits<double>::max()
 How high should we let the score threshold based on the best fragment's score get? More...
 
static constexpr double default_fragment_min_score = 60
 
static constexpr double default_fragment_set_score_threshold = 0
 
static constexpr int default_min_chaining_problems = 1
 
static constexpr int default_max_chaining_problems = std::numeric_limits<int>::max()
 Do no more than this many chaining problems. More...
 
static constexpr size_t default_max_direct_to_chain = 0
 
static constexpr size_t default_max_lookback_bases = 3000
 How many bases should we look back when chaining? More...
 
static constexpr double default_max_lookback_bases_per_base = 0.3
 How many bases should we look back when chaining, per base of read length? More...
 
static constexpr int default_item_bonus = 0
 
static constexpr double default_item_scale = 1.0
 
static constexpr double default_gap_scale = 1.0
 
static constexpr double default_points_per_possible_match = 0
 
static constexpr size_t default_max_indel_bases = 2000
 How many bases of indel should we allow in chaining? More...
 
static constexpr double default_max_indel_bases_per_base = 0.2
 How many bases of indel should we allow in chaining, per base of read length? More...
 
static constexpr double default_chain_score_threshold = 100
 
static constexpr int default_min_chains = 4
 
static constexpr size_t default_max_chains_per_tree = 1
 Allow up to this many chains per tree. More...
 
static constexpr double default_min_chain_score_per_base = 0.01
 
static constexpr int default_max_min_chain_score = 200
 Limit the min chain score to no more than this. More...
 
static constexpr size_t default_max_skipped_bases = 0
 
static constexpr size_t default_max_tail_dp_length = 30000
 
static constexpr size_t default_max_middle_dp_length = std::numeric_limits<int32_t>::max()
 How long of a DP can we do before something might go wrong with BandedGlobalAligner or the GBWT-based WFA? More...
 
static constexpr size_t default_max_dp_cells = std::numeric_limits<size_t>::max()
 
static constexpr size_t default_max_tail_gap = std::numeric_limits<size_t>::max()
 How many gap bases should we allow in a Dozeu tail alignment, max? More...
 
static constexpr size_t default_max_middle_gap = std::numeric_limits<size_t>::max()
 How many gap bases should we allow in a between-seed alignment, max? More...
 
static constexpr int default_wfa_max_mismatches = 2
 How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails? More...
 
static constexpr double default_wfa_max_mismatches_per_base = 0.1
 How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails per base of read sequence? More...
 
static constexpr int default_wfa_max_max_mismatches = 20
 How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails maximum, at any read length? More...
 
static constexpr int default_wfa_distance = WFAExtender::ErrorModel::default_distance().min
 How far behind the leader should the WFA be allowed to get? More...
 
static constexpr double default_wfa_distance_per_base = WFAExtender::ErrorModel::default_distance().per_base
 How far behind the leader should the WFA be allowed to get, per base of read sequence? More...
 
static constexpr int default_wfa_max_distance = WFAExtender::ErrorModel::default_distance().max
 How far behind the leader should the WFA be allowed to get, at any read length? More...
 
static constexpr bool default_sort_by_chain_score = false
 Should alignments be ranked by chain score instead of base-level score? More...
 
static constexpr double default_min_unique_node_fraction = 0.0
 How much of an alignment needs to be from distinct nodes to be a distinct alignment? More...
 
static constexpr bool default_use_explored_cap = false
 
static constexpr size_t default_mapq_score_window = 0
 What number of bp should we re-scale scores to for MAPQ, for calibration? 0 for off. More...
 
static constexpr double default_mapq_score_scale = 1.0
 How should we scale scores before mapq, for calibration. More...
 
static constexpr size_t default_max_multimaps = 1
 
static constexpr size_t default_distance_limit = 200
 
static constexpr bool default_do_dp = true
 If false, skip computing base-level alignments. More...
 
static constexpr bool default_set_refpos = false
 Set refpos field of alignments to positions on nodes they visit. More...
 
static constexpr bool default_track_provenance = false
 
static constexpr bool default_track_correctness = false
 
static constexpr bool default_track_position = false
 Track linear reference position for placements in log output. More...
 
static constexpr bool default_show_work = false
 If set, log what the mapper is thinking in its mapping of each read. More...
 
static constexpr double default_paired_distance_stdevs = 2.0
 
static constexpr double default_paired_rescue_score_limit = 0.9
 How close does an alignment have to be to the best alignment for us to rescue on it. More...
 
static constexpr double default_rescue_subgraph_stdevs = 4.0
 How many stdevs from the mean do we extract a subgraph from? More...
 
static constexpr size_t default_rescue_seed_limit = 100
 Do not attempt rescue if there are more seeds in the rescue subgraph. More...
 
static constexpr size_t default_max_rescue_attempts = 15
 For paired end mapping, how many times should we attempt rescue (per read)? More...
 
static constexpr size_t default_max_dozeu_cells = (size_t)(1.5 * 1024 * 1024)
 
static constexpr size_t default_max_fragment_length = 2000
 What is the maximum fragment length that we accept as valid for paired-end reads? More...
 

Protected Types

typedef SnarlDistanceIndexClusterer::Cluster Cluster
 The information we store for each cluster. More...
 
using ImmutablePath = structures::ImmutableList< Mapping >
 

Protected Member Functions

double distance_to_annotation (int64_t distance) const
 
std::vector< algorithms::Anchor > to_anchors (const Alignment &aln, const VectorView< Minimizer > &minimizers, std::vector< Seed > &seeds) const
 Convert a collection of seeds to a collection of chaining anchors. More...
 
WFAAlignment to_wfa_alignment (const algorithms::Anchor &anchor, const Alignment &aln, const Aligner *aligner) const
 
std::vector< Minimizer > find_minimizers (const std::string &sequence, Funnel &funnel) const
 
void flag_repetitive_minimizers (std::vector< Minimizer > &minimizers_in_read_order) const
 
std::vector< size_t > sort_minimizers_by_score (const std::vector< Minimizer > &minimizers_in_read_order, LazyRNG &rng) const
 
std::vector< Seed > find_seeds (const std::vector< Minimizer > &minimizers_in_read_order, const VectorView< Minimizer > &minimizers, const Alignment &aln, Funnel &funnel) const
 
void tag_seeds (const Alignment &aln, const std::vector< Seed >::const_iterator &begin, const std::vector< Seed >::const_iterator &end, const VectorView< Minimizer > &minimizers, size_t funnel_offset, Funnel &funnel) const
 
void score_cluster (Cluster &cluster, size_t i, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, size_t seq_length, Funnel &funnel) const
 
std::pair< double, double > score_tree (const ZipCodeForest &zip_code_forest, size_t i, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, size_t seq_length, Funnel &funnel) const
 
vector< GaplessExtension > extend_seed_group (const std::vector< size_t > &seed_group, size_t source_num, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, const string &sequence, size_t max_mismatches, vector< vector< size_t >> *minimizer_kept_count=nullptr, Funnel *funnel=nullptr, std::vector< std::vector< size_t >> *seeds_used=nullptr) const
 
std::vector< int > score_extensions (const std::vector< std::vector< GaplessExtension >> &extensions, const Alignment &aln, Funnel &funnel) const
 
std::vector< int > score_extensions (const std::vector< std::pair< std::vector< GaplessExtension >, size_t >> &extensions, const Alignment &aln, Funnel &funnel) const
 
double get_read_coverage (const Alignment &aln, const VectorView< std::vector< size_t >> &seed_sets, const std::vector< Seed > &seeds, const VectorView< Minimizer > &minimizers) const
 
void do_fragmenting_on_trees (Alignment &aln, const ZipCodeForest &zip_code_forest, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const vector< algorithms::Anchor > &seed_anchors, std::vector< std::vector< size_t >> &fragments, std::vector< double > &fragment_scores, std::vector< algorithms::Anchor > &fragment_anchors, std::vector< size_t > &fragment_source_tree, std::vector< std::vector< size_t >> &minimizer_kept_fragment_count, std::vector< double > &multiplicity_by_fragment, std::vector< Alignment > &alignments, SmallBitset &minimizer_explored, vector< double > &multiplicity_by_alignment, LazyRNG &rng, Funnel &funnel) const
 
void do_chaining_on_fragments (Alignment &aln, const ZipCodeForest &zip_code_forest, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const std::vector< std::vector< size_t >> &fragments, const std::vector< double > &fragment_scores, const std::vector< algorithms::Anchor > &fragment_anchors, const std::vector< size_t > &fragment_source_tree, const std::vector< std::vector< size_t >> &minimizer_kept_fragment_count, const std::vector< double > &multiplicity_by_fragment, std::vector< std::vector< size_t >> &chains, std::vector< size_t > &chain_source_tree, std::vector< int > &chain_score_estimates, std::vector< std::vector< size_t >> &minimizer_kept_chain_count, std::vector< double > &multiplicity_by_chain, vector< double > &multiplicity_by_tree, std::unordered_map< size_t, std::vector< size_t >> &good_fragments_in, LazyRNG &rng, Funnel &funnel) const
 
void get_best_chain_stats (Alignment &aln, const ZipCodeForest &zip_code_forest, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const std::vector< std::vector< size_t >> &fragments, const std::unordered_map< size_t, std::vector< size_t >> &good_fragments_in, const std::vector< std::vector< size_t >> &chains, const std::vector< size_t > &chain_source_tree, const vector< algorithms::Anchor > &seed_anchors, const std::vector< int > &chain_score_estimates, bool &best_chain_correct, double &best_chain_coverage, size_t &best_chain_longest_jump, double &best_chain_average_jump, size_t &best_chain_anchors, size_t &best_chain_anchor_length, Funnel &funnel) const
 
void do_alignment_on_chains (Alignment &aln, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const vector< algorithms::Anchor > &seed_anchors, const std::vector< std::vector< size_t >> &chains, const std::vector< size_t > &chain_source_tree, const std::vector< double > &multiplicity_by_chain, const std::vector< int > &chain_score_estimates, const std::vector< std::vector< size_t >> &minimizer_kept_chain_count, vector< Alignment > &alignments, vector< double > &multiplicity_by_alignment, vector< size_t > &alignments_to_source, SmallBitset &minimizer_explored, aligner_stats_t &stats, bool &funnel_depleted, LazyRNG &rng, Funnel &funnel) const
 
void pick_mappings_from_alignments (Alignment &aln, const std::vector< Alignment > &alignments, const std::vector< double > &multiplicity_by_alignment, const std::vector< size_t > &alignments_to_source, const std::vector< int > &chain_score_estimates, std::vector< Alignment > &mappings, std::vector< double > &scores, std::vector< double > &multiplicity_by_mapping, bool &funnel_depleted, LazyRNG &rng, Funnel &funnel) const
 
Alignment find_chain_alignment (const Alignment &aln, const VectorView< algorithms::Anchor > &to_chain, const std::vector< size_t > &chain, aligner_stats_t *stats=nullptr) const
 
void find_optimal_tail_alignments (const Alignment &aln, const vector< GaplessExtension > &extended_seeds, LazyRNG &rng, Alignment &best, Alignment &second_best) const
 
void attempt_rescue (const Alignment &aligned_read, Alignment &rescued_alignment, const VectorView< Minimizer > &minimizers, bool rescue_forward)
 
GaplessExtender::cluster_type seeds_in_subgraph (const VectorView< Minimizer > &minimizers, const std::unordered_set< nid_t > &subgraph) const
 
void fix_dozeu_score (Alignment &rescued_alignment, const HandleGraph &rescue_graph, const std::vector< handle_t > &topological_order) const
 
void fix_dozeu_end_deletions (Alignment &rescued_alignment) const
 
int64_t distance_between (const pos_t &pos1, const pos_t &pos2)
 
int64_t distance_between (const Alignment &aln1, const Alignment &aln2)
 
int64_t unoriented_distance_between (const pos_t &pos1, const pos_t &pos2) const
 
void extension_to_alignment (const GaplessExtension &extension, Alignment &alignment) const
 
void wfa_alignment_to_alignment (const WFAAlignment &wfa_alignment, Alignment &alignment) const
 
void pair_all (std::array< vector< Alignment >, 2 > &mappings) const
 
void annotate_with_minimizer_statistics (Alignment &target, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, size_t old_seed_count, size_t new_seed_offset, const Funnel &funnel) const
 
double compute_mapq_caps (const Alignment &aln, const VectorView< Minimizer > &minimizers, const SmallBitset &explored)
 
vector< TreeSubgraph > get_tail_forest (const GaplessExtension &extended_seed, size_t read_length, bool left_tails, size_t *longest_detectable_gap=nullptr) const
 
pair< Path, size_t > get_best_alignment_against_any_tree (const vector< TreeSubgraph > &trees, const string &sequence, const Position &default_position, bool pin_left, size_t longest_detectable_gap, LazyRNG &rng) const
 
void dfs_gbwt (const Position &from, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const
 
void dfs_gbwt (handle_t from_handle, size_t from_offset, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const
 
void dfs_gbwt (const gbwt::SearchState &start_state, size_t from_offset, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const
 
double score_alignment_pair (Alignment &aln1, Alignment &aln2, int64_t fragment_distance)
 
template<typename Score = double>
void process_until_threshold_a (size_t items, const function< Score(size_t)> &get_score, double threshold, size_t min_count, size_t max_count, LazyRNG &rng, const function< bool(size_t, size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const
 
template<typename Score = double>
void process_until_threshold_b (const vector< Score > &scores, double threshold, size_t min_count, size_t max_count, LazyRNG &rng, const function< bool(size_t, size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const
 
template<typename Score = double>
void process_until_threshold_c (size_t items, const function< Score(size_t)> &get_score, const function< bool(size_t, size_t)> &comparator, double threshold, size_t min_count, size_t max_count, LazyRNG &get_seed, const function< bool(size_t, size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const
 
bool validate_clusters (const std::vector< std::vector< Cluster >> &clusters, const std::vector< std::vector< Seed >> &seeds, size_t read_limit, size_t fragment_limit) const
 Do a brute check of the clusters. Print errors to stderr. More...
 
- Protected Member Functions inherited from vg::AlignerClient
 AlignerClient (double gc_content_estimate=vg::default_gc_content)
 
const GSSWAligner * get_aligner (bool have_qualities=true) const
 
const QualAdjAligner * get_qual_adj_aligner () const
 
const Aligner * get_regular_aligner () const
 

Static Protected Member Functions

static gbwtgraph::Payload no_chain_info ()
 How should we initialize chain info when it's not stored in the minimizer index? More...
 
static Seed chain_info_to_seed (const pos_t &hit, size_t minimizer, const ZipCode &zip)
 
static algorithms::Anchor to_anchor (const Alignment &aln, const VectorView< Minimizer > &minimizers, std::vector< Seed > &seeds, size_t seed_number, const HandleGraph &graph, const Aligner *aligner)
 Convert a single seed to a single chaining anchor. More...
 
static algorithms::Anchor to_anchor (const Alignment &aln, size_t read_start, size_t read_end, const std::vector< size_t > &sorted_seeds, const std::vector< algorithms::Anchor > &seed_anchors, const std::vector< size_t >::const_iterator &mismatch_begin, const std::vector< size_t >::const_iterator &mismatch_end, const HandleGraph &graph, const Aligner *aligner)
 
static int score_extension_group (const Alignment &aln, const vector< GaplessExtension > &extended_seeds, int gap_open_penalty, int gap_extend_penalty)
 
static void with_dagified_local_graph (const pos_t &left_anchor, const pos_t &right_anchor, size_t max_path_length, const HandleGraph &graph, const std::function< void(DeletableHandleGraph &, const std::function< std::pair< nid_t, bool >(const handle_t &)> &)> &callback)
 
static size_t longest_detectable_gap_in_range (const Alignment &aln, const std::string::const_iterator &sequence_begin, const std::string::const_iterator &sequence_end, const GSSWAligner *aligner)
 
static std::pair< size_t, size_t > align_sequence_between (const pos_t &left_anchor, const pos_t &right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph *graph, const GSSWAligner *aligner, Alignment &alignment, const std::string *alignment_name=nullptr, size_t max_dp_cells=std::numeric_limits< size_t >::max(), const std::function< size_t(const Alignment &, const HandleGraph &)> &choose_band_padding=algorithms::pad_band_random_walk())
 
static std::pair< size_t, size_t > align_sequence_between_consistently (const pos_t &left_anchor, const pos_t &right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph *graph, const GSSWAligner *aligner, Alignment &alignment, const std::string *alignment_name=nullptr, size_t max_dp_cells=std::numeric_limits< size_t >::max(), const std::function< size_t(const Alignment &, const HandleGraph &)> &choose_band_padding=algorithms::pad_band_random_walk())
 
static WFAAlignment connect_consistently (const std::string &sequence, const pos_t &left_anchor, const pos_t &right_anchor, const WFAExtender &wfa_extender)
 
static double window_breaking_quality (const VectorView< Minimizer > &minimizers, vector< size_t > &broken, const string &sequence, const string &quality_bytes)
 
static double faster_cap (const VectorView< Minimizer > &minimizers, vector< size_t > &minimizers_explored, const string &sequence, const string &quality_bytes)
 
static void for_each_agglomeration_interval (const VectorView< Minimizer > &minimizers, const string &sequence, const string &quality_bytes, const vector< size_t > &minimizer_indices, const function< void(size_t, size_t, size_t, size_t)> &iteratee)
 
static double get_log10_prob_of_disruption_in_interval (const VectorView< Minimizer > &minimizers, const string &sequence, const string &quality_bytes, const vector< size_t >::iterator &disrupt_begin, const vector< size_t >::iterator &disrupt_end, size_t left, size_t right)
 
static double get_prob_of_disruption_in_column (const VectorView< Minimizer > &minimizers, const string &sequence, const string &quality_bytes, const vector< size_t >::iterator &disrupt_begin, const vector< size_t >::iterator &disrupt_end, size_t index)
 
static size_t immutable_path_from_length (const ImmutablePath &path)
 
static Path to_path (const ImmutablePath &path)
 
static string log_name ()
 Get the thread identifier prefix for logging. More...
 
static string log_alignment (const Alignment &aln)
 Turn an Alignment into a conveniently-sized string for logging. More...
 
static string log_alignment (const Path &path, bool force_condensed=false)
 Turn a Path from an alignment into a conveniently-sized string for logging. More...
 
static string log_bits (const std::vector< bool > &bits)
 Turn a list of bit flags into a compact representation. More...
 
static void dump_chaining_problem (const std::vector< algorithms::Anchor > &anchors, const std::vector< size_t > &cluster_seeds_sorted, const HandleGraph &graph)
 Dump a whole chaining problem. More...
 
static void dump_debug_minimizers (const VectorView< Minimizer > &minimizers, const string &sequence, const vector< size_t > *to_include=nullptr, size_t start_offset=0, size_t length_limit=std::numeric_limits< size_t >::max())
 Dump all the given minimizers, with optional subset restriction. More...
 
static void dump_debug_extension_set (const HandleGraph &graph, const Alignment &aln, const vector< GaplessExtension > &extended_seeds)
 Dump all the extensions in an extension set. More...
 
static void dump_debug_sequence (ostream &out, const string &sequence, size_t start_offset=0, size_t length_limit=std::numeric_limits< size_t >::max())
 Print a sequence with base numbering. More...
 
static void dump_debug_clustering (const Cluster &cluster, size_t cluster_number, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds)
 Print the seed content of a cluster. More...
 
static void dump_debug_seeds (const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, const std::vector< size_t > &selected_seeds)
 Print information about a selected set of seeds. More...
 
static void dump_debug_query (const Alignment &aln)
 Print information about a read to be aligned. More...
 
static void dump_debug_query (const Alignment &aln1, const Alignment &aln2)
 Print information about a read pair to be aligned. More...
 
static void dump_debug_dotplot (const std::string &name, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, const std::vector< std::pair< std::string, std::vector< std::vector< size_t >>>> &seed_sets, const PathPositionHandleGraph *path_graph)
 
static void dump_debug_graph (const HandleGraph &graph)
 Dump a graph. More...
 

Protected Attributes

const PathPositionHandleGraph * path_graph
 
const gbwtgraph::DefaultMinimizerIndex & minimizer_index
 
SnarlDistanceIndex * distance_index
 
const ZipCodeCollection * zipcodes
 
const gbwtgraph::GBWTGraph & gbwt_graph
 This is our primary graph. More...
 
std::unique_ptr< GaplessExtender > extender
 
SnarlDistanceIndexClusterer clusterer
 We have a clusterer. More...
 
ZipCodeForest zip_forest
 We have a zip code tree for finding distances between seeds. More...
 
std::function< size_t(const Alignment &, const HandleGraph &)> choose_band_padding
 
FragmentLengthDistribution fragment_length_distr
 
atomic_flag warned_about_bad_distribution = ATOMIC_FLAG_INIT
 We may need to complain exactly once that the distribution is bad. More...
 

Static Protected Attributes

const static size_t LONG_LIMIT = 256
 Length at which we cut over to long-alignment logging. More...
 
const static size_t MANY_LIMIT = 10
 Count at which we cut over to summary logging. More...
 

Friends

class TestMinimizerMapper
 

Additional Inherited Members

- Static Public Member Functions inherited from vg::AlignerClient
static int8_t * parse_matrix (std::istream &matrix_stream)
 Allocates an array to hold a 4x4 substitution matrix and returns it. More...
 

Member Typedef Documentation

◆ Cluster

The information we store for each cluster.

◆ ImmutablePath

using vg::MinimizerMapper::ImmutablePath = structures::ImmutableList<Mapping>
protected

We define a type for shared-tail lists of Mappings, to avoid constantly copying Path objects.

◆ Seed

The information we store for each seed.

Member Enumeration Documentation

◆ RescueAlgorithm

Implemented rescue algorithms: no rescue, dozeu, GSSW.

Enumerator
rescue_none 
rescue_dozeu 
rescue_gssw 

Constructor & Destructor Documentation

◆ MinimizerMapper()

vg::MinimizerMapper::MinimizerMapper ( const gbwtgraph::GBWTGraph &  graph,
const gbwtgraph::DefaultMinimizerIndex &  minimizer_index,
SnarlDistanceIndex *  distance_index,
const ZipCodeCollection *  zipcodes,
const PathPositionHandleGraph *  path_graph = nullptr 
)

Construct a new MinimizerMapper using the given indexes. The PathPositionHandleGraph can be nullptr, as we only use it for correctness tracking.

Member Function Documentation

◆ align_sequence_between()

std::pair< size_t, size_t > vg::MinimizerMapper::align_sequence_between ( const pos_t &  left_anchor,
const pos_t &  right_anchor,
size_t  max_path_length,
size_t  max_gap_length,
const HandleGraph *  graph,
const GSSWAligner *  aligner,
Alignment &  alignment,
const std::string *  alignment_name = nullptr,
size_t  max_dp_cells = std::numeric_limits<size_t>::max(),
const std::function< size_t(const Alignment &, const HandleGraph &)> &  choose_band_padding = algorithms::pad_band_random_walk() 
)
staticprotected

Clip out the part of the graph between the given positions and global-align the sequence of the given Alignment to it. Populate the Alignment's path and score.

Finds an alignment against a graph path if it is <= max_path_length.

If one of the anchor positions is empty, does pinned alignment against the other position.

For pinned alignment, restricts the alignment to have gaps no longer than max_gap_length, and to use <= max_dp_cells cells. If too many DP cells would be used, produces a softclip alignment.

For connecting alignment, restricts the alignment to use <= max_dp_cells cells. If too many DP cells would be used, produces an Alignment with an empty path.

Returns the number of nodes and bases in the graph aligned against.

◆ align_sequence_between_consistently()

std::pair< size_t, size_t > vg::MinimizerMapper::align_sequence_between_consistently ( const pos_t &  left_anchor,
const pos_t &  right_anchor,
size_t  max_path_length,
size_t  max_gap_length,
const HandleGraph *  graph,
const GSSWAligner *  aligner,
Alignment &  alignment,
const std::string *  alignment_name = nullptr,
size_t  max_dp_cells = std::numeric_limits<size_t>::max(),
const std::function< size_t(const Alignment &, const HandleGraph &)> &  choose_band_padding = algorithms::pad_band_random_walk() 
)
staticprotected

Version of align_sequence_between() that guarantees that you get the same answer (modulo reverse-complementation) no matter whether the sequence and anchors are reverse-complemented or not.

◆ annotate_with_minimizer_statistics()

void vg::MinimizerMapper::annotate_with_minimizer_statistics ( Alignment target,
const VectorView< Minimizer > &  minimizers,
const std::vector< Seed > &  seeds,
size_t  old_seed_count,
size_t  new_seed_offset,
const Funnel funnel 
) const
protected

Add annotations to an Alignment with statistics about the minimizers.

old_seed_count is the number of seeds in the seed vector actually created at the "seed" stage of the alignment process. new_seed_offset is where the first of those seeds appears in the funnel at the reseed stage.

◆ attempt_rescue()

void vg::MinimizerMapper::attempt_rescue ( const Alignment aligned_read,
Alignment rescued_alignment,
const VectorView< Minimizer > &  minimizers,
bool  rescue_forward 
)
protected

Given an aligned read, extract a subgraph of the graph within a distance range based on the fragment length distribution and attempt to align the unaligned read to it. Rescue_forward is true if the aligned read is the first and false otherwise. Assumes that both reads are facing the same direction. TODO: This should be const, but some of the function calls are not.

◆ chain_info_to_seed()

static Seed vg::MinimizerMapper::chain_info_to_seed ( const pos_t hit,
size_t  minimizer,
const ZipCode zip 
)
inlinestaticprotected

How do we convert chain info to an actual seed of the type we are using? Also needs to know the hit position, and the minimizer number.

◆ compute_mapq_caps()

double vg::MinimizerMapper::compute_mapq_caps ( const Alignment aln,
const VectorView< Minimizer > &  minimizers,
const SmallBitset explored 
)
protected

Compute MAPQ caps based on all minimizers that are explored, for some definition of explored.

Needs access to the input alignment for sequence and quality information.

Returns only an "extended" cap at the moment.

◆ connect_consistently()

WFAAlignment vg::MinimizerMapper::connect_consistently ( const std::string &  sequence,
const pos_t &  left_anchor,
const pos_t &  right_anchor,
const WFAExtender &  wfa_extender 
)
staticprotected

Produce a WFAAlignment of the given sequence between the given points that will be the same (modulo reverse-complementation) no matter whether the sequence and anchors are reverse-complemented or not.

◆ dfs_gbwt() [1/3]

void vg::MinimizerMapper::dfs_gbwt ( const gbwt::SearchState &  start_state,
size_t  from_offset,
size_t  walk_distance,
const function< void(const handle_t &)> &  enter_handle,
const function< void(void)>  exit_handle 
) const
protected

The same as dfs_gbwt on a handle and an offset, but takes a gbwt::SearchState that defines only some haplotypes on a handle to start with.

◆ dfs_gbwt() [2/3]

void vg::MinimizerMapper::dfs_gbwt ( const Position from,
size_t  walk_distance,
const function< void(const handle_t &)> &  enter_handle,
const function< void(void)>  exit_handle 
) const
protected

Run a DFS on valid haplotypes in the GBWT starting from the given Position, and continuing up to the given number of bases.

Calls enter_handle when the DFS enters a haplotype visit to a particular handle, and exit_handle when it exits a visit. These let the caller maintain a stack and track the traversals.

The starting node is only entered if its offset isn't equal to its length (i.e. bases remain to be visited).

Stopping early is not permitted.

◆ dfs_gbwt() [3/3]

void vg::MinimizerMapper::dfs_gbwt ( handle_t  from_handle,
size_t  from_offset,
size_t  walk_distance,
const function< void(const handle_t &)> &  enter_handle,
const function< void(void)>  exit_handle 
) const
protected

The same as dfs_gbwt on a Position, but takes a handle in the backing gbwt_graph and an offset from the start of the handle instead.

◆ distance_between() [1/2]

int64_t vg::MinimizerMapper::distance_between ( const Alignment aln1,
const Alignment aln2 
)
protected

Get the distance between a pair of read alignments, or std::numeric_limits<int64_t>::max() if unreachable.

◆ distance_between() [2/2]

int64_t vg::MinimizerMapper::distance_between ( const pos_t pos1,
const pos_t pos2 
)
protected

Get the distance between a pair of positions, or std::numeric_limits<int64_t>::max() if unreachable.

◆ distance_to_annotation()

double vg::MinimizerMapper::distance_to_annotation ( int64_t  distance) const
protected

Convert an integer distance, with limits standing for no distance, to a double annotation that can safely be parsed back from JSON into an integer if it is integral.

◆ do_alignment_on_chains()

void vg::MinimizerMapper::do_alignment_on_chains ( Alignment aln,
const std::vector< Seed > &  seeds,
const VectorView< MinimizerMapper::Minimizer > &  minimizers,
const vector< algorithms::Anchor > &  seed_anchors,
const std::vector< std::vector< size_t >> &  chains,
const std::vector< size_t > &  chain_source_tree,
const std::vector< double > &  multiplicity_by_chain,
const std::vector< int > &  chain_score_estimates,
const std::vector< std::vector< size_t >> &  minimizer_kept_chain_count,
vector< Alignment > &  alignments,
vector< double > &  multiplicity_by_alignment,
vector< size_t > &  alignments_to_source,
SmallBitset minimizer_explored,
aligner_stats_t stats,
bool &  funnel_depleted,
LazyRNG rng,
Funnel funnel 
) const
protected

◆ do_chaining_on_fragments()

void vg::MinimizerMapper::do_chaining_on_fragments ( Alignment aln,
const ZipCodeForest zip_code_forest,
const std::vector< Seed > &  seeds,
const VectorView< MinimizerMapper::Minimizer > &  minimizers,
const std::vector< std::vector< size_t >> &  fragments,
const std::vector< double > &  fragment_scores,
const std::vector< algorithms::Anchor > &  fragment_anchors,
const std::vector< size_t > &  fragment_source_tree,
const std::vector< std::vector< size_t >> &  minimizer_kept_fragment_count,
const std::vector< double > &  multiplicity_by_fragment,
std::vector< std::vector< size_t >> &  chains,
std::vector< size_t > &  chain_source_tree,
std::vector< int > &  chain_score_estimates,
std::vector< std::vector< size_t >> &  minimizer_kept_chain_count,
std::vector< double > &  multiplicity_by_chain,
vector< double > &  multiplicity_by_tree,
std::unordered_map< size_t, std::vector< size_t >> &  good_fragments_in,
LazyRNG rng,
Funnel funnel 
) const
protected

Given a collection of fragments, filter down to the good ones and do chaining on them

◆ do_fragmenting_on_trees()

void vg::MinimizerMapper::do_fragmenting_on_trees ( Alignment aln,
const ZipCodeForest zip_code_forest,
const std::vector< Seed > &  seeds,
const VectorView< MinimizerMapper::Minimizer > &  minimizers,
const vector< algorithms::Anchor > &  seed_anchors,
std::vector< std::vector< size_t >> &  fragments,
std::vector< double > &  fragment_scores,
std::vector< algorithms::Anchor > &  fragment_anchors,
std::vector< size_t > &  fragment_source_tree,
std::vector< std::vector< size_t >> &  minimizer_kept_fragment_count,
std::vector< double > &  multiplicity_by_fragment,
std::vector< Alignment > &  alignments,
SmallBitset minimizer_explored,
vector< double > &  multiplicity_by_alignment,
LazyRNG rng,
Funnel funnel 
) const
protected

Given a collection of zipcode trees, score the trees and do fragmenting on the best trees.

This will fill in the given vectors of fragments, fragment scores, etc.

If we do gapless extension, turn good full-length gapless extensions into alignments and return them in alignments. Gapless extensions are considered good enough if they have fewer than default_max_extension_mismatches mismatches.

◆ dump_chaining_problem()

void vg::MinimizerMapper::dump_chaining_problem ( const std::vector< algorithms::Anchor > &  anchors,
const std::vector< size_t > &  cluster_seeds_sorted,
const HandleGraph graph 
)
staticprotected

Dump a whole chaining problem.

◆ dump_debug_clustering()

void vg::MinimizerMapper::dump_debug_clustering ( const Cluster cluster,
size_t  cluster_number,
const VectorView< Minimizer > &  minimizers,
const std::vector< Seed > &  seeds 
)
staticprotected

Print the seed content of a cluster.

◆ dump_debug_dotplot()

void vg::MinimizerMapper::dump_debug_dotplot ( const std::string &  name,
const VectorView< Minimizer > &  minimizers,
const std::vector< Seed > &  seeds,
const std::vector< std::pair< std::string, std::vector< std::vector< size_t >>>> &  seed_sets,
const PathPositionHandleGraph path_graph 
)
staticprotected

Dump dotplot information for seeds. Displays one or more named collections of runs of seeds.

◆ dump_debug_extension_set()

void vg::MinimizerMapper::dump_debug_extension_set ( const HandleGraph graph,
const Alignment aln,
const vector< GaplessExtension > &  extended_seeds 
)
staticprotected

Dump all the extensions in an extension set.

◆ dump_debug_graph()

void vg::MinimizerMapper::dump_debug_graph ( const HandleGraph graph)
staticprotected

Dump a graph.

◆ dump_debug_minimizers()

void vg::MinimizerMapper::dump_debug_minimizers ( const VectorView< Minimizer > &  minimizers,
const string &  sequence,
const vector< size_t > *  to_include = nullptr,
size_t  start_offset = 0,
size_t  length_limit = std::numeric_limits<size_t>::max() 
)
staticprotected

Dump all the given minimizers, with optional subset restriction.

◆ dump_debug_query() [1/2]

void vg::MinimizerMapper::dump_debug_query ( const Alignment aln)
staticprotected

Print information about a read to be aligned.

◆ dump_debug_query() [2/2]

void vg::MinimizerMapper::dump_debug_query ( const Alignment aln1,
const Alignment aln2 
)
staticprotected

Print information about a read pair to be aligned.

◆ dump_debug_seeds()

void vg::MinimizerMapper::dump_debug_seeds ( const VectorView< Minimizer > &  minimizers,
const std::vector< Seed > &  seeds,
const std::vector< size_t > &  selected_seeds 
)
staticprotected

Print information about a selected set of seeds.

◆ dump_debug_sequence()

void vg::MinimizerMapper::dump_debug_sequence ( ostream &  out,
const string &  sequence,
size_t  start_offset = 0,
size_t  length_limit = std::numeric_limits<size_t>::max() 
)
staticprotected

Print a sequence with base numbering.

◆ extend_seed_group()

vector< GaplessExtension > vg::MinimizerMapper::extend_seed_group ( const std::vector< size_t > &  seed_group,
size_t  source_num,
const VectorView< Minimizer > &  minimizers,
const std::vector< Seed > &  seeds,
const string &  sequence,
size_t  max_mismatches,
vector< vector< size_t >> *  minimizer_kept_count = nullptr,
Funnel *  funnel = nullptr,
std::vector< std::vector< size_t >> *  seeds_used = nullptr 
) const
protected

Extends the seeds in a cluster or other grouping into a collection of GaplessExtension objects.

If funnel is set, the group is intended to come from the previous funnel stage and will be introduced in this one.

If seeds_used is not null, it should be an empty vector that gets filled with, for each gapless extension, the numbers of the seeds in seeds that are subsumed into the extension. They will be sorted by the stapled base (first base for forward strand, last base for reverse strand) in the read.

Note that multiple gapless extensions might cover each seed position or use each seed.

◆ extension_to_alignment()

void vg::MinimizerMapper::extension_to_alignment ( const GaplessExtension extension,
Alignment alignment 
) const
protected

Convert the GaplessExtension into an alignment. This assumes that the extension is a full-length alignment and that the sequence field of the alignment has been set.

◆ faster_cap()

double vg::MinimizerMapper::faster_cap ( const VectorView< Minimizer > &  minimizers,
vector< size_t > &  minimizers_explored,
const string &  sequence,
const string &  quality_bytes 
)
staticprotected

Compute a bound on the Phred score probability of a mapping being wrong due to base errors and unlocated minimizer hits having prevented us from finding the true alignment.

Algorithm uses a "sweep line" dynamic programming approach. For a read with minimizers aligned to it:

             000000000011111111112222222222
             012345678901234567890123456789

Read:        ******************************
Minimizer 1:    *****
Minimizer 2:       *****
Minimizer 3:                   *****
Minimizer 4:                      *****

For each distinct read interval of overlapping minimizers, e.g. in the example the intervals 3,4,5; 6,7; 8,9,10; 18,19,20; 21,22; and 23,24,25, we consider base errors that would result in the minimizers in the interval being incorrect.

We use dynamic programming sweeping left-to-right over the intervals to compute the probability of the minimum number of base errors needed to disrupt all the minimizers.

Will sort minimizers_explored (which is indices into minimizers) by minimizer start position.

◆ finalize_fragment_length_distr()

void vg::MinimizerMapper::finalize_fragment_length_distr ( )
inline

◆ find_chain_alignment()

Alignment vg::MinimizerMapper::find_chain_alignment ( const Alignment aln,
const VectorView< algorithms::Anchor > &  to_chain,
const std::vector< size_t > &  chain,
aligner_stats_t *  stats = nullptr 
) const
protected

Turn a chain into an Alignment.

Operating on the given input alignment, align the tails and intervening sequences along the given chain of perfect-match seeds, and return an optimal Alignment.

If given base processing stats for bases and for time, adds aligned bases and consumed time to them.

◆ find_minimizers()

std::vector< MinimizerMapper::Minimizer > vg::MinimizerMapper::find_minimizers ( const std::string &  sequence,
Funnel funnel 
) const
protected

Find the minimizers in the sequence using the minimizer index, and return them sorted in read order.

◆ find_optimal_tail_alignments()

void vg::MinimizerMapper::find_optimal_tail_alignments ( const Alignment aln,
const vector< GaplessExtension > &  extended_seeds,
LazyRNG rng,
Alignment best,
Alignment second_best 
) const
protected

Operating on the given input alignment, align the tails dangling off the given extended perfect-match seeds and produce an optimal alignment into the given output Alignment object, best, and the second best alignment into second_best.

Uses the given RNG to break ties.

◆ find_seeds()

std::vector< MinimizerMapper::Seed > vg::MinimizerMapper::find_seeds ( const std::vector< Minimizer > &  minimizers_in_read_order,
const VectorView< Minimizer > &  minimizers,
const Alignment aln,
Funnel funnel 
) const
protected

Find seeds for all minimizers passing the filters. Takes in minimizers sorted in read order, and a view of them sorted in score order.

◆ fix_dozeu_end_deletions()

void vg::MinimizerMapper::fix_dozeu_end_deletions ( Alignment rescued_alignment) const
protected

When dozeu doesn't have any seeds, its scan heuristic can lead to inaccurate anchoring with the end result that one end of the alignment has a deletion that doesn't connect to an aligned base. This function removes those deletions.

◆ fix_dozeu_score()

void vg::MinimizerMapper::fix_dozeu_score ( Alignment rescued_alignment,
const HandleGraph rescue_graph,
const std::vector< handle_t > &  topological_order 
) const
protected

When we use dozeu for rescue, the reported alignment score is incorrect. 1) Dozeu only gives the full-length bonus once. 2) There is no penalty for a softclip at the edge of the subgraph. This function calculates the score correctly. If the score is <= 0, we realign the read using GSSW. TODO: This should be unnecessary.

◆ flag_repetitive_minimizers()

void vg::MinimizerMapper::flag_repetitive_minimizers ( std::vector< Minimizer > &  minimizers_in_read_order) const
protected

Flag minimizers as being in repetitive regions of the read

◆ for_each_agglomeration_interval()

void vg::MinimizerMapper::for_each_agglomeration_interval ( const VectorView< Minimizer > &  minimizers,
const string &  sequence,
const string &  quality_bytes,
const vector< size_t > &  minimizer_indices,
const function< void(size_t, size_t, size_t, size_t)> &  iteratee 
)
staticprotected

Given a collection of minimizers, and a list of the minimizers we actually care about (as indices into the collection), iterate over common intervals of overlapping minimizer agglomerations.

Calls the given callback with (left, right, bottom, top), where left is the first base of the agglomeration interval (inclusive), right is the last base of the agglomeration interval (exclusive), bottom is the index of the first minimizer with an agglomeration in the interval and top is the index of the last minimizer with an agglomeration in the interval (exclusive).

minimizer_indices must be sorted by agglomeration end, and then by agglomeration start, so they can be decomposed into nice rectangles.

Note that bottom and top are offsets into minimizer_indices, NOT minimizers itself. Only contiguous ranges in minimizer_indices actually make sense.

◆ force_fragment_length_distr()

void vg::MinimizerMapper::force_fragment_length_distr ( double  mean,
double  stdev 
)
inline

◆ fragment_distr_is_finalized()

bool vg::MinimizerMapper::fragment_distr_is_finalized ( )
inline

◆ get_best_alignment_against_any_tree()

pair< Path, size_t > vg::MinimizerMapper::get_best_alignment_against_any_tree ( const vector< TreeSubgraph > &  trees,
const string &  sequence,
const Position default_position,
bool  pin_left,
size_t  longest_detectable_gap,
LazyRNG rng 
) const
protected

Find the best alignment of the given sequence against any of the trees provided in trees, where each tree is a TreeSubgraph over the GBWT graph. Each tree subgraph is rooted at the left in its own local coordinate space, even if we are pinning on the right.

If no mapping is possible (for example, because there are no trees), produce a pure insert at default_position.

Alignment is always pinned.

If pin_left is true, pin the alignment on the left to the root of each tree. Otherwise pin it on the right to the root of each tree.

Limits the length of the longest gap to longest_detectable_gap.

Returns alignments in gbwt_graph space.

◆ get_best_chain_stats()

void vg::MinimizerMapper::get_best_chain_stats ( Alignment aln,
const ZipCodeForest zip_code_forest,
const std::vector< Seed > &  seeds,
const VectorView< MinimizerMapper::Minimizer > &  minimizers,
const std::vector< std::vector< size_t >> &  fragments,
const std::unordered_map< size_t, std::vector< size_t >> &  good_fragments_in,
const std::vector< std::vector< size_t >> &  chains,
const std::vector< size_t > &  chain_source_tree,
const vector< algorithms::Anchor > &  seed_anchors,
const std::vector< int > &  chain_score_estimates,
bool &  best_chain_correct,
double &  best_chain_coverage,
size_t &  best_chain_longest_jump,
double &  best_chain_average_jump,
size_t &  best_chain_anchors,
size_t &  best_chain_anchor_length,
Funnel funnel 
) const
protected

Collect stats about the best chains for annotating the final alignment

◆ get_distance_limit()

size_t vg::MinimizerMapper::get_distance_limit ( size_t  read_length) const
inline

Get the distance limit for the given read length

◆ get_fragment_length_mean()

double vg::MinimizerMapper::get_fragment_length_mean ( ) const
inline

◆ get_fragment_length_sample_size()

size_t vg::MinimizerMapper::get_fragment_length_sample_size ( ) const
inline

◆ get_fragment_length_stdev()

double vg::MinimizerMapper::get_fragment_length_stdev ( ) const
inline

◆ get_log10_prob_of_disruption_in_interval()

double vg::MinimizerMapper::get_log10_prob_of_disruption_in_interval ( const VectorView< Minimizer > &  minimizers,
const string &  sequence,
const string &  quality_bytes,
const vector< size_t >::iterator &  disrupt_begin,
const vector< size_t >::iterator &  disrupt_end,
size_t  left,
size_t  right 
)
staticprotected

Gives the log10 prob of a base error in the given interval of the read, accounting for the disruption of specified minimizers.

minimizers is the collection of all minimizers

disrupt_begin and disrupt_end are iterators defining a sequence of indices of minimizers in minimizers that are disrupted.

left and right are the inclusive and exclusive bounds of the interval of the read where the disruption occurs.

◆ get_prob_of_disruption_in_column()

double vg::MinimizerMapper::get_prob_of_disruption_in_column ( const VectorView< Minimizer > &  minimizers,
const string &  sequence,
const string &  quality_bytes,
const vector< size_t >::iterator &  disrupt_begin,
const vector< size_t >::iterator &  disrupt_end,
size_t  index 
)
staticprotected

Gives the raw probability of a base error in the given column of the read, accounting for the disruption of specified minimizers.

minimizers is the collection of all minimizers

disrupt_begin and disrupt_end are iterators defining a sequence of indices of minimizers in minimizers that are disrupted.

index is the position in the read where the disruption occurs.

◆ get_read_coverage()

double vg::MinimizerMapper::get_read_coverage ( const Alignment aln,
const VectorView< std::vector< size_t >> &  seed_sets,
const std::vector< Seed > &  seeds,
const VectorView< Minimizer > &  minimizers 
) const
protected

Get the fraction of read bases covered by the given chains/fragments of seeds. A base is covered if it is between the first and last endpoints in the read of any of the given lists of seeds. The lists of seeds are each assumed to be colinear in the read.

◆ get_tail_forest()

vector< TreeSubgraph > vg::MinimizerMapper::get_tail_forest ( const GaplessExtension extended_seed,
size_t  read_length,
bool  left_tails,
size_t *  longest_detectable_gap = nullptr 
) const
protected

Get all the trees defining tails off the specified side of the specified gapless extension. Should only be called if a tail on that side exists, or this is a waste of time.

If the gapless extension starts or ends at a node boundary, there may be multiple trees produced, each with a distinct root.

If the gapless extension abuts the edge of the read, an empty forest will be produced.

Each tree is represented as a TreeSubgraph over our gbwt_graph.

If left_tails is true, the trees read out of the left sides of the gapless extension. Otherwise they read out of the right side.

As a side effect, saves the length of the longest detectable gap in an alignment of a tail to the forest into the provided location, if set.

◆ immutable_path_from_length()

size_t vg::MinimizerMapper::immutable_path_from_length ( const ImmutablePath path)
staticprotected

Get the from length of an ImmutablePath.

Can't be called path_from_length or it will shadow the one for Paths instead of overloading.

◆ log_alignment() [1/2]

string vg::MinimizerMapper::log_alignment ( const Alignment aln)
staticprotected

Turn an Alignment into a conveniently-sized string for logging.

◆ log_alignment() [2/2]

string vg::MinimizerMapper::log_alignment ( const Path path,
bool  force_condensed = false 
)
staticprotected

Turn a Path from an alignment into a conveniently-sized string for logging.

◆ log_bits()

string vg::MinimizerMapper::log_bits ( const std::vector< bool > &  bits)
staticprotected

Turn a list of bit flags into a compact representation.

◆ log_name()

string vg::MinimizerMapper::log_name ( )
staticprotected

Get the thread identifier prefix for logging.

◆ longest_detectable_gap_in_range()

size_t vg::MinimizerMapper::longest_detectable_gap_in_range ( const Alignment aln,
const std::string::const_iterator &  sequence_begin,
const std::string::const_iterator &  sequence_end,
const GSSWAligner aligner 
)
staticprotected

Determine the gap limit to use when aligning the given range of sequence bases for the given Alignment.

Accounts for the longest gap that could be detected anywhere in the range, not just at the very beginning or the very end, or at a single point like GSSWAligner::longest_detectable_gap().

◆ map() [1/2]

vector< Alignment > vg::MinimizerMapper::map ( Alignment aln)

Map the given read. Return a vector of alignments that it maps to, winner first.

◆ map() [2/2]

void vg::MinimizerMapper::map ( Alignment aln,
AlignmentEmitter alignment_emitter 
)

Map the given read, and send output to the given AlignmentEmitter. May be run from any thread. TODO: Can't be const because the clusterer's cluster_seeds isn't const.

◆ map_from_chains()

vector< Alignment > vg::MinimizerMapper::map_from_chains ( Alignment aln)

Map the given read using chaining of seeds. Return a vector of alignments that it maps to, winner first.

◆ map_from_extensions()

vector< Alignment > vg::MinimizerMapper::map_from_extensions ( Alignment aln)

Map the given read using gapless extensions. Return a vector of alignments that it maps to, winner first.

◆ map_paired() [1/2]

pair< vector< Alignment >, vector< Alignment > > vg::MinimizerMapper::map_paired ( Alignment aln1,
Alignment aln2 
)

Map the given pair of reads, where aln1 is upstream of aln2 and they are oriented towards each other in the graph.

If the fragment length distribution is not yet fixed, reads will be mapped independently. Otherwise, they will be mapped according to the fragment length distribution.

◆ map_paired() [2/2]

pair< vector< Alignment >, vector< Alignment > > vg::MinimizerMapper::map_paired ( Alignment aln1,
Alignment aln2,
vector< pair< Alignment, Alignment >> &  ambiguous_pair_buffer 
)

Map the given pair of reads, where aln1 is upstream of aln2 and they are oriented towards each other in the graph.

If the reads are ambiguous and there's no fragment length distribution fixed yet, they will be dropped into ambiguous_pair_buffer.

Otherwise, at least one result will be returned for them (although it may be the unmapped alignment).

◆ no_chain_info()

static gbwtgraph::Payload vg::MinimizerMapper::no_chain_info ( )
inlinestaticprotected

How should we initialize chain info when it's not stored in the minimizer index?

◆ pair_all()

void vg::MinimizerMapper::pair_all ( std::array< vector< Alignment >, 2 > &  mappings) const
protected

Set pair partner references for paired mapping results.

◆ pick_mappings_from_alignments()

void vg::MinimizerMapper::pick_mappings_from_alignments ( Alignment aln,
const std::vector< Alignment > &  alignments,
const std::vector< double > &  multiplicity_by_alignment,
const std::vector< size_t > &  alignments_to_source,
const std::vector< int > &  chain_score_estimates,
std::vector< Alignment > &  mappings,
std::vector< double > &  scores,
std::vector< double > &  multiplicity_by_mapping,
bool &  funnel_depleted,
LazyRNG rng,
Funnel funnel 
) const
protected

◆ process_until_threshold_a()

template<typename Score >
void vg::MinimizerMapper::process_until_threshold_a ( size_t  items,
const function< Score(size_t)> &  get_score,
double  threshold,
size_t  min_count,
size_t  max_count,
LazyRNG rng,
const function< bool(size_t, size_t)> &  process_item,
const function< void(size_t)> &  discard_item_by_count,
const function< void(size_t)> &  discard_item_by_score 
) const
protected

Given a count of items, a function to get the score of each, a score-difference-from-the-best cutoff, a min and max processed item count, and a function to get a sort-shuffling seed for breaking ties, process items in descending score order by calling process_item with the item's number and the number of other items with the same or better score, until min_count items are processed and either max_count items are processed or the score difference threshold is hit (or we run out of items).

If process_item returns false, the item is skipped and does not count against min_count or max_count.

Call discard_item_by_count with the item's number for all remaining items that would pass the score threshold.

Call discard_item_by_score with the item's number for all remaining items that would fail the score threshold.

◆ process_until_threshold_b()

template<typename Score >
void vg::MinimizerMapper::process_until_threshold_b ( const vector< Score > &  scores,
double  threshold,
size_t  min_count,
size_t  max_count,
LazyRNG rng,
const function< bool(size_t, size_t)> &  process_item,
const function< void(size_t)> &  discard_item_by_count,
const function< void(size_t)> &  discard_item_by_score 
) const
protected

Same as the other process_until_threshold functions, except using a vector to supply scores.

◆ process_until_threshold_c()

template<typename Score >
void vg::MinimizerMapper::process_until_threshold_c ( size_t  items,
const function< Score(size_t)> &  get_score,
const function< bool(size_t, size_t)> &  comparator,
double  threshold,
size_t  min_count,
size_t  max_count,
LazyRNG get_seed,
const function< bool(size_t, size_t)> &  process_item,
const function< void(size_t)> &  discard_item_by_count,
const function< void(size_t)> &  discard_item_by_score 
) const
protected

Same as the other process_until_threshold functions, except user supplies comparator to sort the items (must still be sorted by score).

◆ score_alignment_pair()

double vg::MinimizerMapper::score_alignment_pair ( Alignment aln1,
Alignment aln2,
int64_t  fragment_distance 
)
protected

Score a pair of alignments given the distance between them

◆ score_cluster()

void vg::MinimizerMapper::score_cluster ( Cluster cluster,
size_t  i,
const VectorView< Minimizer > &  minimizers,
const std::vector< Seed > &  seeds,
size_t  seq_length,
Funnel funnel 
) const
protected

Determine cluster score, read coverage, and a vector of flags for the minimizers present in the cluster. Score is the sum of the scores of distinct minimizers in the cluster, while read coverage is the fraction of the read covered by seeds in the cluster.

Puts the cluster in the funnel as coming from its seeds.

◆ score_extension_group()

int vg::MinimizerMapper::score_extension_group ( const Alignment aln,
const vector< GaplessExtension > &  extended_seeds,
int  gap_open_penalty,
int  gap_extend_penalty 
)
staticprotected

Score the given group of gapless extensions. Determines the best score that can be obtained by chaining extensions together, using the given gap open and gap extend penalties to charge for either overlaps or gaps in coverage of the read.

Enforces that overlaps cannot result in containment.

Input extended seeds must be sorted by start position.

◆ score_extensions() [1/2]

std::vector< int > vg::MinimizerMapper::score_extensions ( const std::vector< std::pair< std::vector< GaplessExtension >, size_t >> &  extensions,
const Alignment aln,
Funnel funnel 
) const
protected

Score the set of extensions for each cluster using score_extension_group(). Return the scores in the same order as the extensions.

This version allows the collections of extensions to be scored to come with annotating read numbers, which are ignored.

◆ score_extensions() [2/2]

std::vector< int > vg::MinimizerMapper::score_extensions ( const std::vector< std::vector< GaplessExtension >> &  extensions,
const Alignment aln,
Funnel funnel 
) const
protected

Score the set of extensions for each cluster using score_extension_group(). Return the scores in the same order as the extension groups.

◆ score_tree()

std::pair< double, double > vg::MinimizerMapper::score_tree ( const ZipCodeForest zip_code_forest,
size_t  i,
const VectorView< Minimizer > &  minimizers,
const std::vector< Seed > &  seeds,
size_t  seq_length,
Funnel funnel 
) const
protected

Determine score and read coverage for a zip code tree. Score is the sum of the scores of distinct minimizers in the tree, while read coverage is the fraction of the read covered by seeds in the tree.

Puts the tree in the funnel as coming from its seeds.

◆ seeds_in_subgraph()

GaplessExtender::cluster_type vg::MinimizerMapper::seeds_in_subgraph ( const VectorView< Minimizer > &  minimizers,
const std::unordered_set< nid_t > &  subgraph 
) const
protected

Return all the non-redundant seeds in the subgraph, including those from minimizers not used for mapping.

◆ set_alignment_scores() [1/4]

void vg::AlignerClient::set_alignment_scores

Set the aligner scoring parameters and create the stored aligner instances. The score matrix should be a 4 x 4 array in the order (ACGT). Other overloads of set_alignment_scores all call this one.

◆ set_alignment_scores() [2/4]

void vg::MinimizerMapper::set_alignment_scores ( const int8_t *  score_matrix,
int8_t  gap_open,
int8_t  gap_extend,
int8_t  full_length_bonus 
)
virtual

Set the aligner scoring parameters and create the stored aligner instances. The score matrix should be a 4 x 4 array in the order (ACGT). Other overloads of set_alignment_scores all call this one.

Reimplemented from vg::AlignerClient.

◆ set_alignment_scores() [3/4]

void vg::AlignerClient::set_alignment_scores

Set all the aligner scoring parameters and create the stored aligner instances.

◆ set_alignment_scores() [4/4]

void vg::AlignerClient::set_alignment_scores

Set the aligner scoring parameters and create the stored aligner instances. The stream should contain a 4 x 4 whitespace-separated substitution matrix (in the order ACGT).

◆ sort_minimizers_by_score()

std::vector< size_t > vg::MinimizerMapper::sort_minimizers_by_score ( const std::vector< Minimizer > &  minimizers_in_read_order,
LazyRNG rng 
) const
protected

Return the indices of all the minimizers, sorted in descending order by their minimizers' scores.

◆ tag_seeds()

void vg::MinimizerMapper::tag_seeds ( const Alignment aln,
const std::vector< Seed >::const_iterator &  begin,
const std::vector< Seed >::const_iterator &  end,
const VectorView< Minimizer > &  minimizers,
size_t  funnel_offset,
Funnel funnel 
) const
protected

If tracking correctness, mark seeds that are correctly mapped as correct in the funnel, based on proximity along paths to the input read's refpos. Otherwise, tag just as placed, with the seed's read interval. Assumes we are tracking provenance.

◆ to_anchor() [1/2]

algorithms::Anchor vg::MinimizerMapper::to_anchor ( const Alignment aln,
const VectorView< Minimizer > &  minimizers,
std::vector< Seed > &  seeds,
size_t  seed_number,
const HandleGraph graph,
const Aligner aligner 
)
staticprotected

Convert a single seed to a single chaining anchor.

◆ to_anchor() [2/2]

algorithms::Anchor vg::MinimizerMapper::to_anchor ( const Alignment aln,
size_t  read_start,
size_t  read_end,
const std::vector< size_t > &  sorted_seeds,
const std::vector< algorithms::Anchor > &  seed_anchors,
const std::vector< size_t >::const_iterator &  mismatch_begin,
const std::vector< size_t >::const_iterator &  mismatch_end,
const HandleGraph graph,
const Aligner aligner 
)
staticprotected

Convert a read region, and the seeds that that region covers the stapled bases of (sorted by stapled base), into a single chaining anchor. Takes an iterator range of positions within the base range that are mismatches.

◆ to_anchors()

std::vector< algorithms::Anchor > vg::MinimizerMapper::to_anchors ( const Alignment aln,
const VectorView< Minimizer > &  minimizers,
std::vector< Seed > &  seeds 
) const
protected

Convert a collection of seeds to a collection of chaining anchors.

◆ to_path()

Path vg::MinimizerMapper::to_path ( const ImmutablePath path)
staticprotected

Convert an ImmutablePath to a Path.

◆ to_wfa_alignment()

WFAAlignment vg::MinimizerMapper::to_wfa_alignment ( const algorithms::Anchor anchor,
const Alignment aln,
const Aligner aligner 
) const
protected

Convert an Anchor to a WFAAlignment, given the input read it is from and the Aligner to use for scoring. Accounts for full length bonuses if the anchor abuts the end of the read.

◆ unoriented_distance_between()

int64_t vg::MinimizerMapper::unoriented_distance_between ( const pos_t pos1,
const pos_t pos2 
) const
protected

Get the unoriented distance between a pair of positions

◆ validate_clusters()

bool vg::MinimizerMapper::validate_clusters ( const std::vector< std::vector< Cluster >> &  clusters,
const std::vector< std::vector< Seed >> &  seeds,
size_t  read_limit,
size_t  fragment_limit 
) const
protected

Do a brute check of the clusters. Print errors to stderr.

◆ wfa_alignment_to_alignment()

void vg::MinimizerMapper::wfa_alignment_to_alignment ( const WFAAlignment wfa_alignment,
Alignment alignment 
) const
protected

Convert a WFAAlignment into a vg Alignment. This assumes that the WFAAlignment is a full-length alignment and that the sequence field of the vg Alignment has been set.

◆ window_breaking_quality()

static double vg::MinimizerMapper::window_breaking_quality ( const VectorView< Minimizer > &  minimizers,
vector< size_t > &  broken,
const string &  sequence,
const string &  quality_bytes 
)
staticprotected

Compute a bound on the Phred score probability of having created the agglomerations of the specified minimizers by base errors from the given sequence, which was sequenced with the given qualities.

No limit is imposed if broken is empty.

Takes the collection of all minimizers found, and a vector of the indices of minimizers we are interested in the agglomerations of. May modify the order of that index vector.

Also takes the sequence of the read (to avoid Ns) and the quality string (interpreted as a byte array).

Currently computes a lower-score-bound, upper-probability-bound, suitable for use as a mapping quality cap, by assuming the easiest-to-disrupt possible layout of the windows, and the lowest possible qualities for the disrupting bases.

◆ with_dagified_local_graph()

void vg::MinimizerMapper::with_dagified_local_graph ( const pos_t left_anchor,
const pos_t right_anchor,
size_t  max_path_length,
const HandleGraph graph,
const std::function< void(DeletableHandleGraph &, const std::function< std::pair< nid_t, bool >(const handle_t &)> &)> &  callback 
)
staticprotected

Clip out the part of the graph between the given positions, and dagify it from the perspective of the anchors. If a left anchor is set, all heads should correspond to the left anchor, and if a right anchor is set, all tails should correspond to the right anchor. At least one anchor must be set. Both anchors may be on the same node.

Calls the callback with an extracted, strand-split, dagified graph, and a function that translates from handle in the dagified graph to node ID and orientation in the base graph.

Friends And Related Function Documentation

◆ TestMinimizerMapper

friend class TestMinimizerMapper
friend

Member Data Documentation

◆ align_from_chains

bool vg::MinimizerMapper::align_from_chains = default_align_from_chains

◆ chain_score_threshold

double vg::MinimizerMapper::chain_score_threshold = default_chain_score_threshold

◆ choose_band_padding

std::function<size_t(const Alignment&, const HandleGraph&)> vg::MinimizerMapper::choose_band_padding
protected

We have a function for determining band padding for banded alignment when aligning from chains.

◆ cluster_coverage_threshold

double vg::MinimizerMapper::cluster_coverage_threshold = default_cluster_coverage_threshold

◆ cluster_score_threshold

double vg::MinimizerMapper::cluster_score_threshold = default_cluster_score_threshold

◆ clusterer

SnarlDistanceIndexClusterer vg::MinimizerMapper::clusterer
protected

We have a clusterer.

◆ default_align_from_chains

constexpr bool vg::MinimizerMapper::default_align_from_chains = false
staticconstexpr

If true, produce alignments from extension sets by chaining gapless extensions up and aligning the sequences between them. If false, produce alignments by aligning the tails off of individual gapless extensions.

◆ default_chain_score_threshold

constexpr double vg::MinimizerMapper::default_chain_score_threshold = 100
staticconstexpr

If a chain's score is smaller than the best chain's score by more than this much, don't align it

◆ default_cluster_coverage_threshold

constexpr double vg::MinimizerMapper::default_cluster_coverage_threshold = 0.3
staticconstexpr

If the read coverage of a cluster is less than the best coverage of any cluster by more than this much, don't extend it.

◆ default_cluster_score_threshold

constexpr double vg::MinimizerMapper::default_cluster_score_threshold = 50
staticconstexpr

If a cluster's score is smaller than the best score of any cluster by more than this much, then don't extend it.

◆ default_distance_limit

constexpr size_t vg::MinimizerMapper::default_distance_limit = 200
staticconstexpr

◆ default_do_dp

constexpr bool vg::MinimizerMapper::default_do_dp = true
staticconstexpr

If false, skip computing base-level alignments.

◆ default_exclude_overlapping_min

constexpr bool vg::MinimizerMapper::default_exclude_overlapping_min = false
staticconstexpr

If set, exclude overlapping minimizers.

◆ default_extension_score_threshold

constexpr int vg::MinimizerMapper::default_extension_score_threshold = 1
staticconstexpr

◆ default_extension_set_min_score

constexpr int vg::MinimizerMapper::default_extension_set_min_score = 20
staticconstexpr

Even if we would have fewer than min_extension_sets results, don't process anything with a score smaller than this.

◆ default_extension_set_score_threshold

constexpr double vg::MinimizerMapper::default_extension_set_score_threshold = 20
staticconstexpr

◆ default_fragment_gap_scale

constexpr double vg::MinimizerMapper::default_fragment_gap_scale = 1.0
staticconstexpr

How much of a multiple should we apply to each transition's gap penalty at fragmenting?

◆ default_fragment_max_indel_bases

constexpr size_t vg::MinimizerMapper::default_fragment_max_indel_bases = 2000
staticconstexpr

How many bases of indel should we allow in fragments?

◆ default_fragment_max_indel_bases_per_base

constexpr double vg::MinimizerMapper::default_fragment_max_indel_bases_per_base = 0.2
staticconstexpr

How many bases of indel should we allow in fragments per base of read length?

◆ default_fragment_max_lookback_bases

constexpr size_t vg::MinimizerMapper::default_fragment_max_lookback_bases = 300
staticconstexpr

How many bases should we look back when making fragments?

◆ default_fragment_max_lookback_bases_per_base

constexpr double vg::MinimizerMapper::default_fragment_max_lookback_bases_per_base = 0.03
staticconstexpr

How many bases should we look back when making fragments, per base of read length?

◆ default_fragment_max_min_score

constexpr double vg::MinimizerMapper::default_fragment_max_min_score = std::numeric_limits<double>::max()
staticconstexpr

How high should we let the score threshold based on the best fragment's score get?

◆ default_fragment_min_score

constexpr double vg::MinimizerMapper::default_fragment_min_score = 60
staticconstexpr

What minimum score in points should a fragment have in order to keep it? Needs to be set to some kind of significance threshold.

◆ default_fragment_points_per_possible_match

constexpr double vg::MinimizerMapper::default_fragment_points_per_possible_match = 0
staticconstexpr

◆ default_fragment_score_fraction

constexpr double vg::MinimizerMapper::default_fragment_score_fraction = 0.1
staticconstexpr

How good should a fragment be in order to keep it? Fragments with scores less than this fraction of the best fragment's score will not be used.

◆ default_fragment_set_score_threshold

constexpr double vg::MinimizerMapper::default_fragment_set_score_threshold = 0
staticconstexpr

If a fragment set's score is smaller than the best fragment set's score by more than this much, don't align it

◆ default_gap_scale

constexpr double vg::MinimizerMapper::default_gap_scale = 1.0
staticconstexpr

How much of a multiple should we apply to each transition's gap penalty at chaining?

◆ default_gapless_extension_limit

constexpr size_t vg::MinimizerMapper::default_gapless_extension_limit = 0
staticconstexpr

Do gapless extension to the seeds in each tree before fragmenting the tree if the read length is less than the limit.

◆ default_hard_hit_cap

constexpr size_t vg::MinimizerMapper::default_hard_hit_cap = 500
staticconstexpr

Ignore all minimizers with more than hard_hit_cap hits.

◆ default_hit_cap

constexpr size_t vg::MinimizerMapper::default_hit_cap = 10
staticconstexpr

Use all minimizers with at most hit_cap hits.

◆ default_item_bonus

constexpr int vg::MinimizerMapper::default_item_bonus = 0
staticconstexpr

How much of a bonus should we give to each item in fragmenting/chaining?

◆ default_item_scale

constexpr double vg::MinimizerMapper::default_item_scale = 1.0
staticconstexpr

How much of a multiple should we apply to each item's non-bonus score in fragmenting/chaining?

◆ default_mapq_score_scale

constexpr double vg::MinimizerMapper::default_mapq_score_scale = 1.0
staticconstexpr

How should we scale scores before mapq, for calibration.

◆ default_mapq_score_window

constexpr size_t vg::MinimizerMapper::default_mapq_score_window = 0
staticconstexpr

What number of bp should we re-scale scores to for MAPQ, for calibration? 0 for off.

◆ default_max_alignments

constexpr size_t vg::MinimizerMapper::default_max_alignments = 8
staticconstexpr

How many alignments should we make, max?

◆ default_max_chain_connection

constexpr size_t vg::MinimizerMapper::default_max_chain_connection = 100
staticconstexpr

When converting chains to alignments, what's the longest gap between items we will try to WFA align? Passing strings longer than ~100bp can cause WFAAligner to run for a pathologically long amount of time. May not be 0.

◆ default_max_chaining_problems

constexpr int vg::MinimizerMapper::default_max_chaining_problems = std::numeric_limits<int>::max()
staticconstexpr

Do no more than this many chaining problems.

◆ default_max_chains_per_tree

constexpr size_t vg::MinimizerMapper::default_max_chains_per_tree = 1
staticconstexpr

Allow up to this many chains per tree.

◆ default_max_direct_to_chain

constexpr size_t vg::MinimizerMapper::default_max_direct_to_chain = 0
staticconstexpr

Sometimes we don't do chaining but instead turn fragments directly into chains. If this is 0, then do chaining. Otherwise, take up to this many fragments and turn them into chains.

◆ default_max_dozeu_cells

constexpr size_t vg::MinimizerMapper::default_max_dozeu_cells = (size_t)(1.5 * 1024 * 1024)
staticconstexpr

How big of an alignment in POA cells should we ever try to do with Dozeu? TODO: Lift this when Dozeu's allocator is able to work with >4 MB of memory. Each cell is 16 bits in Dozeu, and we leave some room for the query and padding to full SSE registers. Note that a very chopped graph might still break this!

◆ default_max_dp_cells

constexpr size_t vg::MinimizerMapper::default_max_dp_cells = std::numeric_limits<size_t>::max()
staticconstexpr

How many DP cells should we be willing to do for an end-pinned alignment? If we want to do more than this, just leave tail unaligned.

◆ default_max_extension_mismatches

constexpr size_t vg::MinimizerMapper::default_max_extension_mismatches = GaplessExtender::MAX_MISMATCHES
staticconstexpr

How many mismatches should we allow in gapless extension (except for start node where the limit doesn't count)?

◆ default_max_extensions

constexpr size_t vg::MinimizerMapper::default_max_extensions = 800
staticconstexpr

How many clusters should we produce gapless extensions for, max?

◆ default_max_fragment_length

constexpr size_t vg::MinimizerMapper::default_max_fragment_length = 2000
staticconstexpr

What is the maximum fragment length that we accept as valid for paired-end reads?

◆ default_max_fragments

constexpr size_t vg::MinimizerMapper::default_max_fragments = std::numeric_limits<size_t>::max()
staticconstexpr

How many fragments should we try and make when fragmenting something?

◆ default_max_indel_bases

constexpr size_t vg::MinimizerMapper::default_max_indel_bases = 2000
staticconstexpr

How many bases of indel should we allow in chaining?

◆ default_max_indel_bases_per_base

constexpr double vg::MinimizerMapper::default_max_indel_bases_per_base = 0.2
staticconstexpr

How many bases of indel should we allow in chaining, per base of read length?

◆ default_max_local_extensions

constexpr size_t vg::MinimizerMapper::default_max_local_extensions = numeric_limits<size_t>::max()
staticconstexpr

How many extensions should we try as seeds within a mapping location?

◆ default_max_lookback_bases

constexpr size_t vg::MinimizerMapper::default_max_lookback_bases = 3000
staticconstexpr

How many bases should we look back when chaining?

◆ default_max_lookback_bases_per_base

constexpr double vg::MinimizerMapper::default_max_lookback_bases_per_base = 0.3
staticconstexpr

How many bases should we look back when chaining, per base of read length?

◆ default_max_middle_dp_length

constexpr size_t vg::MinimizerMapper::default_max_middle_dp_length = std::numeric_limits<int32_t>::max()
staticconstexpr

How long of a DP can we do before something might go wrong with BandedGlobalAligner or the GBWT-based WFA?

◆ default_max_middle_gap

constexpr size_t vg::MinimizerMapper::default_max_middle_gap = std::numeric_limits<size_t>::max()
staticconstexpr

How many gap bases should we allow in a between-seed alignment, max?

◆ default_max_min_chain_score

constexpr int vg::MinimizerMapper::default_max_min_chain_score = 200
staticconstexpr

Limit the min chain score to no more than this.

◆ default_max_multimaps

constexpr size_t vg::MinimizerMapper::default_max_multimaps = 1
staticconstexpr

◆ default_max_rescue_attempts

constexpr size_t vg::MinimizerMapper::default_max_rescue_attempts = 15
staticconstexpr

For paired end mapping, how many times should we attempt rescue (per read)?

◆ default_max_skipped_bases

constexpr size_t vg::MinimizerMapper::default_max_skipped_bases = 0
staticconstexpr

When turning chains into alignments, we can skip seeds to create gaps up to this length in the graph

◆ default_max_tail_dp_length

constexpr size_t vg::MinimizerMapper::default_max_tail_dp_length = 30000
staticconstexpr

How long of a DP can we do before Dozeu gets lost at traceback due to 16-bit score overflow?

◆ default_max_tail_gap

constexpr size_t vg::MinimizerMapper::default_max_tail_gap = std::numeric_limits<size_t>::max()
staticconstexpr

How many gap bases should we allow in a Dozeu tail alignment, max?

◆ default_max_tail_length

constexpr size_t vg::MinimizerMapper::default_max_tail_length = 100
staticconstexpr

Similarly, what is the maximum tail length we will try to WFA align?

◆ default_max_to_fragment

constexpr size_t vg::MinimizerMapper::default_max_to_fragment = 10
staticconstexpr

How many things should we produce fragments for, max?

◆ default_max_unique_min

constexpr size_t vg::MinimizerMapper::default_max_unique_min = 500
staticconstexpr

Maximum number of distinct minimizers to take.

◆ default_min_chain_score_per_base

constexpr double vg::MinimizerMapper::default_min_chain_score_per_base = 0.01
staticconstexpr

Even if we would have fewer than min_chains results, don't process anything with a score smaller than this, per read base.

◆ default_min_chaining_problems

constexpr int vg::MinimizerMapper::default_min_chaining_problems = 1
staticconstexpr

Disregard the fragment set score thresholds when they would give us fewer than this many chaining problems done.

◆ default_min_chains

constexpr int vg::MinimizerMapper::default_min_chains = 4
staticconstexpr

Disregard the chain score thresholds when they would give us fewer than this many chains aligned.

◆ default_min_extension_sets

constexpr int vg::MinimizerMapper::default_min_extension_sets = 2
staticconstexpr

Disregard the extension set score thresholds when they would give us fewer than this many extension sets.

◆ default_min_extensions

constexpr size_t vg::MinimizerMapper::default_min_extensions = 2
staticconstexpr

Accept at least this many clusters for gapless extension.

◆ default_min_to_fragment

constexpr size_t vg::MinimizerMapper::default_min_to_fragment = 4
staticconstexpr

How many things should we produce fragments for, min?

◆ default_min_unique_node_fraction

constexpr double vg::MinimizerMapper::default_min_unique_node_fraction = 0.0
staticconstexpr

How much of an alignment needs to be from distinct nodes to be a distinct alignment?

◆ default_minimizer_coverage_flank

constexpr size_t vg::MinimizerMapper::default_minimizer_coverage_flank = 250
staticconstexpr

◆ default_minimizer_downsampling_max_window_length

constexpr size_t vg::MinimizerMapper::default_minimizer_downsampling_max_window_length = std::numeric_limits<size_t>::max()
staticconstexpr

◆ default_minimizer_downsampling_window_count

constexpr size_t vg::MinimizerMapper::default_minimizer_downsampling_window_count = 0
staticconstexpr

Window count for minimizer downsampling.

◆ default_minimizer_score_fraction

constexpr double vg::MinimizerMapper::default_minimizer_score_fraction = 0.9
staticconstexpr

Take minimizers between hit_cap and hard_hit_cap hits until this fraction of total score

◆ default_num_bp_per_min

constexpr size_t vg::MinimizerMapper::default_num_bp_per_min = 1000
staticconstexpr

Number of minimizers to select based on read_len/num_bp_per_min.

◆ default_pad_cluster_score_threshold

constexpr double vg::MinimizerMapper::default_pad_cluster_score_threshold = 20
staticconstexpr

If the second best cluster's score is no more than this many points below the cutoff set by cluster_score_threshold, snap that cutoff down to the second best cluster's score, to avoid throwing away promising secondaries.

◆ default_pad_zipcode_tree_score_threshold

constexpr double vg::MinimizerMapper::default_pad_zipcode_tree_score_threshold = 20
staticconstexpr

If the second best tree's score is no more than this many points below the cutoff set by zipcode_tree_score_threshold, snap that cutoff down to the second best tree's score, to avoid throwing away promising secondaries.

◆ default_paired_distance_stdevs

constexpr double vg::MinimizerMapper::default_paired_distance_stdevs = 2.0
staticconstexpr

◆ default_paired_rescue_score_limit

constexpr double vg::MinimizerMapper::default_paired_rescue_score_limit = 0.9
staticconstexpr

How close does an alignment have to be to the best alignment for us to rescue on it.

◆ default_points_per_possible_match

constexpr double vg::MinimizerMapper::default_points_per_possible_match = 0
staticconstexpr

◆ default_rescue_seed_limit

constexpr size_t vg::MinimizerMapper::default_rescue_seed_limit = 100
staticconstexpr

Do not attempt rescue if there are more seeds in the rescue subgraph.

◆ default_rescue_subgraph_stdevs

constexpr double vg::MinimizerMapper::default_rescue_subgraph_stdevs = 4.0
staticconstexpr

How many stdevs from the mean do we extract a subgraph from?

◆ default_set_refpos

constexpr bool vg::MinimizerMapper::default_set_refpos = false
staticconstexpr

Set refpos field of alignments to positions on nodes they visit.

◆ default_show_work

constexpr bool vg::MinimizerMapper::default_show_work = false
staticconstexpr

If set, log what the mapper is thinking in its mapping of each read.

◆ default_sort_by_chain_score

constexpr bool vg::MinimizerMapper::default_sort_by_chain_score = false
staticconstexpr

Should alignments be ranked by chain score instead of base-level score?

◆ default_track_correctness

constexpr bool vg::MinimizerMapper::default_track_correctness = false
staticconstexpr

Guess which seed hits are correct by location in the linear reference and track if/when their descendants make it through stages of the algorithm. Only works if track_provenance is true.

◆ default_track_position

constexpr bool vg::MinimizerMapper::default_track_position = false
staticconstexpr

Track linear reference position for placements in log output.

◆ default_track_provenance

constexpr bool vg::MinimizerMapper::default_track_provenance = false
staticconstexpr

Track which internal work items came from which others during each stage of the mapping algorithm.

◆ default_use_explored_cap

constexpr bool vg::MinimizerMapper::default_use_explored_cap = false
staticconstexpr

If set, cap mapping quality based on minimizer layout in the read. Only really likely to help for short reads.

◆ default_wfa_distance

constexpr int vg::MinimizerMapper::default_wfa_distance = WFAExtender::ErrorModel::default_distance().min
staticconstexpr

How far behind the leader should the WFA be allowed to get?

◆ default_wfa_distance_per_base

constexpr double vg::MinimizerMapper::default_wfa_distance_per_base = WFAExtender::ErrorModel::default_distance().per_base
staticconstexpr

How far behind the leader should the WFA be allowed to get, per base of read sequence?

◆ default_wfa_max_distance

constexpr int vg::MinimizerMapper::default_wfa_max_distance = WFAExtender::ErrorModel::default_distance().max
staticconstexpr

How far behind the leader should the WFA be allowed to get, at any read length?

◆ default_wfa_max_max_mismatches

constexpr int vg::MinimizerMapper::default_wfa_max_max_mismatches = 20
staticconstexpr

How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails maximum, at any read length?

◆ default_wfa_max_mismatches

constexpr int vg::MinimizerMapper::default_wfa_max_mismatches = 2
staticconstexpr

How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails?

◆ default_wfa_max_mismatches_per_base

constexpr double vg::MinimizerMapper::default_wfa_max_mismatches_per_base = 0.1
staticconstexpr

How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails per base of read sequence?

◆ default_zipcode_tree_coverage_threshold

constexpr double vg::MinimizerMapper::default_zipcode_tree_coverage_threshold = 0.3
staticconstexpr

If the read coverage of a tree is less than the best coverage of any tree by more than this much, don't extend it

◆ default_zipcode_tree_scale

constexpr double vg::MinimizerMapper::default_zipcode_tree_scale = 2.0
staticconstexpr

When making zipcode trees, at what multiple of the read length should the trees be split?

◆ default_zipcode_tree_score_threshold

constexpr double vg::MinimizerMapper::default_zipcode_tree_score_threshold = 50
staticconstexpr

How far do we want to go down looking at zip code trees to make fragments?

◆ distance_index

SnarlDistanceIndex* vg::MinimizerMapper::distance_index
protected

◆ distance_limit

size_t vg::MinimizerMapper::distance_limit = default_distance_limit

◆ do_dp

bool vg::MinimizerMapper::do_dp = default_do_dp

◆ exclude_overlapping_min

bool vg::MinimizerMapper::exclude_overlapping_min = default_exclude_overlapping_min

◆ extender

std::unique_ptr<GaplessExtender> vg::MinimizerMapper::extender
protected

We have a gapless extender to extend seed hits in haplotype space. Because this needs a reference to an Aligner, and because changing the scoring parameters deletes all the aligners, we need to keep this somewhere we can clear out.

◆ extension_score_threshold

int vg::MinimizerMapper::extension_score_threshold = default_extension_score_threshold

◆ extension_set_min_score

int vg::MinimizerMapper::extension_set_min_score = default_extension_set_min_score

◆ extension_set_score_threshold

double vg::MinimizerMapper::extension_set_score_threshold = default_extension_set_score_threshold

◆ fragment_gap_scale

double vg::MinimizerMapper::fragment_gap_scale = default_fragment_gap_scale

◆ fragment_length_distr

FragmentLengthDistribution vg::MinimizerMapper::fragment_length_distr
protected

We have a distribution for read fragment lengths that takes care of knowing when we've observed enough good ones to learn a good distribution.

◆ fragment_max_indel_bases

size_t vg::MinimizerMapper::fragment_max_indel_bases = default_fragment_max_indel_bases

◆ fragment_max_indel_bases_per_base

double vg::MinimizerMapper::fragment_max_indel_bases_per_base = default_fragment_max_indel_bases_per_base

◆ fragment_max_lookback_bases

size_t vg::MinimizerMapper::fragment_max_lookback_bases = default_fragment_max_lookback_bases

◆ fragment_max_lookback_bases_per_base

double vg::MinimizerMapper::fragment_max_lookback_bases_per_base = default_fragment_max_lookback_bases_per_base

◆ fragment_max_min_score

double vg::MinimizerMapper::fragment_max_min_score = default_fragment_max_min_score

◆ fragment_min_score

double vg::MinimizerMapper::fragment_min_score = default_fragment_min_score

◆ fragment_points_per_possible_match

double vg::MinimizerMapper::fragment_points_per_possible_match = default_fragment_points_per_possible_match

◆ fragment_score_fraction

double vg::MinimizerMapper::fragment_score_fraction = default_fragment_score_fraction

◆ fragment_set_score_threshold

double vg::MinimizerMapper::fragment_set_score_threshold = default_fragment_set_score_threshold

◆ gap_scale

double vg::MinimizerMapper::gap_scale = default_gap_scale

◆ gapless_extension_limit

size_t vg::MinimizerMapper::gapless_extension_limit = default_gapless_extension_limit

◆ gbwt_graph

const gbwtgraph::GBWTGraph& vg::MinimizerMapper::gbwt_graph
protected

This is our primary graph.

◆ hard_hit_cap

size_t vg::MinimizerMapper::hard_hit_cap = default_hard_hit_cap

◆ hit_cap

size_t vg::MinimizerMapper::hit_cap = default_hit_cap

◆ item_bonus

int vg::MinimizerMapper::item_bonus = default_item_bonus

◆ item_scale

double vg::MinimizerMapper::item_scale = default_item_scale

◆ LONG_LIMIT

const static size_t vg::MinimizerMapper::LONG_LIMIT = 256
static protected

Length at which we cut over to long-alignment logging.

◆ MANY_LIMIT

const static size_t vg::MinimizerMapper::MANY_LIMIT = 10
static protected

Count at which we cut over to summary logging.

◆ mapq_score_scale

double vg::MinimizerMapper::mapq_score_scale = default_mapq_score_scale

◆ mapq_score_window

size_t vg::MinimizerMapper::mapq_score_window = default_mapq_score_window

◆ max_alignments

size_t vg::MinimizerMapper::max_alignments = default_max_alignments

◆ max_chain_connection

size_t vg::MinimizerMapper::max_chain_connection = default_max_chain_connection

◆ max_chaining_problems

int vg::MinimizerMapper::max_chaining_problems = default_max_chaining_problems

◆ max_chains_per_tree

size_t vg::MinimizerMapper::max_chains_per_tree = default_max_chains_per_tree

◆ max_direct_to_chain

size_t vg::MinimizerMapper::max_direct_to_chain = default_max_direct_to_chain

◆ max_dozeu_cells

size_t vg::MinimizerMapper::max_dozeu_cells = default_max_dozeu_cells

◆ max_dp_cells

size_t vg::MinimizerMapper::max_dp_cells = default_max_dp_cells

◆ max_extension_mismatches

size_t vg::MinimizerMapper::max_extension_mismatches = default_max_extension_mismatches

◆ max_extensions

size_t vg::MinimizerMapper::max_extensions = default_max_extensions

◆ max_fragment_length

size_t vg::MinimizerMapper::max_fragment_length = default_max_fragment_length

◆ max_fragments

size_t vg::MinimizerMapper::max_fragments = default_max_fragments

◆ max_indel_bases

size_t vg::MinimizerMapper::max_indel_bases = default_max_indel_bases

◆ max_indel_bases_per_base

double vg::MinimizerMapper::max_indel_bases_per_base = default_max_indel_bases_per_base

◆ max_local_extensions

size_t vg::MinimizerMapper::max_local_extensions = default_max_local_extensions

◆ max_lookback_bases

size_t vg::MinimizerMapper::max_lookback_bases = default_max_lookback_bases

◆ max_lookback_bases_per_base

double vg::MinimizerMapper::max_lookback_bases_per_base = default_max_lookback_bases_per_base

◆ max_middle_dp_length

size_t vg::MinimizerMapper::max_middle_dp_length = default_max_middle_dp_length

◆ max_middle_gap

size_t vg::MinimizerMapper::max_middle_gap = default_max_middle_gap

◆ max_min_chain_score

int vg::MinimizerMapper::max_min_chain_score = default_max_min_chain_score

◆ max_multimaps

size_t vg::MinimizerMapper::max_multimaps = default_max_multimaps

◆ max_rescue_attempts

size_t vg::MinimizerMapper::max_rescue_attempts = default_max_rescue_attempts

◆ max_skipped_bases

size_t vg::MinimizerMapper::max_skipped_bases = default_max_skipped_bases

◆ max_tail_dp_length

size_t vg::MinimizerMapper::max_tail_dp_length = default_max_tail_dp_length

◆ max_tail_gap

size_t vg::MinimizerMapper::max_tail_gap = default_max_tail_gap

◆ max_tail_length

size_t vg::MinimizerMapper::max_tail_length = default_max_tail_length

◆ max_to_fragment

size_t vg::MinimizerMapper::max_to_fragment = default_max_to_fragment

◆ max_unique_min

size_t vg::MinimizerMapper::max_unique_min = default_max_unique_min

◆ min_chain_score_per_base

double vg::MinimizerMapper::min_chain_score_per_base = default_min_chain_score_per_base

◆ min_chaining_problems

int vg::MinimizerMapper::min_chaining_problems = default_min_chaining_problems

◆ min_chains

int vg::MinimizerMapper::min_chains = default_min_chains

◆ min_extension_sets

int vg::MinimizerMapper::min_extension_sets = default_min_extension_sets

◆ min_extensions

size_t vg::MinimizerMapper::min_extensions = default_min_extensions

◆ min_to_fragment

size_t vg::MinimizerMapper::min_to_fragment = default_min_to_fragment

◆ min_unique_node_fraction

double vg::MinimizerMapper::min_unique_node_fraction = default_min_unique_node_fraction

◆ minimizer_coverage_flank

size_t vg::MinimizerMapper::minimizer_coverage_flank = default_minimizer_coverage_flank

◆ minimizer_downsampling_max_window_length

size_t vg::MinimizerMapper::minimizer_downsampling_max_window_length = default_minimizer_downsampling_max_window_length

◆ minimizer_downsampling_window_count

size_t vg::MinimizerMapper::minimizer_downsampling_window_count = default_minimizer_downsampling_window_count

◆ minimizer_index

const gbwtgraph::DefaultMinimizerIndex& vg::MinimizerMapper::minimizer_index
protected

◆ minimizer_score_fraction

double vg::MinimizerMapper::minimizer_score_fraction = default_minimizer_score_fraction

◆ num_bp_per_min

size_t vg::MinimizerMapper::num_bp_per_min = default_num_bp_per_min

◆ pad_cluster_score_threshold

double vg::MinimizerMapper::pad_cluster_score_threshold = default_pad_cluster_score_threshold

◆ pad_zipcode_tree_score_threshold

double vg::MinimizerMapper::pad_zipcode_tree_score_threshold = default_pad_zipcode_tree_score_threshold

◆ paired_distance_stdevs

double vg::MinimizerMapper::paired_distance_stdevs = default_paired_distance_stdevs

◆ paired_rescue_score_limit

double vg::MinimizerMapper::paired_rescue_score_limit = default_paired_rescue_score_limit

◆ path_graph

const PathPositionHandleGraph* vg::MinimizerMapper::path_graph
protected

◆ points_per_possible_match

double vg::MinimizerMapper::points_per_possible_match = default_points_per_possible_match

◆ read_group

string vg::MinimizerMapper::read_group

Apply this read group name.

◆ rescue_algorithm

RescueAlgorithm vg::MinimizerMapper::rescue_algorithm = rescue_dozeu

The algorithm used for rescue.

◆ rescue_seed_limit

size_t vg::MinimizerMapper::rescue_seed_limit = default_rescue_seed_limit

◆ rescue_subgraph_stdevs

double vg::MinimizerMapper::rescue_subgraph_stdevs = default_rescue_subgraph_stdevs

◆ sample_name

string vg::MinimizerMapper::sample_name

Apply this sample name.

◆ set_refpos

bool vg::MinimizerMapper::set_refpos = default_set_refpos

◆ show_work

bool vg::MinimizerMapper::show_work = default_show_work

◆ sort_by_chain_score

bool vg::MinimizerMapper::sort_by_chain_score = default_sort_by_chain_score

◆ track_correctness

bool vg::MinimizerMapper::track_correctness = default_track_correctness

◆ track_position

bool vg::MinimizerMapper::track_position = default_track_position

◆ track_provenance

bool vg::MinimizerMapper::track_provenance = default_track_provenance

◆ use_explored_cap

bool vg::MinimizerMapper::use_explored_cap = default_use_explored_cap

◆ warned_about_bad_distribution

atomic_flag vg::MinimizerMapper::warned_about_bad_distribution = ATOMIC_FLAG_INIT
protected

We may need to complain exactly once that the distribution is bad.

◆ warned_about_rescue_size

atomic_flag vg::MinimizerMapper::warned_about_rescue_size = ATOMIC_FLAG_INIT

Have we complained about hitting the size limit for rescue?

◆ warned_about_tail_size

atomic_flag vg::MinimizerMapper::warned_about_tail_size = ATOMIC_FLAG_INIT
mutable

Have we complained about hitting the size limit for tails?

◆ wfa_distance

int vg::MinimizerMapper::wfa_distance = default_wfa_distance

◆ wfa_distance_per_base

double vg::MinimizerMapper::wfa_distance_per_base = default_wfa_distance_per_base

◆ wfa_max_distance

int vg::MinimizerMapper::wfa_max_distance = default_wfa_max_distance

◆ wfa_max_max_mismatches

int vg::MinimizerMapper::wfa_max_max_mismatches = default_wfa_max_max_mismatches

◆ wfa_max_mismatches

int vg::MinimizerMapper::wfa_max_mismatches = default_wfa_max_mismatches

◆ wfa_max_mismatches_per_base

double vg::MinimizerMapper::wfa_max_mismatches_per_base = default_wfa_max_mismatches_per_base

◆ zip_forest

ZipCodeForest vg::MinimizerMapper::zip_forest
protected

We have a zip code tree for finding distances between seeds.

◆ zipcode_tree_coverage_threshold

double vg::MinimizerMapper::zipcode_tree_coverage_threshold = default_zipcode_tree_coverage_threshold

◆ zipcode_tree_scale

double vg::MinimizerMapper::zipcode_tree_scale = default_zipcode_tree_scale

◆ zipcode_tree_score_threshold

double vg::MinimizerMapper::zipcode_tree_score_threshold = default_zipcode_tree_score_threshold

◆ zipcodes

const ZipCodeCollection* vg::MinimizerMapper::zipcodes
protected

The documentation for this class was generated from the following files: