#include <minimizer_mapper.hpp>

Inheritance diagram for vg::MinimizerMapper:

Classes
struct	aligner_stats_t
	Struct to represent per-DP-method stats. More...

struct	Minimizer

Public Types
enum	RescueAlgorithm { rescue_none, rescue_dozeu, rescue_gssw }
	Implemented rescue algorithms: no rescue, dozeu, GSSW. More...

typedef SnarlDistanceIndexClusterer::Seed	Seed
	The information we store for each seed. More...

Public Member Functions
	MinimizerMapper (const gbwtgraph::GBWTGraph &graph, const gbwtgraph::DefaultMinimizerIndex &minimizer_index, SnarlDistanceIndex *distance_index, const ZipCodeCollection *zipcodes, const PathPositionHandleGraph *path_graph=nullptr)

virtual void	set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)

void	map (Alignment &aln, AlignmentEmitter &alignment_emitter)

vector< Alignment >	map (Alignment &aln)

vector< Alignment >	map_from_chains (Alignment &aln)

vector< Alignment >	map_from_extensions (Alignment &aln)

pair< vector< Alignment >, vector< Alignment > >	map_paired (Alignment &aln1, Alignment &aln2, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer)

pair< vector< Alignment >, vector< Alignment > >	map_paired (Alignment &aln1, Alignment &aln2)

bool	fragment_distr_is_finalized ()

void	finalize_fragment_length_distr ()

void	force_fragment_length_distr (double mean, double stdev)

double	get_fragment_length_mean () const

double	get_fragment_length_stdev () const

size_t	get_fragment_length_sample_size () const

size_t	get_distance_limit (size_t read_length) const

virtual void	set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
	Set all the aligner scoring parameters and create the stored aligner instances. More...

virtual void	set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)

virtual void	set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)

Public Member Functions inherited from vg::AlignerClient
virtual void	set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)
	Set all the aligner scoring parameters and create the stored aligner instances. More...

virtual void	set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus)

Static Public Member Functions
static std::pair< size_t, size_t >	align_sequence_between_consistently (const pos_t &left_anchor, const pos_t &right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph *graph, const GSSWAligner *aligner, Alignment &alignment, const std::string *alignment_name=nullptr, size_t max_dp_cells=std::numeric_limits< size_t >::max(), const std::function< size_t(const Alignment &, const HandleGraph &)> &choose_band_padding=algorithms::pad_band_random_walk())

Static Public Member Functions inherited from vg::AlignerClient
static int8_t *	parse_matrix (std::istream &matrix_stream)
	Allocates an array to hold a 4x4 substitution matrix and returns it. More...

Public Attributes
size_t	hit_cap = default_hit_cap

size_t	hard_hit_cap = default_hard_hit_cap

double	minimizer_score_fraction = default_minimizer_score_fraction

size_t	minimizer_downsampling_window_count = default_minimizer_downsampling_window_count

size_t	minimizer_downsampling_max_window_length = default_minimizer_downsampling_max_window_length

size_t	minimizer_coverage_flank = default_minimizer_coverage_flank

size_t	max_unique_min = default_max_unique_min

size_t	num_bp_per_min = default_num_bp_per_min

bool	exclude_overlapping_min = default_exclude_overlapping_min

size_t	min_extensions = default_min_extensions

size_t	max_extensions = default_max_extensions

double	cluster_score_threshold = default_cluster_score_threshold

double	pad_cluster_score_threshold = default_pad_cluster_score_threshold

double	cluster_coverage_threshold = default_cluster_coverage_threshold

double	extension_set_score_threshold = default_extension_set_score_threshold

int	extension_score_threshold = default_extension_score_threshold

int	min_extension_sets = default_min_extension_sets

int	extension_set_min_score = default_extension_set_min_score

size_t	max_local_extensions = default_max_local_extensions

size_t	max_alignments = default_max_alignments

size_t	max_extension_mismatches = default_max_extension_mismatches

bool	align_from_chains = default_align_from_chains

double	zipcode_tree_scale = default_zipcode_tree_scale

double	zipcode_tree_score_threshold = default_zipcode_tree_score_threshold

double	pad_zipcode_tree_score_threshold = default_pad_zipcode_tree_score_threshold

double	zipcode_tree_coverage_threshold = default_zipcode_tree_coverage_threshold

size_t	min_to_fragment = default_min_to_fragment

size_t	max_to_fragment = default_max_to_fragment

size_t	gapless_extension_limit = default_gapless_extension_limit

size_t	fragment_max_graph_lookback_bases = default_fragment_max_graph_lookback_bases

double	fragment_max_graph_lookback_bases_per_base = default_fragment_max_graph_lookback_bases_per_base

size_t	fragment_max_read_lookback_bases = default_fragment_max_read_lookback_bases

double	fragment_max_read_lookback_bases_per_base = default_fragment_max_read_lookback_bases_per_base

size_t	max_fragments = default_max_fragments

double	fragment_gap_scale = default_fragment_gap_scale

double	fragment_points_per_possible_match = default_fragment_points_per_possible_match

size_t	fragment_max_indel_bases = default_fragment_max_indel_bases

double	fragment_max_indel_bases_per_base = default_fragment_max_indel_bases_per_base

size_t	max_chain_connection = default_max_chain_connection

size_t	max_tail_length = default_max_tail_length

double	fragment_score_fraction = default_fragment_score_fraction

double	fragment_max_min_score = default_fragment_max_min_score

double	fragment_min_score = default_fragment_min_score

double	fragment_set_score_threshold = default_fragment_set_score_threshold

int	min_chaining_problems = default_min_chaining_problems

int	max_chaining_problems = default_max_chaining_problems

size_t	max_direct_to_chain = default_max_direct_to_chain

size_t	max_graph_lookback_bases = default_max_graph_lookback_bases

double	max_graph_lookback_bases_per_base = default_max_graph_lookback_bases_per_base

size_t	max_read_lookback_bases = default_max_read_lookback_bases

double	max_read_lookback_bases_per_base = default_max_read_lookback_bases_per_base

int	item_bonus = default_item_bonus

double	item_scale = default_item_scale

double	gap_scale = default_gap_scale

double	points_per_possible_match = default_points_per_possible_match

size_t	max_indel_bases = default_max_indel_bases

double	max_indel_bases_per_base = default_max_indel_bases_per_base

double	chain_score_threshold = default_chain_score_threshold

int	min_chains = default_min_chains

size_t	max_chains_per_tree = default_max_chains_per_tree

double	min_chain_score_per_base = default_min_chain_score_per_base

int	max_min_chain_score = default_max_min_chain_score

size_t	max_skipped_bases = default_max_skipped_bases

size_t	max_tail_dp_length = default_max_tail_dp_length

size_t	max_middle_dp_length = default_max_middle_dp_length

size_t	max_dp_cells = default_max_dp_cells

size_t	max_tail_gap = default_max_tail_gap

size_t	max_middle_gap = default_max_middle_gap

int	wfa_max_mismatches = default_wfa_max_mismatches

double	wfa_max_mismatches_per_base = default_wfa_max_mismatches_per_base

int	wfa_max_max_mismatches = default_wfa_max_max_mismatches

int	wfa_distance = default_wfa_distance

double	wfa_distance_per_base = default_wfa_distance_per_base

int	wfa_max_distance = default_wfa_max_distance

bool	sort_by_chain_score = default_sort_by_chain_score

double	min_unique_node_fraction = default_min_unique_node_fraction

bool	use_explored_cap = default_use_explored_cap

size_t	mapq_score_window = default_mapq_score_window

double	mapq_score_scale = default_mapq_score_scale

size_t	max_multimaps = default_max_multimaps

size_t	distance_limit = default_distance_limit

bool	do_dp = default_do_dp

bool	set_refpos = default_set_refpos

bool	track_provenance = default_track_provenance

bool	track_correctness = default_track_correctness

bool	track_position = default_track_position

bool	show_work = default_show_work

double	paired_distance_stdevs = default_paired_distance_stdevs

double	paired_rescue_score_limit = default_paired_rescue_score_limit

double	rescue_subgraph_stdevs = default_rescue_subgraph_stdevs

size_t	rescue_seed_limit = default_rescue_seed_limit

size_t	max_rescue_attempts = default_max_rescue_attempts

size_t	max_dozeu_cells = default_max_dozeu_cells

double	rescue_likelihood_limit = default_rescue_likelihood_limit

size_t	max_fragment_length = default_max_fragment_length

RescueAlgorithm	rescue_algorithm = rescue_dozeu
	The algorithm used for rescue. More...

string	sample_name
	Apply this sample name. More...

string	read_group
	Apply this read group name. More...

atomic_flag	warned_about_rescue_size = ATOMIC_FLAG_INIT
	Have we complained about hitting the size limit for rescue? More...

atomic_flag	warned_about_tail_size = ATOMIC_FLAG_INIT
	Have we complained about hitting the size limit for tails? More...

Public Attributes inherited from vg::AlignerClient
bool	adjust_alignments_for_base_quality = false

Static Public Attributes
static constexpr size_t	default_hit_cap = 10
	Use all minimizers with at most hit_cap hits. More...

static constexpr size_t	default_hard_hit_cap = 500
	Ignore all minimizers with more than hard_hit_cap hits. More...

static constexpr double	default_minimizer_score_fraction = 0.9

static constexpr size_t	default_minimizer_downsampling_window_count = 0
	Window count for minimizer downsampling. More...

static constexpr size_t	default_minimizer_downsampling_max_window_length = std::numeric_limits<size_t>::max()

static constexpr size_t	default_minimizer_coverage_flank = 250

static constexpr size_t	default_max_unique_min = 500
	Maximum number of distinct minimizers to take. More...

static constexpr size_t	default_num_bp_per_min = 1000
	Number of minimizers to select based on read_len/num_bp_per_min. More...

static constexpr bool	default_exclude_overlapping_min = false
	If set, exclude overlapping minimizers. More...

static constexpr size_t	default_min_extensions = 2
	Accept at least this many clusters for gapless extension. More...

static constexpr size_t	default_max_extensions = 800
	How many clusters should we produce gapless extensions for, max? More...

static constexpr double	default_cluster_score_threshold = 50
	If a cluster's score is smaller than the best score of any cluster by more than this much, then don't extend it. More...

static constexpr double	default_pad_cluster_score_threshold = 20

static constexpr double	default_cluster_coverage_threshold = 0.3

static constexpr double	default_extension_set_score_threshold = 20

static constexpr int	default_extension_score_threshold = 1

static constexpr int	default_min_extension_sets = 2

static constexpr int	default_extension_set_min_score = 20

static constexpr size_t	default_max_local_extensions = numeric_limits<size_t>::max()
	How many extensions should we try as seeds within a mapping location? More...

static constexpr size_t	default_max_alignments = 8
	How many alignments should we make, max? More...

static constexpr size_t	default_max_extension_mismatches = GaplessExtender::MAX_MISMATCHES

static constexpr bool	default_align_from_chains = false

static constexpr double	default_zipcode_tree_scale = 2.0

static constexpr double	default_zipcode_tree_score_threshold = 50
	How far do we want to go down looking at zip code trees to make fragments? More...

static constexpr double	default_pad_zipcode_tree_score_threshold = 20

static constexpr double	default_zipcode_tree_coverage_threshold = 0.3

static constexpr size_t	default_min_to_fragment = 4
	How many things should we produce fragments for, min? More...

static constexpr size_t	default_max_to_fragment = 10
	How many things should we produce fragments for, max? More...

static constexpr size_t	default_gapless_extension_limit = 0

static constexpr size_t	default_fragment_max_graph_lookback_bases = 300
	How many bases should we look back in the graph when making fragments? More...

static constexpr double	default_fragment_max_graph_lookback_bases_per_base = 0.03
	How many bases should we look back in the graph when making fragments, per base of read length? More...

static constexpr size_t	default_fragment_max_read_lookback_bases = std::numeric_limits<size_t>::max()
	How many bases should we look back in the read when making fragments? More...

static constexpr double	default_fragment_max_read_lookback_bases_per_base = 1.0
	How many bases should we look back in the read when making fragments, per base of read length? More...

static constexpr size_t	default_max_fragments = std::numeric_limits<size_t>::max()
	How many fragments should we try and make when fragmenting something? More...

static constexpr double	default_fragment_gap_scale = 1.0

static constexpr double	default_fragment_points_per_possible_match = 0

static constexpr size_t	default_fragment_max_indel_bases = 2000
	How many bases of indel should we allow in fragments? More...

static constexpr double	default_fragment_max_indel_bases_per_base = 0.2
	How many bases of indel should we allow in fragments per base of read length? More...

static constexpr size_t	default_max_chain_connection = 100

static constexpr size_t	default_max_tail_length = 100
	Similarly, what is the maximum tail length we will try to WFA align? More...

static constexpr double	default_fragment_score_fraction = 0.1

static constexpr double	default_fragment_max_min_score = std::numeric_limits<double>::max()
	How high should we let the score threshold based on the best fragment's score get? More...

static constexpr double	default_fragment_min_score = 60

static constexpr double	default_fragment_set_score_threshold = 0

static constexpr int	default_min_chaining_problems = 1

static constexpr int	default_max_chaining_problems = std::numeric_limits<int>::max()
	Do no more than this many chaining problems. More...

static constexpr size_t	default_max_direct_to_chain = 0

static constexpr size_t	default_max_graph_lookback_bases = 3000
	How many bases should we look back in the graph when chaining? More...

static constexpr double	default_max_graph_lookback_bases_per_base = 0.3
	How many bases should we look back in the graph when chaining, per base of read length? More...

static constexpr size_t	default_max_read_lookback_bases = std::numeric_limits<size_t>::max()
	How many bases should we look back in the read when chaining? More...

static constexpr double	default_max_read_lookback_bases_per_base = 1.0
	How many bases should we look back in the read when chaining, per base of read length? More...

static constexpr int	default_item_bonus = 0

static constexpr double	default_item_scale = 1.0

static constexpr double	default_gap_scale = 1.0

static constexpr double	default_points_per_possible_match = 0

static constexpr size_t	default_max_indel_bases = 2000
	How many bases of indel should we allow in chaining? More...

static constexpr double	default_max_indel_bases_per_base = 0.2
	How many bases of indel should we allow in chaining, per base of read length? More...

static constexpr double	default_chain_score_threshold = 100

static constexpr int	default_min_chains = 4

static constexpr size_t	default_max_chains_per_tree = 1
	Allow up to this many chains per tree. More...

static constexpr double	default_min_chain_score_per_base = 0.01

static constexpr int	default_max_min_chain_score = 200
	Limit the min chain score to no more than this. More...

static constexpr size_t	default_max_skipped_bases = 0

static constexpr size_t	default_max_tail_dp_length = 30000

static constexpr size_t	default_max_middle_dp_length = std::numeric_limits<int32_t>::max()
	How long of a DP can we do before something might go wrong with BandedGlobalAligner or the GBWT-based WFA? More...

static constexpr size_t	default_max_dp_cells = std::numeric_limits<size_t>::max()

static constexpr size_t	default_max_tail_gap = std::numeric_limits<size_t>::max()
	How many gap bases should we allow in a Dozeu tail alignment, max? More...

static constexpr size_t	default_max_middle_gap = std::numeric_limits<size_t>::max()
	How many gap bases should we allow in a between-seed alignment, max? More...

static constexpr int	default_wfa_max_mismatches = 2
	How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails? More...

static constexpr double	default_wfa_max_mismatches_per_base = 0.1
	How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails per base of read sequence? More...

static constexpr int	default_wfa_max_max_mismatches = 20
	How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails maximum, at any read length? More...

static constexpr int	default_wfa_distance = WFAExtender::ErrorModel::default_distance().min
	How far behind the leader should the WFA be allowed to get? More...

static constexpr double	default_wfa_distance_per_base = WFAExtender::ErrorModel::default_distance().per_base
	How far behind the leader should the WFA be allowed to get, per base of read sequence? More...

static constexpr int	default_wfa_max_distance = WFAExtender::ErrorModel::default_distance().max
	How far behind the leader should the WFA be allowed to get, at any read length? More...

static constexpr bool	default_sort_by_chain_score = false
	Should alignments be ranked by chain score instead of base-level score? More...

static constexpr double	default_min_unique_node_fraction = 0.0
	How much of an alignment needs to be from distinct nodes to be a distinct alignment? More...

static constexpr bool	default_use_explored_cap = false

static constexpr size_t	default_mapq_score_window = 0
	What number of bp should we re-scale scores to for MAPQ, for calibration? 0 for off. More...

static constexpr double	default_mapq_score_scale = 1.0
	How should we scale scores before mapq, for calibration. More...

static constexpr size_t	default_max_multimaps = 1

static constexpr size_t	default_distance_limit = 200

static constexpr bool	default_do_dp = true
	If false, skip computing base-level alignments. More...

static constexpr bool	default_set_refpos = false
	Set refpos field of alignments to positions on nodes they visit. More...

static constexpr bool	default_track_provenance = false

static constexpr bool	default_track_correctness = false

static constexpr bool	default_track_position = false
	Track linear reference position for placements in log output. More...

static constexpr bool	default_show_work = false
	If set, log what the mapper is thinking in its mapping of each read. More...

static constexpr double	default_paired_distance_stdevs = 2.0

static constexpr double	default_paired_rescue_score_limit = 0.9
	How close does an alignment have to be to the best alignment for us to rescue on it. More...

static constexpr double	default_rescue_subgraph_stdevs = 4.0
	How many stdevs from the mean do we extract a subgraph from? More...

static constexpr size_t	default_rescue_seed_limit = 100
	Do not attempt rescue if there are more seeds in the rescue subgraph. More...

static constexpr size_t	default_max_rescue_attempts = 15
	For paired end mapping, how many times should we attempt rescue (per read)? More...

static constexpr size_t	default_max_dozeu_cells = (size_t)(1.5 * 1024 * 1024)

static constexpr double	default_rescue_likelihood_limit = 0.05
	For rescue, how likely can an alignment be by chance to still accept it? More...

static constexpr size_t	default_max_fragment_length = 2000
	What is the maximum fragment length that we accept as valid for paired-end reads? More...

Protected Types
typedef SnarlDistanceIndexClusterer::Cluster	Cluster
	The information we store for each cluster. More...

using	ImmutablePath = structures::ImmutableList< Mapping >

Protected Member Functions
double	distance_to_annotation (int64_t distance) const

std::vector< algorithms::Anchor >	to_anchors (const Alignment &aln, const VectorView< Minimizer > &minimizers, std::vector< Seed > &seeds) const
	Convert a collection of seeds to a collection of chaining anchors. More...

WFAAlignment	to_wfa_alignment (const algorithms::Anchor &anchor, const Alignment &aln, const Aligner *aligner) const

std::vector< Minimizer >	find_minimizers (const std::string &sequence, Funnel &funnel) const

void	flag_repetitive_minimizers (std::vector< Minimizer > &minimizers_in_read_order) const

std::vector< size_t >	sort_minimizers_by_score (const std::vector< Minimizer > &minimizers_in_read_order, LazyRNG &rng) const

std::vector< Seed >	find_seeds (const std::vector< Minimizer > &minimizers_in_read_order, const VectorView< Minimizer > &minimizers, const Alignment &aln, Funnel &funnel) const

void	tag_seeds (const Alignment &aln, const std::vector< Seed >::const_iterator &begin, const std::vector< Seed >::const_iterator &end, const VectorView< Minimizer > &minimizers, size_t funnel_offset, Funnel &funnel) const

void	score_cluster (Cluster &cluster, size_t i, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, size_t seq_length, Funnel &funnel) const

std::pair< double, double >	score_tree (const ZipCodeForest &zip_code_forest, size_t i, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, size_t seq_length, Funnel &funnel) const

vector< GaplessExtension >	extend_seed_group (const std::vector< size_t > &seed_group, size_t source_num, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, const string &sequence, size_t max_mismatches, vector< vector< size_t >> *minimizer_kept_count=nullptr, Funnel *funnel=nullptr, std::vector< std::vector< size_t >> *seeds_used=nullptr) const

std::vector< int >	score_extensions (const std::vector< std::vector< GaplessExtension >> &extensions, const Alignment &aln, Funnel &funnel) const

std::vector< int >	score_extensions (const std::vector< std::pair< std::vector< GaplessExtension >, size_t >> &extensions, const Alignment &aln, Funnel &funnel) const

double	get_read_coverage (const Alignment &aln, const VectorView< std::vector< size_t >> &seed_sets, const std::vector< Seed > &seeds, const VectorView< Minimizer > &minimizers) const

void	do_fragmenting_on_trees (Alignment &aln, const ZipCodeForest &zip_code_forest, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const vector< algorithms::Anchor > &seed_anchors, std::vector< std::vector< size_t >> &fragments, std::vector< double > &fragment_scores, std::vector< algorithms::Anchor > &fragment_anchors, std::vector< size_t > &fragment_source_tree, std::vector< std::vector< size_t >> &minimizer_kept_fragment_count, std::vector< double > &multiplicity_by_fragment, std::vector< Alignment > &alignments, SmallBitset &minimizer_explored, vector< double > &multiplicity_by_alignment, LazyRNG &rng, Funnel &funnel) const

void	do_chaining_on_fragments (Alignment &aln, const ZipCodeForest &zip_code_forest, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const std::vector< std::vector< size_t >> &fragments, const std::vector< double > &fragment_scores, const std::vector< algorithms::Anchor > &fragment_anchors, const std::vector< size_t > &fragment_source_tree, const std::vector< std::vector< size_t >> &minimizer_kept_fragment_count, const std::vector< double > &multiplicity_by_fragment, std::vector< std::vector< size_t >> &chains, std::vector< size_t > &chain_source_tree, std::vector< int > &chain_score_estimates, std::vector< std::vector< size_t >> &minimizer_kept_chain_count, std::vector< double > &multiplicity_by_chain, vector< double > &multiplicity_by_tree, std::unordered_map< size_t, std::vector< size_t >> &good_fragments_in, LazyRNG &rng, Funnel &funnel) const

void	get_best_chain_stats (Alignment &aln, const ZipCodeForest &zip_code_forest, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const std::vector< std::vector< size_t >> &fragments, const std::unordered_map< size_t, std::vector< size_t >> &good_fragments_in, const std::vector< std::vector< size_t >> &chains, const std::vector< size_t > &chain_source_tree, const vector< algorithms::Anchor > &seed_anchors, const std::vector< int > &chain_score_estimates, bool &best_chain_correct, double &best_chain_coverage, size_t &best_chain_longest_jump, double &best_chain_average_jump, size_t &best_chain_anchors, size_t &best_chain_anchor_length, Funnel &funnel) const

void	do_alignment_on_chains (Alignment &aln, const std::vector< Seed > &seeds, const VectorView< MinimizerMapper::Minimizer > &minimizers, const vector< algorithms::Anchor > &seed_anchors, const std::vector< std::vector< size_t >> &chains, const std::vector< size_t > &chain_source_tree, const std::vector< double > &multiplicity_by_chain, const std::vector< int > &chain_score_estimates, const std::vector< std::vector< size_t >> &minimizer_kept_chain_count, vector< Alignment > &alignments, vector< double > &multiplicity_by_alignment, vector< size_t > &alignments_to_source, SmallBitset &minimizer_explored, aligner_stats_t &stats, bool &funnel_depleted, LazyRNG &rng, Funnel &funnel) const

void	pick_mappings_from_alignments (Alignment &aln, const std::vector< Alignment > &alignments, const std::vector< double > &multiplicity_by_alignment, const std::vector< size_t > &alignments_to_source, const std::vector< int > &chain_score_estimates, std::vector< Alignment > &mappings, std::vector< double > &scores, std::vector< double > &multiplicity_by_mapping, bool &funnel_depleted, LazyRNG &rng, Funnel &funnel) const

Alignment	find_chain_alignment (const Alignment &aln, const VectorView< algorithms::Anchor > &to_chain, const std::vector< size_t > &chain, aligner_stats_t *stats=nullptr) const

void	find_optimal_tail_alignments (const Alignment &aln, const vector< GaplessExtension > &extended_seeds, LazyRNG &rng, Alignment &best, Alignment &second_best) const

void	attempt_rescue (const Alignment &aligned_read, Alignment &rescued_alignment, const VectorView< Minimizer > &minimizers, bool rescue_forward)

GaplessExtender::cluster_type	seeds_in_subgraph (const VectorView< Minimizer > &minimizers, const std::unordered_set< nid_t > &subgraph) const

void	fix_dozeu_score (Alignment &rescued_alignment, const HandleGraph &rescue_graph, const std::vector< handle_t > &topological_order) const

void	fix_dozeu_end_deletions (Alignment &rescued_alignment) const

int64_t	distance_between (const pos_t &pos1, const pos_t &pos2)

int64_t	distance_between (const Alignment &aln1, const Alignment &aln2)

int64_t	unoriented_distance_between (const pos_t &pos1, const pos_t &pos2) const

void	extension_to_alignment (const GaplessExtension &extension, Alignment &alignment) const

void	wfa_alignment_to_alignment (const WFAAlignment &wfa_alignment, Alignment &alignment) const

void	pair_all (std::array< vector< Alignment >, 2 > &mappings) const

void	annotate_with_minimizer_statistics (Alignment &target, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, size_t old_seed_count, size_t new_seed_offset, const Funnel &funnel) const

double	compute_mapq_caps (const Alignment &aln, const VectorView< Minimizer > &minimizers, const SmallBitset &explored)

vector< TreeSubgraph >	get_tail_forest (const GaplessExtension &extended_seed, size_t read_length, bool left_tails, size_t *longest_detectable_gap=nullptr) const

pair< Path, size_t >	get_best_alignment_against_any_tree (const vector< TreeSubgraph > &trees, const string &sequence, const Position &default_position, bool pin_left, size_t longest_detectable_gap, LazyRNG &rng) const

void	dfs_gbwt (const Position &from, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const

void	dfs_gbwt (handle_t from_handle, size_t from_offset, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const

void	dfs_gbwt (const gbwt::SearchState &start_state, size_t from_offset, size_t walk_distance, const function< void(const handle_t &)> &enter_handle, const function< void(void)> exit_handle) const

double	score_alignment_pair (Alignment &aln1, Alignment &aln2, int64_t fragment_distance)

template<typename Score = double>
void	process_until_threshold_a (size_t items, const function< Score(size_t)> &get_score, double threshold, size_t min_count, size_t max_count, LazyRNG &rng, const function< bool(size_t, size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const

template<typename Score = double>
void	process_until_threshold_b (const vector< Score > &scores, double threshold, size_t min_count, size_t max_count, LazyRNG &rng, const function< bool(size_t, size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const

template<typename Score = double>
void	process_until_threshold_c (size_t items, const function< Score(size_t)> &get_score, const function< bool(size_t, size_t)> &comparator, double threshold, size_t min_count, size_t max_count, LazyRNG &get_seed, const function< bool(size_t, size_t)> &process_item, const function< void(size_t)> &discard_item_by_count, const function< void(size_t)> &discard_item_by_score) const

bool	validate_clusters (const std::vector< std::vector< Cluster >> &clusters, const std::vector< std::vector< Seed >> &seeds, size_t read_limit, size_t fragment_limit) const
	Do a brute-force check of the clusters. Print errors to stderr. More...

Protected Member Functions inherited from vg::AlignerClient
	AlignerClient (double gc_content_estimate=vg::default_gc_content)

const GSSWAligner *	get_aligner (bool have_qualities=true) const

const QualAdjAligner *	get_qual_adj_aligner () const

const Aligner *	get_regular_aligner () const

Static Protected Member Functions
static gbwtgraph::Payload	no_chain_info ()
	How should we initialize chain info when it's not stored in the minimizer index? More...

static Seed	chain_info_to_seed (const pos_t &hit, size_t minimizer, const ZipCode &zip)

static algorithms::Anchor	to_anchor (const Alignment &aln, const VectorView< Minimizer > &minimizers, std::vector< Seed > &seeds, size_t seed_number, const HandleGraph &graph, const Aligner *aligner)
	Convert a single seed to a single chaining anchor. More...

static algorithms::Anchor	to_anchor (const Alignment &aln, size_t read_start, size_t read_end, const std::vector< size_t > &sorted_seeds, const std::vector< algorithms::Anchor > &seed_anchors, const std::vector< size_t >::const_iterator &mismatch_begin, const std::vector< size_t >::const_iterator &mismatch_end, const HandleGraph &graph, const Aligner *aligner)

static int	score_extension_group (const Alignment &aln, const vector< GaplessExtension > &extended_seeds, int gap_open_penalty, int gap_extend_penalty)

static void	with_dagified_local_graph (const pos_t &left_anchor, const pos_t &right_anchor, size_t max_path_length, const HandleGraph &graph, const std::function< void(DeletableHandleGraph &, const handle_t &, const handle_t &, const std::function< std::pair< nid_t, bool >(const handle_t &)> &)> &callback)

static size_t	longest_detectable_gap_in_range (const Alignment &aln, const std::string::const_iterator &sequence_begin, const std::string::const_iterator &sequence_end, const GSSWAligner *aligner)

static std::pair< size_t, size_t >	align_sequence_between (const pos_t &left_anchor, const pos_t &right_anchor, size_t max_path_length, size_t max_gap_length, const HandleGraph *graph, const GSSWAligner *aligner, Alignment &alignment, const std::string *alignment_name=nullptr, size_t max_dp_cells=std::numeric_limits< size_t >::max(), const std::function< size_t(const Alignment &, const HandleGraph &)> &choose_band_padding=algorithms::pad_band_random_walk())

static WFAAlignment	connect_consistently (const std::string &sequence, const pos_t &left_anchor, const pos_t &right_anchor, const WFAExtender &wfa_extender)

static double	window_breaking_quality (const VectorView< Minimizer > &minimizers, vector< size_t > &broken, const string &sequence, const string &quality_bytes)

static double	faster_cap (const VectorView< Minimizer > &minimizers, vector< size_t > &minimizers_explored, const string &sequence, const string &quality_bytes)

static void	for_each_agglomeration_interval (const VectorView< Minimizer > &minimizers, const string &sequence, const string &quality_bytes, const vector< size_t > &minimizer_indices, const function< void(size_t, size_t, size_t, size_t)> &iteratee)

static double	get_log10_prob_of_disruption_in_interval (const VectorView< Minimizer > &minimizers, const string &sequence, const string &quality_bytes, const vector< size_t >::iterator &disrupt_begin, const vector< size_t >::iterator &disrupt_end, size_t left, size_t right)

static double	get_prob_of_disruption_in_column (const VectorView< Minimizer > &minimizers, const string &sequence, const string &quality_bytes, const vector< size_t >::iterator &disrupt_begin, const vector< size_t >::iterator &disrupt_end, size_t index)

static size_t	immutable_path_from_length (const ImmutablePath &path)

static Path	to_path (const ImmutablePath &path)

static string	log_name ()
	Get the thread identifier prefix for logging. More...

static string	log_alignment (const Alignment &aln)
	Turn an Alignment into a conveniently-sized string for logging. More...

static string	log_alignment (const Path &path, bool force_condensed=false)
	Turn a Path from an alignment into a conveniently-sized string for logging. More...

static string	log_bits (const std::vector< bool > &bits)
	Turn a list of bit flags into a compact representation. More...

static void	dump_chaining_problem (const std::vector< algorithms::Anchor > &anchors, const std::vector< size_t > &cluster_seeds_sorted, const HandleGraph &graph)
	Dump a whole chaining problem. More...

static void	dump_debug_minimizers (const VectorView< Minimizer > &minimizers, const string &sequence, const vector< size_t > *to_include=nullptr, size_t start_offset=0, size_t length_limit=std::numeric_limits< size_t >::max())
	Dump all the given minimizers, with optional subset restriction. More...

static void	dump_debug_extension_set (const HandleGraph &graph, const Alignment &aln, const vector< GaplessExtension > &extended_seeds)
	Dump all the extensions in an extension set. More...

static void	dump_debug_sequence (ostream &out, const string &sequence, size_t start_offset=0, size_t length_limit=std::numeric_limits< size_t >::max())
	Print a sequence with base numbering. More...

static void	dump_debug_clustering (const Cluster &cluster, size_t cluster_number, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds)
	Print the seed content of a cluster. More...

static void	dump_debug_seeds (const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, const std::vector< size_t > &selected_seeds)
	Print information about a selected set of seeds. More...

static void	dump_debug_query (const Alignment &aln)
	Print information about a read to be aligned. More...

static void	dump_debug_query (const Alignment &aln1, const Alignment &aln2)
	Print information about a read pair to be aligned. More...

static void	dump_debug_dotplot (const std::string &name, const VectorView< Minimizer > &minimizers, const std::vector< Seed > &seeds, const std::vector< std::pair< std::string, std::vector< std::vector< size_t >>>> &seed_sets, const PathPositionHandleGraph *path_graph)

static void	dump_debug_graph (const HandleGraph &graph)
	Dump a graph. More...

Protected Attributes
const PathPositionHandleGraph *	path_graph

const gbwtgraph::DefaultMinimizerIndex &	minimizer_index

SnarlDistanceIndex *	distance_index

const ZipCodeCollection *	zipcodes

const gbwtgraph::GBWTGraph &	gbwt_graph
	This is our primary graph. More...

std::unique_ptr< GaplessExtender >	extender

SnarlDistanceIndexClusterer	clusterer
	We have a clusterer. More...

ZipCodeForest	zip_forest
	We have a zip code tree for finding distances between seeds. More...

std::function< size_t(const Alignment &, const HandleGraph &)>	choose_band_padding

FragmentLengthDistribution	fragment_length_distr

atomic_flag	warned_about_bad_distribution = ATOMIC_FLAG_INIT
	We may need to complain exactly once that the distribution is bad. More...

Protected Attributes inherited from vg::AlignerClient
double	gc_content_estimate

Static Protected Attributes
const static size_t	LONG_LIMIT = 256
	Length at which we cut over to long-alignment logging. More...

const static size_t	MANY_LIMIT = 10
	Count at which we cut over to summary logging. More...

Friends
class	TestMinimizerMapper

Member Typedef Documentation

◆ Cluster

typedef SnarlDistanceIndexClusterer::Cluster vg::MinimizerMapper::Cluster

protected

The information we store for each cluster.

◆ ImmutablePath

using vg::MinimizerMapper::ImmutablePath = structures::ImmutableList<Mapping>

protected

We define a type for shared-tail lists of Mappings, to avoid constantly copying Path objects.

◆ Seed

typedef SnarlDistanceIndexClusterer::Seed vg::MinimizerMapper::Seed

The information we store for each seed.

Member Enumeration Documentation

◆ RescueAlgorithm

enum vg::MinimizerMapper::RescueAlgorithm

Implemented rescue algorithms: no rescue, dozeu, GSSW.

Enumerator
rescue_none
rescue_dozeu
rescue_gssw

Constructor & Destructor Documentation

◆ MinimizerMapper()

vg::MinimizerMapper::MinimizerMapper	(	const gbwtgraph::GBWTGraph &	graph,
		const gbwtgraph::DefaultMinimizerIndex &	minimizer_index,
		SnarlDistanceIndex *	distance_index,
		const ZipCodeCollection *	zipcodes,
		const PathPositionHandleGraph *	path_graph = `nullptr`
	)

Construct a new MinimizerMapper using the given indexes. The PathPositionHandleGraph can be nullptr, as we only use it for correctness tracking.

Member Function Documentation

◆ align_sequence_between()

std::pair< size_t, size_t > vg::MinimizerMapper::align_sequence_between	(	const pos_t &	left_anchor,
		const pos_t &	right_anchor,
		size_t	max_path_length,
		size_t	max_gap_length,
		const HandleGraph *	graph,
		const GSSWAligner *	aligner,
		Alignment &	alignment,
		const std::string *	alignment_name = `nullptr`,
		size_t	max_dp_cells = `std::numeric_limits<size_t>::max()`,
		const std::function< size_t(const Alignment &, const HandleGraph &)> &	choose_band_padding = `algorithms::pad_band_random_walk()`
	)

staticprotected

Clip out the part of the graph between the given positions (left facing into the region and right facing out) and global-align the sequence of the given Alignment to it. Populate the Alignment's path and score.

Finds an alignment against a graph path if it is <= max_path_length.

If one of the anchor positions is empty, does pinned alignment against the other position.

For pinned alignment, restricts the alignment to have gaps no longer than max_gap_length, and to use <= max_dp_cells cells. If too many DP cells would be used, produces a softclip alignment.

For connecting alignment, restricts the alignment to use <= max_dp_cells cells. If too many DP cells would be used, produces an Alignment with an empty path.

Returns the number of nodes and bases in the graph aligned against.

◆ align_sequence_between_consistently()

std::pair< size_t, size_t > vg::MinimizerMapper::align_sequence_between_consistently	(	const pos_t &	left_anchor,
		const pos_t &	right_anchor,
		size_t	max_path_length,
		size_t	max_gap_length,
		const HandleGraph *	graph,
		const GSSWAligner *	aligner,
		Alignment &	alignment,
		const std::string *	alignment_name = `nullptr`,
		size_t	max_dp_cells = `std::numeric_limits<size_t>::max()`,
		const std::function< size_t(const Alignment &, const HandleGraph &)> &	choose_band_padding = `algorithms::pad_band_random_walk()`
	)

static

Version of align_sequence_between() that guarantees that you get the same answer (modulo reverse-complementation) no matter whether the sequence and anchors are reverse-complemented or not.

◆ annotate_with_minimizer_statistics()

void vg::MinimizerMapper::annotate_with_minimizer_statistics	(	Alignment &	target,
		const VectorView< Minimizer > &	minimizers,
		const std::vector< Seed > &	seeds,
		size_t	old_seed_count,
		size_t	new_seed_offset,
		const Funnel &	funnel
	)		const

protected

Add annotations to an Alignment with statistics about the minimizers.

old_seed_count is the number of seeds in the seed vector actually created at the "seed" stage of the alignment process. new_seed_offset is where the first of those seeds appears in the funnel at the reseed stage.

◆ attempt_rescue()

void vg::MinimizerMapper::attempt_rescue	(	const Alignment &	aligned_read,
		Alignment &	rescued_alignment,
		const VectorView< Minimizer > &	minimizers,
		bool	rescue_forward
	)

protected

Given an aligned read, extract a subgraph of the graph within a distance range based on the fragment length distribution and attempt to align the unaligned read to it. Rescue_forward is true if the aligned read is the first and false otherwise. Assumes that both reads are facing the same direction. TODO: This should be const, but some of the function calls are not.

◆ chain_info_to_seed()

static Seed vg::MinimizerMapper::chain_info_to_seed	(	const pos_t &	hit,
		size_t	minimizer,
		const ZipCode &	zip
	)

inlinestaticprotected

How do we convert chain info to an actual seed of the type we are using? Also needs to know the hit position, and the minimizer number.

◆ compute_mapq_caps()

double vg::MinimizerMapper::compute_mapq_caps	(	const Alignment &	aln,
		const VectorView< Minimizer > &	minimizers,
		const SmallBitset &	explored
	)

protected

Compute MAPQ caps based on all minimizers that are explored, for some definition of explored.

Needs access to the input alignment for sequence and quality information.

Returns only an "extended" cap at the moment.

◆ connect_consistently()

WFAAlignment vg::MinimizerMapper::connect_consistently	(	const std::string &	sequence,
		const pos_t &	left_anchor,
		const pos_t &	right_anchor,
		const WFAExtender &	wfa_extender
	)

staticprotected

Produce a WFAAlignment of the given sequence between the given points that will be the same (modulo reverse-complementation) no matter whether the sequence and anchors are reverse-complemented or not.

◆ dfs_gbwt() [1/3]

void vg::MinimizerMapper::dfs_gbwt	(	const gbwt::SearchState &	start_state,
		size_t	from_offset,
		size_t	walk_distance,
		const function< void(const handle_t &)> &	enter_handle,
		const function< void(void)>	exit_handle
	)		const

protected

The same as dfs_gbwt on a handle and an offset, but takes a gbwt::SearchState that defines only some haplotypes on a handle to start with.

◆ dfs_gbwt() [2/3]

void vg::MinimizerMapper::dfs_gbwt	(	const Position &	from,
		size_t	walk_distance,
		const function< void(const handle_t &)> &	enter_handle,
		const function< void(void)>	exit_handle
	)		const

protected

Run a DFS on valid haplotypes in the GBWT starting from the given Position, and continuing up to the given number of bases.

Calls enter_handle when the DFS enters a haplotype visit to a particular handle, and exit_handle when it exits a visit. These let the caller maintain a stack and track the traversals.

The starting node is only entered if its offset isn't equal to its length (i.e. bases remain to be visited).

Stopping early is not permitted.

◆ dfs_gbwt() [3/3]

void vg::MinimizerMapper::dfs_gbwt	(	handle_t	from_handle,
		size_t	from_offset,
		size_t	walk_distance,
		const function< void(const handle_t &)> &	enter_handle,
		const function< void(void)>	exit_handle
	)		const

protected

The same as dfs_gbwt on a Position, but takes a handle in the backing gbwt_graph and an offset from the start of the handle instead.

◆ distance_between() [1/2]

int64_t vg::MinimizerMapper::distance_between	(	const Alignment &	aln1,
		const Alignment &	aln2
	)

protected

Get the distance between a pair of read alignments, or std::numeric_limits<int64_t>::max() if unreachable.

◆ distance_between() [2/2]

int64_t vg::MinimizerMapper::distance_between	(	const pos_t &	pos1,
		const pos_t &	pos2
	)

protected

Get the distance between a pair of positions, or std::numeric_limits<int64_t>::max() if unreachable.

◆ distance_to_annotation()

double vg::MinimizerMapper::distance_to_annotation ( int64_t distance ) const

protected

Convert an integer distance, with limits standing for no distance, to a double annotation that can safely be parsed back from JSON into an integer if it is integral.

◆ do_alignment_on_chains()

void vg::MinimizerMapper::do_alignment_on_chains	(	Alignment &	aln,
		const std::vector< Seed > &	seeds,
		const VectorView< MinimizerMapper::Minimizer > &	minimizers,
		const vector< algorithms::Anchor > &	seed_anchors,
		const std::vector< std::vector< size_t >> &	chains,
		const std::vector< size_t > &	chain_source_tree,
		const std::vector< double > &	multiplicity_by_chain,
		const std::vector< int > &	chain_score_estimates,
		const std::vector< std::vector< size_t >> &	minimizer_kept_chain_count,
		vector< Alignment > &	alignments,
		vector< double > &	multiplicity_by_alignment,
		vector< size_t > &	alignments_to_source,
		SmallBitset &	minimizer_explored,
		aligner_stats_t &	stats,
		bool &	funnel_depleted,
		LazyRNG &	rng,
		Funnel &	funnel
	)		const

protected

◆ do_chaining_on_fragments()

void vg::MinimizerMapper::do_chaining_on_fragments	(	Alignment &	aln,
		const ZipCodeForest &	zip_code_forest,
		const std::vector< Seed > &	seeds,
		const VectorView< MinimizerMapper::Minimizer > &	minimizers,
		const std::vector< std::vector< size_t >> &	fragments,
		const std::vector< double > &	fragment_scores,
		const std::vector< algorithms::Anchor > &	fragment_anchors,
		const std::vector< size_t > &	fragment_source_tree,
		const std::vector< std::vector< size_t >> &	minimizer_kept_fragment_count,
		const std::vector< double > &	multiplicity_by_fragment,
		std::vector< std::vector< size_t >> &	chains,
		std::vector< size_t > &	chain_source_tree,
		std::vector< int > &	chain_score_estimates,
		std::vector< std::vector< size_t >> &	minimizer_kept_chain_count,
		std::vector< double > &	multiplicity_by_chain,
		vector< double > &	multiplicity_by_tree,
		std::unordered_map< size_t, std::vector< size_t >> &	good_fragments_in,
		LazyRNG &	rng,
		Funnel &	funnel
	)		const

protected

Given a collection of fragments, filter down to the good ones and do chaining on them

◆ do_fragmenting_on_trees()

void vg::MinimizerMapper::do_fragmenting_on_trees	(	Alignment &	aln,
		const ZipCodeForest &	zip_code_forest,
		const std::vector< Seed > &	seeds,
		const VectorView< MinimizerMapper::Minimizer > &	minimizers,
		const vector< algorithms::Anchor > &	seed_anchors,
		std::vector< std::vector< size_t >> &	fragments,
		std::vector< double > &	fragment_scores,
		std::vector< algorithms::Anchor > &	fragment_anchors,
		std::vector< size_t > &	fragment_source_tree,
		std::vector< std::vector< size_t >> &	minimizer_kept_fragment_count,
		std::vector< double > &	multiplicity_by_fragment,
		std::vector< Alignment > &	alignments,
		SmallBitset &	minimizer_explored,
		vector< double > &	multiplicity_by_alignment,
		LazyRNG &	rng,
		Funnel &	funnel
	)		const

protected

Given a collection of zipcode trees, score the trees and do fragmenting on the best trees.

This will fill in the given vectors of fragments, fragment scores, etc.

If we do gapless extension, turn good full-length gapless extensions into alignments and return them in alignments. Gapless extensions are considered good enough if they have fewer than default_max_extension_mismatches mismatches.

◆ dump_chaining_problem()

void vg::MinimizerMapper::dump_chaining_problem	(	const std::vector< algorithms::Anchor > &	anchors,
		const std::vector< size_t > &	cluster_seeds_sorted,
		const HandleGraph &	graph
	)

staticprotected

Dump a whole chaining problem.

◆ dump_debug_clustering()

void vg::MinimizerMapper::dump_debug_clustering	(	const Cluster &	cluster,
		size_t	cluster_number,
		const VectorView< Minimizer > &	minimizers,
		const std::vector< Seed > &	seeds
	)

staticprotected

Print the seed content of a cluster.

◆ dump_debug_dotplot()

void vg::MinimizerMapper::dump_debug_dotplot	(	const std::string &	name,
		const VectorView< Minimizer > &	minimizers,
		const std::vector< Seed > &	seeds,
		const std::vector< std::pair< std::string, std::vector< std::vector< size_t >>>> &	seed_sets,
		const PathPositionHandleGraph *	path_graph
	)

staticprotected

Dump dotplot information for seeds. Displays one or more named collections of runs of seeds.

◆ dump_debug_extension_set()

void vg::MinimizerMapper::dump_debug_extension_set	(	const HandleGraph &	graph,
		const Alignment &	aln,
		const vector< GaplessExtension > &	extended_seeds
	)

staticprotected

Dump all the extensions in an extension set.

◆ dump_debug_graph()

void vg::MinimizerMapper::dump_debug_graph ( const HandleGraph & graph )

staticprotected

Dump a graph.

◆ dump_debug_minimizers()

void vg::MinimizerMapper::dump_debug_minimizers	(	const VectorView< Minimizer > &	minimizers,
		const string &	sequence,
		const vector< size_t > *	to_include = `nullptr`,
		size_t	start_offset = `0`,
		size_t	length_limit = `std::numeric_limits<size_t>::max()`
	)

staticprotected

Dump all the given minimizers, with optional subset restriction.

◆ dump_debug_query() [1/2]

void vg::MinimizerMapper::dump_debug_query ( const Alignment & aln )

staticprotected

Print information about a read to be aligned.

◆ dump_debug_query() [2/2]

void vg::MinimizerMapper::dump_debug_query	(	const Alignment &	aln1,
		const Alignment &	aln2
	)

staticprotected

Print information about a read pair to be aligned.

◆ dump_debug_seeds()

void vg::MinimizerMapper::dump_debug_seeds	(	const VectorView< Minimizer > &	minimizers,
		const std::vector< Seed > &	seeds,
		const std::vector< size_t > &	selected_seeds
	)

staticprotected

Print information about a selected set of seeds.

◆ dump_debug_sequence()

void vg::MinimizerMapper::dump_debug_sequence	(	ostream &	out,
		const string &	sequence,
		size_t	start_offset = `0`,
		size_t	length_limit = `std::numeric_limits<size_t>::max()`
	)

staticprotected

Print a sequence with base numbering.

◆ extend_seed_group()

vector< GaplessExtension > vg::MinimizerMapper::extend_seed_group	(	const std::vector< size_t > &	seed_group,
		size_t	source_num,
		const VectorView< Minimizer > &	minimizers,
		const std::vector< Seed > &	seeds,
		const string &	sequence,
		size_t	max_mismatches,
		vector< vector< size_t >> *	minimizer_kept_count = `nullptr`,
		Funnel *	funnel = `nullptr`,
		std::vector< std::vector< size_t >> *	seeds_used = `nullptr`
	)		const

protected

Extends the seeds in a cluster or other grouping into a collection of GaplessExtension objects.

If funnel is set, the group is intended to come from the previous funnel stage and will be introduced in this one.

If seeds_used is not null, it should be an empty vector that gets filled with, for each gapless extension, the numbers of the seeds in seeds that are subsumed into the extension. They will be sorted by the stapled base (first base for forward strand, last base for reverse strand) in the read.

Note that multiple gapless extensions might cover each seed position or use each seed.

◆ extension_to_alignment()

void vg::MinimizerMapper::extension_to_alignment	(	const GaplessExtension &	extension,
		Alignment &	alignment
	)		const

protected

Convert the GaplessExtension into an alignment. This assumes that the extension is a full-length alignment and that the sequence field of the alignment has been set.

◆ faster_cap()

double vg::MinimizerMapper::faster_cap	(	const VectorView< Minimizer > &	minimizers,
		vector< size_t > &	minimizers_explored,
		const string &	sequence,
		const string &	quality_bytes
	)

staticprotected

Compute a bound on the Phred score probability of a mapping being wrong due to base errors and unlocated minimizer hits that prevented us from finding the true alignment.

Algorithm uses a "sweep line" dynamic programming approach. For a read with minimizers aligned to it:

             000000000011111111112222222222
             012345678901234567890123456789
Read:        ******************************
Minimizer 1:    *****
Minimizer 2:       *****
Minimizer 3:                   *****
Minimizer 4:                      *****

For each distinct read interval of overlapping minimizers, e.g. in the example the intervals 3,4,5; 6,7; 8,9,10; 18,19,20; 21,22; and 23,24,25 we consider base errors that would result in the minimizers in the interval being incorrect

We use dynamic programming sweeping left-to-right over the intervals to compute the probability of the minimum number of base errors needed to disrupt all the minimizers.

Will sort minimizers_explored (which is indices into minimizers) by minimizer start position.

◆ finalize_fragment_length_distr()

void vg::MinimizerMapper::finalize_fragment_length_distr ( )

inline

◆ find_chain_alignment()

Alignment vg::MinimizerMapper::find_chain_alignment	(	const Alignment &	aln,
		const VectorView< algorithms::Anchor > &	to_chain,
		const std::vector< size_t > &	chain,
		aligner_stats_t *	stats = `nullptr`
	)		const

protected

Turn a chain into an Alignment.

Operating on the given input alignment, align the tails and intervening sequences along the given chain of perfect-match seeds, and return an optimal Alignment.

If given base processing stats for bases and for time, adds aligned bases and consumed time to them.

◆ find_minimizers()

std::vector< MinimizerMapper::Minimizer > vg::MinimizerMapper::find_minimizers	(	const std::string &	sequence,
		Funnel &	funnel
	)		const

protected

Find the minimizers in the sequence using the minimizer index, and return them sorted in read order.

◆ find_optimal_tail_alignments()

void vg::MinimizerMapper::find_optimal_tail_alignments	(	const Alignment &	aln,
		const vector< GaplessExtension > &	extended_seeds,
		LazyRNG &	rng,
		Alignment &	best,
		Alignment &	second_best
	)		const

protected

Operating on the given input alignment, align the tails dangling off the given extended perfect-match seeds and produce an optimal alignment into the given output Alignment object, best, and the second best alignment into second_best.

Uses the given RNG to break ties.

◆ find_seeds()

std::vector< MinimizerMapper::Seed > vg::MinimizerMapper::find_seeds	(	const std::vector< Minimizer > &	minimizers_in_read_order,
		const VectorView< Minimizer > &	minimizers,
		const Alignment &	aln,
		Funnel &	funnel
	)		const

protected

Find seeds for all minimizers passing the filters. Takes in minimizers sorted in read order, and a view of them sorted in score order.

◆ fix_dozeu_end_deletions()

void vg::MinimizerMapper::fix_dozeu_end_deletions ( Alignment & rescued_alignment ) const

protected

When dozeu doesn't have any seeds, its scan heuristic can lead to inaccurate anchoring, with the end result that one end of the alignment has a deletion that doesn't connect to an aligned base. This function removes those deletions.

◆ fix_dozeu_score()

void vg::MinimizerMapper::fix_dozeu_score	(	Alignment &	rescued_alignment,
		const HandleGraph &	rescue_graph,
		const std::vector< handle_t > &	topological_order
	)		const

protected

When we use dozeu for rescue, the reported alignment score is incorrect. 1) Dozeu only gives the full-length bonus once. 2) There is no penalty for a softclip at the edge of the subgraph. This function calculates the score correctly. If the score is <= 0, we realign the read using GSSW. TODO: This should be unnecessary.

◆ flag_repetitive_minimizers()

void vg::MinimizerMapper::flag_repetitive_minimizers ( std::vector< Minimizer > & minimizers_in_read_order ) const

protected

Flag minimizers as being in repetitive regions of the read

◆ for_each_agglomeration_interval()

void vg::MinimizerMapper::for_each_agglomeration_interval	(	const VectorView< Minimizer > &	minimizers,
		const string &	sequence,
		const string &	quality_bytes,
		const vector< size_t > &	minimizer_indices,
		const function< void(size_t, size_t, size_t, size_t)> &	iteratee
	)

staticprotected

Given a collection of minimizers, and a list of the minimizers we actually care about (as indices into the collection), iterate over common intervals of overlapping minimizer agglomerations.

Calls the given callback with (left, right, bottom, top), where left is the first base of the agglomeration interval (inclusive), right is the last base of the agglomeration interval (exclusive), bottom is the index of the first minimizer with an agglomeration in the interval and top is the index of the last minimizer with an agglomeration in the interval (exclusive).

minimizer_indices must be sorted by agglomeration end, and then by agglomeration start, so they can be decomposed into nice rectangles.

Note that bottom and top are offsets into minimizer_indices, NOT minimizers itself. Only contiguous ranges in minimizer_indices actually make sense.

◆ force_fragment_length_distr()

void vg::MinimizerMapper::force_fragment_length_distr	(	double	mean,
		double	stdev
	)

inline

◆ fragment_distr_is_finalized()

bool vg::MinimizerMapper::fragment_distr_is_finalized ( )

inline

◆ get_best_alignment_against_any_tree()

pair< Path, size_t > vg::MinimizerMapper::get_best_alignment_against_any_tree	(	const vector< TreeSubgraph > &	trees,
		const string &	sequence,
		const Position &	default_position,
		bool	pin_left,
		size_t	longest_detectable_gap,
		LazyRNG &	rng
	)		const

protected

Find the best alignment of the given sequence against any of the trees provided in trees, where each tree is a TreeSubgraph over the GBWT graph. Each tree subgraph is rooted at the left in its own local coordinate space, even if we are pinning on the right.

If no mapping is possible (for example, because there are no trees), produce a pure insert at default_position.

Alignment is always pinned.

If pin_left is true, pin the alignment on the left to the root of each tree. Otherwise pin it on the right to the root of each tree.

Limits the length of the longest gap to longest_detectable_gap.

Returns alignments in gbwt_graph space.

◆ get_best_chain_stats()

void vg::MinimizerMapper::get_best_chain_stats	(	Alignment &	aln,
		const ZipCodeForest &	zip_code_forest,
		const std::vector< Seed > &	seeds,
		const VectorView< MinimizerMapper::Minimizer > &	minimizers,
		const std::vector< std::vector< size_t >> &	fragments,
		const std::unordered_map< size_t, std::vector< size_t >> &	good_fragments_in,
		const std::vector< std::vector< size_t >> &	chains,
		const std::vector< size_t > &	chain_source_tree,
		const vector< algorithms::Anchor > &	seed_anchors,
		const std::vector< int > &	chain_score_estimates,
		bool &	best_chain_correct,
		double &	best_chain_coverage,
		size_t &	best_chain_longest_jump,
		double &	best_chain_average_jump,
		size_t &	best_chain_anchors,
		size_t &	best_chain_anchor_length,
		Funnel &	funnel
	)		const

protected

Collect stats about the best chains for annotating the final alignment

◆ get_distance_limit()

size_t vg::MinimizerMapper::get_distance_limit ( size_t read_length ) const

inline

Get the distance limit for the given read length

◆ get_fragment_length_mean()

double vg::MinimizerMapper::get_fragment_length_mean ( ) const

inline

◆ get_fragment_length_sample_size()

size_t vg::MinimizerMapper::get_fragment_length_sample_size ( ) const

inline

◆ get_fragment_length_stdev()

double vg::MinimizerMapper::get_fragment_length_stdev ( ) const

inline

◆ get_log10_prob_of_disruption_in_interval()

double vg::MinimizerMapper::get_log10_prob_of_disruption_in_interval	(	const VectorView< Minimizer > &	minimizers,
		const string &	sequence,
		const string &	quality_bytes,
		const vector< size_t >::iterator &	disrupt_begin,
		const vector< size_t >::iterator &	disrupt_end,
		size_t	left,
		size_t	right
	)

staticprotected

Gives the log10 prob of a base error in the given interval of the read, accounting for the disruption of specified minimizers.

minimizers is the collection of all minimizers

disrupt_begin and disrupt_end are iterators defining a sequence of indices of minimizers in minimizers that are disrupted.

left and right are the inclusive and exclusive bounds of the interval of the read where the disruption occurs.

◆ get_prob_of_disruption_in_column()

double vg::MinimizerMapper::get_prob_of_disruption_in_column	(	const VectorView< Minimizer > &	minimizers,
		const string &	sequence,
		const string &	quality_bytes,
		const vector< size_t >::iterator &	disrupt_begin,
		const vector< size_t >::iterator &	disrupt_end,
		size_t	index
	)

staticprotected

Gives the raw probability of a base error in the given column of the read, accounting for the disruption of specified minimizers.

minimizers is the collection of all minimizers

disrupt_begin and disrupt_end are iterators defining a sequence of indices of minimizers in minimizers that are disrupted.

index is the position in the read where the disruption occurs.

◆ get_read_coverage()

double vg::MinimizerMapper::get_read_coverage	(	const Alignment &	aln,
		const VectorView< std::vector< size_t >> &	seed_sets,
		const std::vector< Seed > &	seeds,
		const VectorView< Minimizer > &	minimizers
	)		const

protected

Get the fraction of read bases covered by the given chains/fragments of seeds. A base is covered if it is between the first and last endpoints in the read of any of the given lists of seeds. The lists of seeds are each assumed to be colinear in the read.

◆ get_tail_forest()

vector< TreeSubgraph > vg::MinimizerMapper::get_tail_forest	(	const GaplessExtension &	extended_seed,
		size_t	read_length,
		bool	left_tails,
		size_t *	longest_detectable_gap = `nullptr`
	)		const

protected

Get all the trees defining tails off the specified side of the specified gapless extension. Should only be called if a tail on that side exists, or this is a waste of time.

If the gapless extension starts or ends at a node boundary, there may be multiple trees produced, each with a distinct root.

If the gapless extension abuts the edge of the read, an empty forest will be produced.

Each tree is represented as a TreeSubgraph over our gbwt_graph.

If left_tails is true, the trees read out of the left sides of the gapless extension. Otherwise they read out of the right side.

As a side effect, saves the length of the longest detectable gap in an alignment of a tail to the forest into the provided location, if set.

◆ immutable_path_from_length()

size_t vg::MinimizerMapper::immutable_path_from_length ( const ImmutablePath & path )

staticprotected

Get the from length of an ImmutablePath.

Can't be called path_from_length or it will shadow the one for Paths instead of overloading.

◆ log_alignment() [1/2]

string vg::MinimizerMapper::log_alignment ( const Alignment & aln )

staticprotected

Turn an Alignment into a conveniently-sized string for logging.

◆ log_alignment() [2/2]

string vg::MinimizerMapper::log_alignment	(	const Path &	path,
		bool	force_condensed = `false`
	)

staticprotected

Turn a Path from an alignment into a conveniently-sized string for logging.

◆ log_bits()

string vg::MinimizerMapper::log_bits ( const std::vector< bool > & bits )

staticprotected

Turn a list of bit flags into a compact representation.

◆ log_name()

string vg::MinimizerMapper::log_name ( )

staticprotected

Get the thread identifier prefix for logging.

◆ longest_detectable_gap_in_range()

size_t vg::MinimizerMapper::longest_detectable_gap_in_range	(	const Alignment &	aln,
		const std::string::const_iterator &	sequence_begin,
		const std::string::const_iterator &	sequence_end,
		const GSSWAligner *	aligner
	)

staticprotected

Determine the gap limit to use when aligning the given range of sequence bases for the given Alignment.

Accounts for the longest gap that could be detected anywhere in the range, not just at the very beginning or the very end, or at a single point like GSSWAligner::longest_detectable_gap().

◆ map() [1/2]

vector< Alignment > vg::MinimizerMapper::map ( Alignment & aln )

Map the given read. Return a vector of alignments that it maps to, winner first.

◆ map() [2/2]

void vg::MinimizerMapper::map	(	Alignment &	aln,
		AlignmentEmitter &	alignment_emitter
	)

Map the given read, and send output to the given AlignmentEmitter. May be run from any thread. TODO: Can't be const because the clusterer's cluster_seeds isn't const.

◆ map_from_chains()

vector< Alignment > vg::MinimizerMapper::map_from_chains ( Alignment & aln )

Map the given read using chaining of seeds. Return a vector of alignments that it maps to, winner first.

◆ map_from_extensions()

vector< Alignment > vg::MinimizerMapper::map_from_extensions ( Alignment & aln )

Map the given read using gapless extensions. Return a vector of alignments that it maps to, winner first.

◆ map_paired() [1/2]

pair< vector< Alignment >, vector< Alignment > > vg::MinimizerMapper::map_paired	(	Alignment &	aln1,
		Alignment &	aln2
	)

Map the given pair of reads, where aln1 is upstream of aln2 and they are oriented towards each other in the graph.

If the fragment length distribution is not yet fixed, reads will be mapped independently. Otherwise, they will be mapped according to the fragment length distribution.

◆ map_paired() [2/2]

pair< vector< Alignment >, vector< Alignment > > vg::MinimizerMapper::map_paired	(	Alignment &	aln1,
		Alignment &	aln2,
		vector< pair< Alignment, Alignment >> &	ambiguous_pair_buffer
	)

Map the given pair of reads, where aln1 is upstream of aln2 and they are oriented towards each other in the graph.

If the reads are ambiguous and there's no fragment length distribution fixed yet, they will be dropped into ambiguous_pair_buffer.

Otherwise, at least one result will be returned for them (although it may be the unmapped alignment).

◆ no_chain_info()

static gbwtgraph::Payload vg::MinimizerMapper::no_chain_info ( )

inlinestaticprotected

How should we initialize chain info when it's not stored in the minimizer index?

◆ pair_all()

void vg::MinimizerMapper::pair_all ( std::array< vector< Alignment >, 2 > & mappings ) const

protected

Set pair partner references for paired mapping results.

◆ pick_mappings_from_alignments()

void vg::MinimizerMapper::pick_mappings_from_alignments	(	Alignment &	aln,
		const std::vector< Alignment > &	alignments,
		const std::vector< double > &	multiplicity_by_alignment,
		const std::vector< size_t > &	alignments_to_source,
		const std::vector< int > &	chain_score_estimates,
		std::vector< Alignment > &	mappings,
		std::vector< double > &	scores,
		std::vector< double > &	multiplicity_by_mapping,
		bool &	funnel_depleted,
		LazyRNG &	rng,
		Funnel &	funnel
	)		const

protected

◆ process_until_threshold_a()

template<typename Score >

void vg::MinimizerMapper::process_until_threshold_a	(	size_t	items,
		const function< Score(size_t)> &	get_score,
		double	threshold,
		size_t	min_count,
		size_t	max_count,
		LazyRNG &	rng,
		const function< bool(size_t, size_t)> &	process_item,
		const function< void(size_t)> &	discard_item_by_count,
		const function< void(size_t)> &	discard_item_by_score
	)		const

protected

Given a count of items, a function to get the score of each, a score-difference-from-the-best cutoff, a min and max processed item count, and a function to get a sort-shuffling seed for breaking ties, process items in descending score order by calling process_item with the item's number and the number of other items with the same or better score, until min_count items are processed and either max_count items are processed or the score difference threshold is hit (or we run out of items).

If process_item returns false, the item is skipped and does not count against min_count or max_count.

Call discard_item_by_count with the item's number for all remaining items that would pass the score threshold.

Call discard_item_by_score with the item's number for all remaining items that would fail the score threshold.

◆ process_until_threshold_b()

template<typename Score >

void vg::MinimizerMapper::process_until_threshold_b	(	const vector< Score > &	scores,
		double	threshold,
		size_t	min_count,
		size_t	max_count,
		LazyRNG &	rng,
		const function< bool(size_t, size_t)> &	process_item,
		const function< void(size_t)> &	discard_item_by_count,
		const function< void(size_t)> &	discard_item_by_score
	)		const

protected

Same as the other process_until_threshold functions, except using a vector to supply scores.

◆ process_until_threshold_c()

template<typename Score >

void vg::MinimizerMapper::process_until_threshold_c	(	size_t	items,
		const function< Score(size_t)> &	get_score,
		const function< bool(size_t, size_t)> &	comparator,
		double	threshold,
		size_t	min_count,
		size_t	max_count,
		LazyRNG &	get_seed,
		const function< bool(size_t, size_t)> &	process_item,
		const function< void(size_t)> &	discard_item_by_count,
		const function< void(size_t)> &	discard_item_by_score
	)		const

protected

Same as the other process_until_threshold functions, except user supplies comparator to sort the items (must still be sorted by score).

◆ score_alignment_pair()

double vg::MinimizerMapper::score_alignment_pair	(	Alignment &	aln1,
		Alignment &	aln2,
		int64_t	fragment_distance
	)

protected

Score a pair of alignments given the distance between them

◆ score_cluster()

void vg::MinimizerMapper::score_cluster	(	Cluster &	cluster,
		size_t	i,
		const VectorView< Minimizer > &	minimizers,
		const std::vector< Seed > &	seeds,
		size_t	seq_length,
		Funnel &	funnel
	)		const

protected

Determine cluster score, read coverage, and a vector of flags for the minimizers present in the cluster. Score is the sum of the scores of distinct minimizers in the cluster, while read coverage is the fraction of the read covered by seeds in the cluster.

Puts the cluster in the funnel as coming from its seeds.

◆ score_extension_group()

int vg::MinimizerMapper::score_extension_group	(	const Alignment &	aln,
		const vector< GaplessExtension > &	extended_seeds,
		int	gap_open_penalty,
		int	gap_extend_penalty
	)

staticprotected

Score the given group of gapless extensions. Determines the best score that can be obtained by chaining extensions together, using the given gap open and gap extend penalties to charge for either overlaps or gaps in coverage of the read.

Enforces that overlaps cannot result in containment.

Input extended seeds must be sorted by start position.

◆ score_extensions() [1/2]

std::vector< int > vg::MinimizerMapper::score_extensions	(	const std::vector< std::pair< std::vector< GaplessExtension >, size_t >> &	extensions,
		const Alignment &	aln,
		Funnel &	funnel
	)		const

protected

Score the set of extensions for each cluster using score_extension_group(). Return the scores in the same order as the extensions.

This version allows the collections of extensions to be scored to come with annotating read numbers, which are ignored.

◆ score_extensions() [2/2]

std::vector< int > vg::MinimizerMapper::score_extensions	(	const std::vector< std::vector< GaplessExtension >> &	extensions,
		const Alignment &	aln,
		Funnel &	funnel
	)		const

protected

Score the set of extensions for each cluster using score_extension_group(). Return the scores in the same order as the extension groups.

◆ score_tree()

std::pair< double, double > vg::MinimizerMapper::score_tree	(	const ZipCodeForest &	zip_code_forest,
		size_t	i,
		const VectorView< Minimizer > &	minimizers,
		const std::vector< Seed > &	seeds,
		size_t	seq_length,
		Funnel &	funnel
	)		const

protected

Determine score and read coverage for a zip code tree. Score is the sum of the scores of distinct minimizers in the tree, while read coverage is the fraction of the read covered by seeds in the tree.

Puts the tree in the funnel as coming from its seeds.

◆ seeds_in_subgraph()

GaplessExtender::cluster_type vg::MinimizerMapper::seeds_in_subgraph	(	const VectorView< Minimizer > &	minimizers,
		const std::unordered_set< nid_t > &	subgraph
	)		const

protected

Return all the non-redundant seeds in the subgraph, including those from minimizers not used for mapping.

◆ set_alignment_scores() [1/4]

void vg::AlignerClient::set_alignment_scores

Set the aligner scoring parameters and create the stored aligner instances. The score matrix should be a 4 x 4 array in the order (ACGT). Other overloads of set_alignment_scores all call this one. Note that an override of this method can't be called from the constructor, so when overriding it, make sure to also do your extra work in the constructor.

◆ set_alignment_scores() [2/4]

void vg::MinimizerMapper::set_alignment_scores	(	const int8_t *	score_matrix,
		int8_t	gap_open,
		int8_t	gap_extend,
		int8_t	full_length_bonus
	)

virtual

Set the aligner scoring parameters and create the stored aligner instances. The score matrix should be a 4 x 4 array in the order (ACGT). Other overloads of set_alignment_scores all call this one. Note that an override of this method can't be called from the constructor, so when overriding it, make sure to also do your extra work in the constructor.

Reimplemented from vg::AlignerClient.

◆ set_alignment_scores() [3/4]

void vg::AlignerClient::set_alignment_scores

Set all the aligner scoring parameters and create the stored aligner instances.

◆ set_alignment_scores() [4/4]

void vg::AlignerClient::set_alignment_scores

Set the aligner scoring parameters and create the stored aligner instances. The stream should contain a 4 x 4 whitespace-separated substitution matrix (in the order ACGT).

◆ sort_minimizers_by_score()

std::vector< size_t > vg::MinimizerMapper::sort_minimizers_by_score	(	const std::vector< Minimizer > &	minimizers_in_read_order,
		LazyRNG &	rng
	)		const

protected

Return the indices of all the minimizers, sorted in descending order by their minimizers' scores.

◆ tag_seeds()

void vg::MinimizerMapper::tag_seeds	(	const Alignment &	aln,
		const std::vector< Seed >::const_iterator &	begin,
		const std::vector< Seed >::const_iterator &	end,
		const VectorView< Minimizer > &	minimizers,
		size_t	funnel_offset,
		Funnel &	funnel
	)		const

protected

If tracking correctness, mark seeds that are correctly mapped as correct in the funnel, based on proximity along paths to the input read's refpos. Otherwise, tag just as placed, with the seed's read interval. Assumes we are tracking provenance.

◆ to_anchor() [1/2]

algorithms::Anchor vg::MinimizerMapper::to_anchor	(	const Alignment &	aln,
		const VectorView< Minimizer > &	minimizers,
		std::vector< Seed > &	seeds,
		size_t	seed_number,
		const HandleGraph &	graph,
		const Aligner *	aligner
	)

staticprotected

Convert a single seed to a single chaining anchor.

◆ to_anchor() [2/2]

algorithms::Anchor vg::MinimizerMapper::to_anchor	(	const Alignment &	aln,
		size_t	read_start,
		size_t	read_end,
		const std::vector< size_t > &	sorted_seeds,
		const std::vector< algorithms::Anchor > &	seed_anchors,
		const std::vector< size_t >::const_iterator &	mismatch_begin,
		const std::vector< size_t >::const_iterator &	mismatch_end,
		const HandleGraph &	graph,
		const Aligner *	aligner
	)

staticprotected

Convert a read region, and the seeds that that region covers the stapled bases of (sorted by stapled base), into a single chaining anchor. Takes an iterator range of positions within the base range that are mismatches.

◆ to_anchors()

std::vector< algorithms::Anchor > vg::MinimizerMapper::to_anchors	(	const Alignment &	aln,
		const VectorView< Minimizer > &	minimizers,
		std::vector< Seed > &	seeds
	)		const

protected

Convert a collection of seeds to a collection of chaining anchors.

◆ to_path()

Path vg::MinimizerMapper::to_path ( const ImmutablePath & path )

staticprotected

Convert an ImmutablePath to a Path.

◆ to_wfa_alignment()

WFAAlignment vg::MinimizerMapper::to_wfa_alignment	(	const algorithms::Anchor &	anchor,
		const Alignment &	aln,
		const Aligner *	aligner
	)		const

protected

Convert an Anchor to a WFAAlignment, given the input read it is from and the Aligner to use for scoring. Accounts for full-length bonuses if the anchor abuts the end of the read.

◆ unoriented_distance_between()

int64_t vg::MinimizerMapper::unoriented_distance_between	(	const pos_t &	pos1,
		const pos_t &	pos2
	)		const

protected

Get the unoriented distance between a pair of positions

◆ validate_clusters()

bool vg::MinimizerMapper::validate_clusters	(	const std::vector< std::vector< Cluster >> &	clusters,
		const std::vector< std::vector< Seed >> &	seeds,
		size_t	read_limit,
		size_t	fragment_limit
	)		const

protected

Do a brute check of the clusters. Print errors to stderr.

◆ wfa_alignment_to_alignment()

void vg::MinimizerMapper::wfa_alignment_to_alignment	(	const WFAAlignment &	wfa_alignment,
		Alignment &	alignment
	)		const

protected

Convert a WFAAlignment into a vg Alignment. This assumes that the WFAAlignment is a full-length alignment and that the sequence field of the vg Alignment has been set.

◆ window_breaking_quality()

static double vg::MinimizerMapper::window_breaking_quality	(	const VectorView< Minimizer > &	minimizers,
		vector< size_t > &	broken,
		const string &	sequence,
		const string &	quality_bytes
	)

staticprotected

Compute a bound on the Phred score probability of having created the agglomerations of the specified minimizers by base errors from the given sequence, which was sequenced with the given qualities.

No limit is imposed if broken is empty.

Takes the collection of all minimizers found, and a vector of the indices of minimizers we are interested in the agglomerations of. May modify the order of that index vector.

Also takes the sequence of the read (to avoid Ns) and the quality string (interpreted as a byte array).

Currently computes a lower-score-bound, upper-probability-bound, suitable for use as a mapping quality cap, by assuming the easiest-to-disrupt possible layout of the windows, and the lowest possible qualities for the disrupting bases.

◆ with_dagified_local_graph()

void vg::MinimizerMapper::with_dagified_local_graph	(	const pos_t &	left_anchor,
		const pos_t &	right_anchor,
		size_t	max_path_length,
		const HandleGraph &	graph,
		const std::function< void(DeletableHandleGraph &, const handle_t &, const handle_t &, const std::function< std::pair< nid_t, bool >(const handle_t &)> &)> &	callback
	)

staticprotected

Clip out the part of the graph between the given positions (left facing into the region to be extracted and right facing out), and dagify it from the perspective of the anchors. If a left anchor is set, all heads should correspond to the left anchor, and if a right anchor is set, all tails should correspond to the right anchor. At least one anchor must be set. Both anchors may be on the same node.

Calls the callback with an extracted, strand-split, dagified graph, the handles for the anchoring copies of the left and right anchor nodes (if not empty), facing the same way as the anchoring positions, and a function that translates from handle in the dagified graph to node ID and orientation in the base graph.

Friends And Related Function Documentation

◆ TestMinimizerMapper

friend class TestMinimizerMapper

friend

Member Data Documentation

◆ align_from_chains

bool vg::MinimizerMapper::align_from_chains = default_align_from_chains

◆ chain_score_threshold

double vg::MinimizerMapper::chain_score_threshold = default_chain_score_threshold

◆ choose_band_padding

std::function<size_t(const Alignment&, const HandleGraph&)> vg::MinimizerMapper::choose_band_padding

protected

We have a function for determining band padding for banded alignment when aligning from chains.

◆ cluster_coverage_threshold

double vg::MinimizerMapper::cluster_coverage_threshold = default_cluster_coverage_threshold

◆ cluster_score_threshold

double vg::MinimizerMapper::cluster_score_threshold = default_cluster_score_threshold

◆ clusterer

SnarlDistanceIndexClusterer vg::MinimizerMapper::clusterer

protected

We have a clusterer.

◆ default_align_from_chains

constexpr bool vg::MinimizerMapper::default_align_from_chains = false

staticconstexpr

If true, produce alignments from extension sets by chaining gapless extensions up and aligning the sequences between them. If false, produce alignments by aligning the tails off of individual gapless extensions.

◆ default_chain_score_threshold

constexpr double vg::MinimizerMapper::default_chain_score_threshold = 100

staticconstexpr

If a chain's score is smaller than the best chain's score by more than this much, don't align it

◆ default_cluster_coverage_threshold

constexpr double vg::MinimizerMapper::default_cluster_coverage_threshold = 0.3

staticconstexpr

If the read coverage of a cluster is less than the best coverage of any tree by more than this much, don't extend it

◆ default_cluster_score_threshold

constexpr double vg::MinimizerMapper::default_cluster_score_threshold = 50

staticconstexpr

If a cluster's score is smaller than the best score of any cluster by more than this much, then don't extend it.

◆ default_distance_limit

constexpr size_t vg::MinimizerMapper::default_distance_limit = 200

staticconstexpr

◆ default_do_dp

constexpr bool vg::MinimizerMapper::default_do_dp = true

staticconstexpr

If false, skip computing base-level alignments.

◆ default_exclude_overlapping_min

constexpr bool vg::MinimizerMapper::default_exclude_overlapping_min = false

staticconstexpr

If set, exclude overlapping minimizers.

◆ default_extension_score_threshold

constexpr int vg::MinimizerMapper::default_extension_score_threshold = 1

staticconstexpr

◆ default_extension_set_min_score

constexpr int vg::MinimizerMapper::default_extension_set_min_score = 20

staticconstexpr

Even if we would have fewer than min_extension_sets results, don't process anything with a score smaller than this.

◆ default_extension_set_score_threshold

constexpr double vg::MinimizerMapper::default_extension_set_score_threshold = 20

staticconstexpr

◆ default_fragment_gap_scale

constexpr double vg::MinimizerMapper::default_fragment_gap_scale = 1.0

staticconstexpr

How much of a multiple should we apply to each transition's gap penalty at fragmenting?

◆ default_fragment_max_graph_lookback_bases

constexpr size_t vg::MinimizerMapper::default_fragment_max_graph_lookback_bases = 300

staticconstexpr

How many bases should we look back in the graph when making fragments?

◆ default_fragment_max_graph_lookback_bases_per_base

constexpr double vg::MinimizerMapper::default_fragment_max_graph_lookback_bases_per_base = 0.03

staticconstexpr

How many bases should we look back in the graph when making fragments, per base of read length?

◆ default_fragment_max_indel_bases

constexpr size_t vg::MinimizerMapper::default_fragment_max_indel_bases = 2000

staticconstexpr

How many bases of indel should we allow in fragments?

◆ default_fragment_max_indel_bases_per_base

constexpr double vg::MinimizerMapper::default_fragment_max_indel_bases_per_base = 0.2

staticconstexpr

How many bases of indel should we allow in fragments per base of read length?

◆ default_fragment_max_min_score

constexpr double vg::MinimizerMapper::default_fragment_max_min_score = std::numeric_limits<double>::max()

staticconstexpr

How high should we let the score threshold based on the best fragment's score get?

◆ default_fragment_max_read_lookback_bases

constexpr size_t vg::MinimizerMapper::default_fragment_max_read_lookback_bases = std::numeric_limits<size_t>::max()

staticconstexpr

How many bases should we look back in the read when making fragments?

◆ default_fragment_max_read_lookback_bases_per_base

constexpr double vg::MinimizerMapper::default_fragment_max_read_lookback_bases_per_base = 1.0

staticconstexpr

How many bases should we look back in the read when making fragments, per base of read length?

◆ default_fragment_min_score

constexpr double vg::MinimizerMapper::default_fragment_min_score = 60

staticconstexpr

What minimum score in points should a fragment have in order to keep it? Needs to be set to some kind of significance threshold.

◆ default_fragment_points_per_possible_match

constexpr double vg::MinimizerMapper::default_fragment_points_per_possible_match = 0

staticconstexpr

◆ default_fragment_score_fraction

constexpr double vg::MinimizerMapper::default_fragment_score_fraction = 0.1

staticconstexpr

How good should a fragment be in order to keep it? Fragments with scores less than this fraction of the best fragment's score will not be used.

◆ default_fragment_set_score_threshold

constexpr double vg::MinimizerMapper::default_fragment_set_score_threshold = 0

staticconstexpr

If a fragment set's score is smaller than the best fragment set's score by more than this much, don't align it

◆ default_gap_scale

constexpr double vg::MinimizerMapper::default_gap_scale = 1.0

staticconstexpr

How much of a multiple should we apply to each transition's gap penalty at chaining?

◆ default_gapless_extension_limit

constexpr size_t vg::MinimizerMapper::default_gapless_extension_limit = 0

staticconstexpr

Do gapless extension to the seeds in each tree before fragmenting the tree if the read length is less than the limit.

◆ default_hard_hit_cap

constexpr size_t vg::MinimizerMapper::default_hard_hit_cap = 500

staticconstexpr

Ignore all minimizers with more than hard_hit_cap hits.

◆ default_hit_cap

constexpr size_t vg::MinimizerMapper::default_hit_cap = 10

staticconstexpr

Use all minimizers with at most hit_cap hits.

◆ default_item_bonus

constexpr int vg::MinimizerMapper::default_item_bonus = 0

staticconstexpr

How much of a bonus should we give to each item in fragmenting/chaining?

◆ default_item_scale

constexpr double vg::MinimizerMapper::default_item_scale = 1.0

staticconstexpr

How much of a multiple should we apply to each item's non-bonus score in fragmenting/chaining?

◆ default_mapq_score_scale

constexpr double vg::MinimizerMapper::default_mapq_score_scale = 1.0

staticconstexpr

How should we scale scores before mapq, for calibration.

◆ default_mapq_score_window

constexpr size_t vg::MinimizerMapper::default_mapq_score_window = 0

staticconstexpr

What number of bp should we re-scale scores to for MAPQ, for calibration? 0 for off.

◆ default_max_alignments

constexpr size_t vg::MinimizerMapper::default_max_alignments = 8

staticconstexpr

How many alignments should we make, max?

◆ default_max_chain_connection

constexpr size_t vg::MinimizerMapper::default_max_chain_connection = 100

staticconstexpr

When converting chains to alignments, what's the longest gap between items we will try to WFA align? Passing strings longer than ~100bp can cause WFAAligner to run for a pathologically long amount of time. May not be 0.

◆ default_max_chaining_problems

constexpr int vg::MinimizerMapper::default_max_chaining_problems = std::numeric_limits<int>::max()

staticconstexpr

Do no more than this many chaining problems.

◆ default_max_chains_per_tree

constexpr size_t vg::MinimizerMapper::default_max_chains_per_tree = 1

staticconstexpr

Allow up to this many chains per tree.

◆ default_max_direct_to_chain

constexpr size_t vg::MinimizerMapper::default_max_direct_to_chain = 0

staticconstexpr

Sometimes we don't do chaining but instead turn fragments directly into chains. If this is 0, then do chaining. Otherwise, take up to this many fragments and turn them into chains.

◆ default_max_dozeu_cells

constexpr size_t vg::MinimizerMapper::default_max_dozeu_cells = (size_t)(1.5 * 1024 * 1024)

staticconstexpr

How big of an alignment in POA cells should we ever try to do with Dozeu? TODO: Lift this when Dozeu's allocator is able to work with >4 MB of memory. Each cell is 16 bits in Dozeu, and we leave some room for the query and padding to full SSE registers. Note that a very chopped graph might still break this!

◆ default_max_dp_cells

constexpr size_t vg::MinimizerMapper::default_max_dp_cells = std::numeric_limits<size_t>::max()

staticconstexpr

How many DP cells should we be willing to do for an end-pinned alignment? If we want to do more than this, just leave tail unaligned.

◆ default_max_extension_mismatches

constexpr size_t vg::MinimizerMapper::default_max_extension_mismatches = GaplessExtender::MAX_MISMATCHES

staticconstexpr

How many mismatches should we allow in gapless extension (except for start node where the limit doesn't count)?

◆ default_max_extensions

constexpr size_t vg::MinimizerMapper::default_max_extensions = 800

staticconstexpr

How many clusters should we produce gapless extensions for, max?

◆ default_max_fragment_length

constexpr size_t vg::MinimizerMapper::default_max_fragment_length = 2000

staticconstexpr

What is the maximum fragment length that we accept as valid for paired-end reads?

◆ default_max_fragments

constexpr size_t vg::MinimizerMapper::default_max_fragments = std::numeric_limits<size_t>::max()

staticconstexpr

How many fragments should we try and make when fragmenting something?

◆ default_max_graph_lookback_bases

constexpr size_t vg::MinimizerMapper::default_max_graph_lookback_bases = 3000

staticconstexpr

How many bases should we look back in the graph when chaining?

◆ default_max_graph_lookback_bases_per_base

constexpr double vg::MinimizerMapper::default_max_graph_lookback_bases_per_base = 0.3

staticconstexpr

How many bases should we look back in the graph when chaining, per base of read length?

◆ default_max_indel_bases

constexpr size_t vg::MinimizerMapper::default_max_indel_bases = 2000

staticconstexpr

How many bases of indel should we allow in chaining?

◆ default_max_indel_bases_per_base

constexpr double vg::MinimizerMapper::default_max_indel_bases_per_base = 0.2

staticconstexpr

How many bases of indel should we allow in chaining, per base of read length?

◆ default_max_local_extensions

constexpr size_t vg::MinimizerMapper::default_max_local_extensions = numeric_limits<size_t>::max()

staticconstexpr

How many extensions should we try as seeds within a mapping location?

◆ default_max_middle_dp_length

constexpr size_t vg::MinimizerMapper::default_max_middle_dp_length = std::numeric_limits<int32_t>::max()

staticconstexpr

How long of a DP can we do before something might go wrong with BandedGlobalAligner or the GBWT-based WFA?

◆ default_max_middle_gap

constexpr size_t vg::MinimizerMapper::default_max_middle_gap = std::numeric_limits<size_t>::max()

staticconstexpr

How many gap bases should we allow in a between-seed alignment, max?

◆ default_max_min_chain_score

constexpr int vg::MinimizerMapper::default_max_min_chain_score = 200

staticconstexpr

Limit the min chain score to no more than this.

◆ default_max_multimaps

constexpr size_t vg::MinimizerMapper::default_max_multimaps = 1

staticconstexpr

◆ default_max_read_lookback_bases

constexpr size_t vg::MinimizerMapper::default_max_read_lookback_bases = std::numeric_limits<size_t>::max()

staticconstexpr

How many bases should we look back in the read when chaining?

◆ default_max_read_lookback_bases_per_base

constexpr double vg::MinimizerMapper::default_max_read_lookback_bases_per_base = 1.0

staticconstexpr

How many bases should we look back in the read when chaining, per base of read length?

◆ default_max_rescue_attempts

constexpr size_t vg::MinimizerMapper::default_max_rescue_attempts = 15

staticconstexpr

For paired end mapping, how many times should we attempt rescue (per read)?

◆ default_max_skipped_bases

constexpr size_t vg::MinimizerMapper::default_max_skipped_bases = 0

staticconstexpr

When turning chains into alignments, we can skip seeds to create gaps up to this length in the graph

◆ default_max_tail_dp_length

constexpr size_t vg::MinimizerMapper::default_max_tail_dp_length = 30000

staticconstexpr

How long of a DP can we do before Dozeu gets lost at traceback due to 16-bit score overflow?

◆ default_max_tail_gap

constexpr size_t vg::MinimizerMapper::default_max_tail_gap = std::numeric_limits<size_t>::max()

staticconstexpr

How many gap bases should we allow in a Dozeu tail alignment, max?

◆ default_max_tail_length

constexpr size_t vg::MinimizerMapper::default_max_tail_length = 100

staticconstexpr

Similarly, what is the maximum tail length we will try to WFA align?

◆ default_max_to_fragment

constexpr size_t vg::MinimizerMapper::default_max_to_fragment = 10

staticconstexpr

How many things should we produce fragments for, max?

◆ default_max_unique_min

constexpr size_t vg::MinimizerMapper::default_max_unique_min = 500

staticconstexpr

Maximum number of distinct minimizers to take.

◆ default_min_chain_score_per_base

constexpr double vg::MinimizerMapper::default_min_chain_score_per_base = 0.01

staticconstexpr

Even if we would have fewer than min_chains results, don't process anything with a score smaller than this, per read base.

◆ default_min_chaining_problems

constexpr int vg::MinimizerMapper::default_min_chaining_problems = 1

staticconstexpr

Disregard the fragment set score thresholds when they would give us fewer than this many chaining problems done.

◆ default_min_chains

constexpr int vg::MinimizerMapper::default_min_chains = 4

staticconstexpr

Disregard the chain score thresholds when they would give us fewer than this many chains aligned.

◆ default_min_extension_sets

constexpr int vg::MinimizerMapper::default_min_extension_sets = 2

staticconstexpr

Disregard the extension set score thresholds when they would give us fewer than this many extension sets.

◆ default_min_extensions

constexpr size_t vg::MinimizerMapper::default_min_extensions = 2

staticconstexpr

Accept at least this many clusters for gapless extension.

◆ default_min_to_fragment

constexpr size_t vg::MinimizerMapper::default_min_to_fragment = 4

staticconstexpr

How many things should we produce fragments for, min?

◆ default_min_unique_node_fraction

constexpr double vg::MinimizerMapper::default_min_unique_node_fraction = 0.0

staticconstexpr

How much of an alignment needs to be from distinct nodes to be a distinct alignment?

◆ default_minimizer_coverage_flank

constexpr size_t vg::MinimizerMapper::default_minimizer_coverage_flank = 250

staticconstexpr

◆ default_minimizer_downsampling_max_window_length

constexpr size_t vg::MinimizerMapper::default_minimizer_downsampling_max_window_length = std::numeric_limits<size_t>::max()

staticconstexpr

◆ default_minimizer_downsampling_window_count

constexpr size_t vg::MinimizerMapper::default_minimizer_downsampling_window_count = 0

staticconstexpr

Window count for minimizer downsampling.

◆ default_minimizer_score_fraction

constexpr double vg::MinimizerMapper::default_minimizer_score_fraction = 0.9

staticconstexpr

Take minimizers between hit_cap and hard_hit_cap hits until this fraction of total score

◆ default_num_bp_per_min

constexpr size_t vg::MinimizerMapper::default_num_bp_per_min = 1000

staticconstexpr

Number of minimizers to select based on read_len/num_min_per_bp.

◆ default_pad_cluster_score_threshold

constexpr double vg::MinimizerMapper::default_pad_cluster_score_threshold = 20

staticconstexpr

If the second best cluster's score is no more than this many points below the cutoff set by cluster_score_threshold, snap that cutoff down to the second best cluster's score, to avoid throwing away promising secondaries.

◆ default_pad_zipcode_tree_score_threshold

constexpr double vg::MinimizerMapper::default_pad_zipcode_tree_score_threshold = 20

staticconstexpr

If the second best tree's score is no more than this many points below the cutoff set by zipcode_tree_score_threshold, snap that cutoff down to the second best tree's score, to avoid throwing away promising secondaries.

◆ default_paired_distance_stdevs

constexpr double vg::MinimizerMapper::default_paired_distance_stdevs = 2.0

staticconstexpr

◆ default_paired_rescue_score_limit

constexpr double vg::MinimizerMapper::default_paired_rescue_score_limit = 0.9

staticconstexpr

How close does an alignment have to be to the best alignment for us to rescue on it.

◆ default_points_per_possible_match

constexpr double vg::MinimizerMapper::default_points_per_possible_match = 0

staticconstexpr

◆ default_rescue_likelihood_limit

constexpr double vg::MinimizerMapper::default_rescue_likelihood_limit = 0.05

staticconstexpr

For rescue, how likely can an alignment be by chance to still accept it?

◆ default_rescue_seed_limit

constexpr size_t vg::MinimizerMapper::default_rescue_seed_limit = 100

staticconstexpr

Do not attempt rescue if there are more seeds in the rescue subgraph.

◆ default_rescue_subgraph_stdevs

constexpr double vg::MinimizerMapper::default_rescue_subgraph_stdevs = 4.0

staticconstexpr

How many stdevs from the mean do we extract a subgraph from?

◆ default_set_refpos

constexpr bool vg::MinimizerMapper::default_set_refpos = false

staticconstexpr

Set refpos field of alignments to positions on nodes they visit.

◆ default_show_work

constexpr bool vg::MinimizerMapper::default_show_work = false

staticconstexpr

If set, log what the mapper is thinking in its mapping of each read.

◆ default_sort_by_chain_score

constexpr bool vg::MinimizerMapper::default_sort_by_chain_score = false

staticconstexpr

Should alignments be ranked by chain score instead of base-level score?

◆ default_track_correctness

constexpr bool vg::MinimizerMapper::default_track_correctness = false

staticconstexpr

Guess which seed hits are correct by location in the linear reference and track if/when their descendants make it through stages of the algorithm. Only works if track_provenance is true.

◆ default_track_position

constexpr bool vg::MinimizerMapper::default_track_position = false

staticconstexpr

Track linear reference position for placements in log output.

◆ default_track_provenance

constexpr bool vg::MinimizerMapper::default_track_provenance = false

staticconstexpr

Track which internal work items came from which others during each stage of the mapping algorithm.

◆ default_use_explored_cap

constexpr bool vg::MinimizerMapper::default_use_explored_cap = false

staticconstexpr

If set, cap mapping quality based on minimizer layout in the read. Only really likely to help for short reads.

◆ default_wfa_distance

constexpr int vg::MinimizerMapper::default_wfa_distance = WFAExtender::ErrorModel::default_distance().min

staticconstexpr

How far behind the leader should the WFA be allowed to get?

◆ default_wfa_distance_per_base

constexpr double vg::MinimizerMapper::default_wfa_distance_per_base = WFAExtender::ErrorModel::default_distance().per_base

staticconstexpr

How far behind the leader should the WFA be allowed to get, per base of read sequence?

◆ default_wfa_max_distance

constexpr int vg::MinimizerMapper::default_wfa_max_distance = WFAExtender::ErrorModel::default_distance().max

staticconstexpr

How far behind the leader should the WFA be allowed to get, at any read length?

◆ default_wfa_max_max_mismatches

constexpr int vg::MinimizerMapper::default_wfa_max_max_mismatches = 20

staticconstexpr

How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails maximum, at any read length?

◆ default_wfa_max_mismatches

constexpr int vg::MinimizerMapper::default_wfa_max_mismatches = 2

staticconstexpr

How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails?

◆ default_wfa_max_mismatches_per_base

constexpr double vg::MinimizerMapper::default_wfa_max_mismatches_per_base = 0.1

staticconstexpr

How many mismatch bases (or equivalent score of indels) should we allow in WFA connections and tails per base of read sequence?

◆ default_zipcode_tree_coverage_threshold

constexpr double vg::MinimizerMapper::default_zipcode_tree_coverage_threshold = 0.3

staticconstexpr

If the read coverage of a tree is less than the best coverage of any tree by more than this much, don't extend it.

◆ default_zipcode_tree_scale

constexpr double vg::MinimizerMapper::default_zipcode_tree_scale = 2.0

staticconstexpr

When making zipcode trees, at what multiple of the read length should the trees be split?

◆ default_zipcode_tree_score_threshold

constexpr double vg::MinimizerMapper::default_zipcode_tree_score_threshold = 50

staticconstexpr

How far do we want to go down looking at zip code trees to make fragments?

◆ distance_index

SnarlDistanceIndex* vg::MinimizerMapper::distance_index

protected

◆ distance_limit

size_t vg::MinimizerMapper::distance_limit = default_distance_limit

◆ do_dp

bool vg::MinimizerMapper::do_dp = default_do_dp

◆ exclude_overlapping_min

bool vg::MinimizerMapper::exclude_overlapping_min = default_exclude_overlapping_min

◆ extender

std::unique_ptr<GaplessExtender> vg::MinimizerMapper::extender

protected

We have a gapless extender to extend seed hits in haplotype space. Because this needs a reference to an Aligner, and because changing the scoring parameters deletes all the aligners, we need to keep this somewhere we can clear out.

◆ extension_score_threshold

int vg::MinimizerMapper::extension_score_threshold = default_extension_score_threshold

◆ extension_set_min_score

int vg::MinimizerMapper::extension_set_min_score = default_extension_set_min_score

◆ extension_set_score_threshold

double vg::MinimizerMapper::extension_set_score_threshold = default_extension_set_score_threshold

◆ fragment_gap_scale

double vg::MinimizerMapper::fragment_gap_scale = default_fragment_gap_scale

◆ fragment_length_distr

FragmentLengthDistribution vg::MinimizerMapper::fragment_length_distr

protected

We have a distribution for read fragment lengths that takes care of knowing when we've observed enough good ones to learn a good distribution.

◆ fragment_max_graph_lookback_bases

size_t vg::MinimizerMapper::fragment_max_graph_lookback_bases = default_fragment_max_graph_lookback_bases

◆ fragment_max_graph_lookback_bases_per_base

double vg::MinimizerMapper::fragment_max_graph_lookback_bases_per_base = default_fragment_max_graph_lookback_bases_per_base

◆ fragment_max_indel_bases

size_t vg::MinimizerMapper::fragment_max_indel_bases = default_fragment_max_indel_bases

◆ fragment_max_indel_bases_per_base

double vg::MinimizerMapper::fragment_max_indel_bases_per_base = default_fragment_max_indel_bases_per_base

◆ fragment_max_min_score

double vg::MinimizerMapper::fragment_max_min_score = default_fragment_max_min_score

◆ fragment_max_read_lookback_bases

size_t vg::MinimizerMapper::fragment_max_read_lookback_bases = default_fragment_max_read_lookback_bases

◆ fragment_max_read_lookback_bases_per_base

double vg::MinimizerMapper::fragment_max_read_lookback_bases_per_base = default_fragment_max_read_lookback_bases_per_base

◆ fragment_min_score

double vg::MinimizerMapper::fragment_min_score = default_fragment_min_score

◆ fragment_points_per_possible_match

double vg::MinimizerMapper::fragment_points_per_possible_match = default_fragment_points_per_possible_match

◆ fragment_score_fraction

double vg::MinimizerMapper::fragment_score_fraction = default_fragment_score_fraction

◆ fragment_set_score_threshold

double vg::MinimizerMapper::fragment_set_score_threshold = default_fragment_set_score_threshold

◆ gap_scale

double vg::MinimizerMapper::gap_scale = default_gap_scale

◆ gapless_extension_limit

size_t vg::MinimizerMapper::gapless_extension_limit = default_gapless_extension_limit

◆ gbwt_graph

const gbwtgraph::GBWTGraph& vg::MinimizerMapper::gbwt_graph

protected

This is our primary graph.

◆ hard_hit_cap

size_t vg::MinimizerMapper::hard_hit_cap = default_hard_hit_cap

◆ hit_cap

size_t vg::MinimizerMapper::hit_cap = default_hit_cap

◆ item_bonus

int vg::MinimizerMapper::item_bonus = default_item_bonus

◆ item_scale

double vg::MinimizerMapper::item_scale = default_item_scale

◆ LONG_LIMIT

const static size_t vg::MinimizerMapper::LONG_LIMIT = 256

staticprotected

Length at which we cut over to long-alignment logging.

◆ MANY_LIMIT

const static size_t vg::MinimizerMapper::MANY_LIMIT = 10

staticprotected

Count at which we cut over to summary logging.

◆ mapq_score_scale

double vg::MinimizerMapper::mapq_score_scale = default_mapq_score_scale

◆ mapq_score_window

size_t vg::MinimizerMapper::mapq_score_window = default_mapq_score_window

◆ max_alignments

size_t vg::MinimizerMapper::max_alignments = default_max_alignments

◆ max_chain_connection

size_t vg::MinimizerMapper::max_chain_connection = default_max_chain_connection

◆ max_chaining_problems

int vg::MinimizerMapper::max_chaining_problems = default_max_chaining_problems

◆ max_chains_per_tree

size_t vg::MinimizerMapper::max_chains_per_tree = default_max_chains_per_tree

◆ max_direct_to_chain

size_t vg::MinimizerMapper::max_direct_to_chain = default_max_direct_to_chain

◆ max_dozeu_cells

size_t vg::MinimizerMapper::max_dozeu_cells = default_max_dozeu_cells

◆ max_dp_cells

size_t vg::MinimizerMapper::max_dp_cells = default_max_dp_cells

◆ max_extension_mismatches

size_t vg::MinimizerMapper::max_extension_mismatches = default_max_extension_mismatches

◆ max_extensions

size_t vg::MinimizerMapper::max_extensions = default_max_extensions

◆ max_fragment_length

size_t vg::MinimizerMapper::max_fragment_length = default_max_fragment_length

◆ max_fragments

size_t vg::MinimizerMapper::max_fragments = default_max_fragments

◆ max_graph_lookback_bases

size_t vg::MinimizerMapper::max_graph_lookback_bases = default_max_graph_lookback_bases

◆ max_graph_lookback_bases_per_base

double vg::MinimizerMapper::max_graph_lookback_bases_per_base = default_max_graph_lookback_bases_per_base

◆ max_indel_bases

size_t vg::MinimizerMapper::max_indel_bases = default_max_indel_bases

◆ max_indel_bases_per_base

double vg::MinimizerMapper::max_indel_bases_per_base = default_max_indel_bases_per_base

◆ max_local_extensions

size_t vg::MinimizerMapper::max_local_extensions = default_max_local_extensions

◆ max_middle_dp_length

size_t vg::MinimizerMapper::max_middle_dp_length = default_max_middle_dp_length

◆ max_middle_gap

size_t vg::MinimizerMapper::max_middle_gap = default_max_middle_gap

◆ max_min_chain_score

int vg::MinimizerMapper::max_min_chain_score = default_max_min_chain_score

◆ max_multimaps

size_t vg::MinimizerMapper::max_multimaps = default_max_multimaps

◆ max_read_lookback_bases

size_t vg::MinimizerMapper::max_read_lookback_bases = default_max_read_lookback_bases

◆ max_read_lookback_bases_per_base

double vg::MinimizerMapper::max_read_lookback_bases_per_base = default_max_read_lookback_bases_per_base

◆ max_rescue_attempts

size_t vg::MinimizerMapper::max_rescue_attempts = default_max_rescue_attempts

◆ max_skipped_bases

size_t vg::MinimizerMapper::max_skipped_bases = default_max_skipped_bases

◆ max_tail_dp_length

size_t vg::MinimizerMapper::max_tail_dp_length = default_max_tail_dp_length

◆ max_tail_gap

size_t vg::MinimizerMapper::max_tail_gap = default_max_tail_gap

◆ max_tail_length

size_t vg::MinimizerMapper::max_tail_length = default_max_tail_length

◆ max_to_fragment

size_t vg::MinimizerMapper::max_to_fragment = default_max_to_fragment

◆ max_unique_min

size_t vg::MinimizerMapper::max_unique_min = default_max_unique_min

◆ min_chain_score_per_base

double vg::MinimizerMapper::min_chain_score_per_base = default_min_chain_score_per_base

◆ min_chaining_problems

int vg::MinimizerMapper::min_chaining_problems = default_min_chaining_problems

◆ min_chains

int vg::MinimizerMapper::min_chains = default_min_chains

◆ min_extension_sets

int vg::MinimizerMapper::min_extension_sets = default_min_extension_sets

◆ min_extensions

size_t vg::MinimizerMapper::min_extensions = default_min_extensions

◆ min_to_fragment

size_t vg::MinimizerMapper::min_to_fragment = default_min_to_fragment

◆ min_unique_node_fraction

double vg::MinimizerMapper::min_unique_node_fraction = default_min_unique_node_fraction

◆ minimizer_coverage_flank

size_t vg::MinimizerMapper::minimizer_coverage_flank = default_minimizer_coverage_flank

◆ minimizer_downsampling_max_window_length

size_t vg::MinimizerMapper::minimizer_downsampling_max_window_length = default_minimizer_downsampling_max_window_length

◆ minimizer_downsampling_window_count

size_t vg::MinimizerMapper::minimizer_downsampling_window_count = default_minimizer_downsampling_window_count

◆ minimizer_index

const gbwtgraph::DefaultMinimizerIndex& vg::MinimizerMapper::minimizer_index

protected

◆ minimizer_score_fraction

double vg::MinimizerMapper::minimizer_score_fraction = default_minimizer_score_fraction

◆ num_bp_per_min

size_t vg::MinimizerMapper::num_bp_per_min = default_num_bp_per_min

◆ pad_cluster_score_threshold

double vg::MinimizerMapper::pad_cluster_score_threshold = default_pad_cluster_score_threshold

◆ pad_zipcode_tree_score_threshold

double vg::MinimizerMapper::pad_zipcode_tree_score_threshold = default_pad_zipcode_tree_score_threshold

◆ paired_distance_stdevs

double vg::MinimizerMapper::paired_distance_stdevs = default_paired_distance_stdevs

◆ paired_rescue_score_limit

double vg::MinimizerMapper::paired_rescue_score_limit = default_paired_rescue_score_limit

◆ path_graph

const PathPositionHandleGraph* vg::MinimizerMapper::path_graph

protected

◆ points_per_possible_match

double vg::MinimizerMapper::points_per_possible_match = default_points_per_possible_match

◆ read_group

string vg::MinimizerMapper::read_group

Apply this read group name.

◆ rescue_algorithm

RescueAlgorithm vg::MinimizerMapper::rescue_algorithm = rescue_dozeu

The algorithm used for rescue.

◆ rescue_likelihood_limit

double vg::MinimizerMapper::rescue_likelihood_limit = default_rescue_likelihood_limit

◆ rescue_seed_limit

size_t vg::MinimizerMapper::rescue_seed_limit = default_rescue_seed_limit

◆ rescue_subgraph_stdevs

double vg::MinimizerMapper::rescue_subgraph_stdevs = default_rescue_subgraph_stdevs

◆ sample_name

string vg::MinimizerMapper::sample_name

Apply this sample name.

◆ set_refpos

bool vg::MinimizerMapper::set_refpos = default_set_refpos

◆ show_work

bool vg::MinimizerMapper::show_work = default_show_work

◆ sort_by_chain_score

bool vg::MinimizerMapper::sort_by_chain_score = default_sort_by_chain_score

◆ track_correctness

bool vg::MinimizerMapper::track_correctness = default_track_correctness

◆ track_position

bool vg::MinimizerMapper::track_position = default_track_position

◆ track_provenance

bool vg::MinimizerMapper::track_provenance = default_track_provenance

◆ use_explored_cap

bool vg::MinimizerMapper::use_explored_cap = default_use_explored_cap

◆ warned_about_bad_distribution

atomic_flag vg::MinimizerMapper::warned_about_bad_distribution = ATOMIC_FLAG_INIT

protected

We may need to complain exactly once that the distribution is bad.

◆ warned_about_rescue_size

atomic_flag vg::MinimizerMapper::warned_about_rescue_size = ATOMIC_FLAG_INIT

Have we complained about hitting the size limit for rescue?

◆ warned_about_tail_size

atomic_flag vg::MinimizerMapper::warned_about_tail_size = ATOMIC_FLAG_INIT

mutable

Have we complained about hitting the size limit for tails?

◆ wfa_distance

int vg::MinimizerMapper::wfa_distance = default_wfa_distance

◆ wfa_distance_per_base

double vg::MinimizerMapper::wfa_distance_per_base = default_wfa_distance_per_base

◆ wfa_max_distance

int vg::MinimizerMapper::wfa_max_distance = default_wfa_max_distance

◆ wfa_max_max_mismatches

int vg::MinimizerMapper::wfa_max_max_mismatches = default_wfa_max_max_mismatches

◆ wfa_max_mismatches

int vg::MinimizerMapper::wfa_max_mismatches = default_wfa_max_mismatches

◆ wfa_max_mismatches_per_base

double vg::MinimizerMapper::wfa_max_mismatches_per_base = default_wfa_max_mismatches_per_base

◆ zip_forest

ZipCodeForest vg::MinimizerMapper::zip_forest

protected

We have a zip code tree for finding distances between seeds.

◆ zipcode_tree_coverage_threshold

double vg::MinimizerMapper::zipcode_tree_coverage_threshold = default_zipcode_tree_coverage_threshold

◆ zipcode_tree_scale

double vg::MinimizerMapper::zipcode_tree_scale = default_zipcode_tree_scale

◆ zipcode_tree_score_threshold

double vg::MinimizerMapper::zipcode_tree_score_threshold = default_zipcode_tree_score_threshold

◆ zipcodes

const ZipCodeCollection* vg::MinimizerMapper::zipcodes

protected

The documentation for this class was generated from the following files:

Classes

Public Types

Public Member Functions

Static Public Member Functions

Public Attributes

Static Public Attributes

Protected Types

Protected Member Functions

Static Protected Member Functions

Protected Attributes

Static Protected Attributes

Friends

Member Typedef Documentation

◆ Cluster

◆ ImmutablePath

◆ Seed

Member Enumeration Documentation

◆ RescueAlgorithm

Constructor & Destructor Documentation

◆ MinimizerMapper()

Member Function Documentation

◆ align_sequence_between()

◆ align_sequence_between_consistently()

◆ annotate_with_minimizer_statistics()

◆ attempt_rescue()

◆ chain_info_to_seed()

◆ compute_mapq_caps()

◆ connect_consistently()

◆ dfs_gbwt() [1/3]

◆ dfs_gbwt() [2/3]

◆ dfs_gbwt() [3/3]

◆ distance_between() [1/2]

◆ distance_between() [2/2]

◆ distance_to_annotation()

◆ do_alignment_on_chains()

◆ do_chaining_on_fragments()

◆ do_fragmenting_on_trees()

◆ dump_chaining_problem()

◆ dump_debug_clustering()

◆ dump_debug_dotplot()

◆ dump_debug_extension_set()

◆ dump_debug_graph()

◆ dump_debug_minimizers()

◆ dump_debug_query() [1/2]

◆ dump_debug_query() [2/2]

◆ dump_debug_seeds()

◆ dump_debug_sequence()

◆ extend_seed_group()

◆ extension_to_alignment()

◆ faster_cap()

◆ finalize_fragment_length_distr()

◆ find_chain_alignment()

◆ find_minimizers()

◆ find_optimal_tail_alignments()

◆ find_seeds()

◆ fix_dozeu_end_deletions()

◆ fix_dozeu_score()

◆ flag_repetitive_minimizers()

◆ for_each_agglomeration_interval()

◆ force_fragment_length_distr()

◆ fragment_distr_is_finalized()

◆ get_best_alignment_against_any_tree()

◆ get_best_chain_stats()

◆ get_distance_limit()

◆ get_fragment_length_mean()

◆ get_fragment_length_sample_size()

◆ get_fragment_length_stdev()

◆ get_log10_prob_of_disruption_in_interval()

◆ get_prob_of_disruption_in_column()

◆ get_read_coverage()

◆ get_tail_forest()

◆ immutable_path_from_length()

◆ log_alignment() [1/2]

◆ log_alignment() [2/2]

◆ log_bits()

◆ log_name()

◆ longest_detectable_gap_in_range()

◆ map() [1/2]

◆ map() [2/2]

◆ map_from_chains()