vg
tools for working with variation graphs
|
#include <aligner.hpp>
Public Member Functions | |
double | max_possible_mapping_quality (int length) const |
double | estimate_max_possible_mapping_quality (int length, double min_diffs, double next_min_diffs) const |
virtual void | align_pinned (Alignment &alignment, const HandleGraph &g, bool pin_left, bool xdrop=false, uint16_t xdrop_max_gap_length=default_xdrop_max_gap_length) const =0 |
virtual void | align_pinned_multi (Alignment &alignment, vector< Alignment > &alt_alignments, const HandleGraph &g, bool pin_left, int32_t max_alt_alns) const =0 |
virtual void | align_global_banded (Alignment &alignment, const HandleGraph &g, int32_t band_padding=0, bool permissive_banding=true) const =0 |
virtual void | align_global_banded_multi (Alignment &alignment, vector< Alignment > &alt_alignments, const HandleGraph &g, int32_t max_alt_alns, int32_t band_padding=0, bool permissive_banding=true) const =0 |
virtual void | align_xdrop (Alignment &alignment, const HandleGraph &g, const vector< MaximalExactMatch > &mems, bool reverse_complemented, uint16_t max_gap_length=default_xdrop_max_gap_length) const =0 |
xdrop aligner More... | |
virtual void | align_xdrop (Alignment &alignment, const HandleGraph &g, const vector< handle_t > &order, const vector< MaximalExactMatch > &mems, bool reverse_complemented, uint16_t max_gap_length=default_xdrop_max_gap_length) const =0 |
virtual int32_t | score_exact_match (const Alignment &aln, size_t read_offset, size_t length) const =0 |
virtual int32_t | score_exact_match (const string &sequence, const string &base_quality) const =0 |
virtual int32_t | score_exact_match (string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator base_qual_begin) const =0 |
virtual int32_t | score_mismatch (string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator base_qual_begin) const =0 |
virtual int32_t | score_full_length_bonus (bool left_side, string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator base_qual_begin) const =0 |
virtual int32_t | score_full_length_bonus (bool left_side, const Alignment &alignment) const =0 |
virtual int32_t | score_partial_alignment (const Alignment &alignment, const HandleGraph &graph, const path_t &path, string::const_iterator seq_begin, bool no_read_end_scoring=false) const =0 |
Compute the score of a path against the given range of subsequence with the given qualities. More... | |
int32_t | score_gap (size_t gap_length) const |
Returns the score of an insert or deletion of the given length. More... | |
void | compute_mapping_quality (vector< Alignment > &alignments, int max_mapping_quality, bool fast_approximation, double cluster_mq, bool use_cluster_mq, int overlap_count, double mq_estimate, double maybe_mq_threshold, double identity_weight) const |
void | compute_paired_mapping_quality (pair< vector< Alignment >, vector< Alignment >> &alignment_pairs, const vector< double > &frag_weights, int max_mapping_quality1, int max_mapping_quality2, bool fast_approximation, double cluster_mq, bool use_cluster_mq, int overlap_count1, int overlap_count2, double mq_estimate1, double mq_estimate2, double maybe_mq_threshold, double identity_weight) const |
same function for paired reads, mapping qualities are stored in both alignments in the pair More... | |
int32_t | compute_first_mapping_quality (const vector< double > &scores, bool fast_approximation, const vector< double > *multiplicities=nullptr) const |
int32_t | compute_max_mapping_quality (const vector< double > &scores, bool fast_approximation, const vector< double > *multiplicities=nullptr) const |
int32_t | compute_group_mapping_quality (const vector< double > &scores, const vector< size_t > &group, const vector< double > *multiplicities=nullptr) const |
vector< int32_t > | compute_all_mapping_qualities (const vector< double > &scores, const vector< double > *multiplicities=nullptr) const |
double | mapping_quality_score_diff (double mapping_quality) const |
double | score_to_unnormalized_likelihood_ln (double score) const |
size_t | longest_detectable_gap (const Alignment &alignment, const string::const_iterator &read_pos) const |
The longest gap detectable from a read position without soft-clipping. More... | |
size_t | longest_detectable_gap (size_t read_length, size_t read_pos) const |
The longest gap detectable from a read position without soft-clipping, for a generic read. More... | |
size_t | longest_detectable_gap (const Alignment &alignment) const |
The longest gap detectable from any read position without soft-clipping. More... | |
size_t | longest_detectable_gap (size_t read_length) const |
The longest gap detectable from any read position without soft-clipping, for a generic read. More... | |
virtual int32_t | score_discontiguous_alignment (const Alignment &aln, const function< size_t(pos_t, pos_t, size_t)> &estimate_distance, bool strip_bonuses=false) const |
virtual int32_t | score_contiguous_alignment (const Alignment &aln, bool strip_bonuses=false) const |
virtual int32_t | remove_bonuses (const Alignment &aln, bool pinned=false, bool pin_left=false) const |
Public Member Functions inherited from vg::BaseAligner | |
virtual void | align (Alignment &alignment, const HandleGraph &g, bool traceback_aln) const =0 |
Static Public Member Functions | |
static double | maximum_mapping_quality_exact (const vector< double > &scaled_scores, size_t *max_idx_out, const vector< double > *multiplicities=nullptr) |
static double | maximum_mapping_quality_approx (const vector< double > &scaled_scores, size_t *max_idx_out, const vector< double > *multiplicities=nullptr) |
static double | first_mapping_quality_exact (const vector< double > &scaled_scores, const vector< double > *multiplicities=nullptr) |
static double | first_mapping_quality_approx (const vector< double > &scaled_scores, const vector< double > *multiplicities=nullptr) |
Public Attributes | |
DeletionAligner | deletion_aligner |
int8_t * | nt_table = nullptr |
int8_t * | score_matrix = nullptr |
int8_t | match |
int8_t | mismatch |
int8_t | gap_open |
int8_t | gap_extension |
int8_t | full_length_bonus |
double | log_base = 0.0 |
Protected Member Functions | |
GSSWAligner ()=default | |
GSSWAligner (const int8_t *_score_matrix, int8_t _gap_open, int8_t _gap_extension, int8_t _full_length_bonus, double _gc_content) | |
~GSSWAligner () | |
gssw_graph * | create_gssw_graph (const HandleGraph &g) const |
unordered_set< id_t > | identify_pinning_points (const HandleGraph &graph) const |
void | unreverse_graph_mapping (gssw_graph_mapping *gm) const |
void | unreverse_graph (gssw_graph *graph) const |
void | gssw_mapping_to_alignment (gssw_graph *graph, gssw_graph_mapping *gm, Alignment &alignment, bool pinned, bool pin_left) const |
string | graph_cigar (gssw_graph_mapping *gm) const |
double | group_mapping_quality_exact (const vector< double > &scaled_scores, const vector< size_t > &group, const vector< double > *multiplicities=nullptr) const |
double | estimate_next_best_score (int length, double min_diffs) const |
double | recover_log_base (const int8_t *score_matrix, double gc_content, double tol) const |
bool | verify_valid_log_odds_score_matrix (const int8_t *score_matrix, const double *nt_freqs) const |
double | alignment_score_partition_function (double lambda, const int8_t *score_matrix, const double *nt_freqs) const |
vector< double > | all_mapping_qualities_exact (const vector< double > &scaled_scores, const vector< double > *multiplicities=nullptr) const |
The basic GSSW-based core aligner implementation, which can then be quality-adjusted or not.
|
protected default |
|
protected |
|
protected |
|
pure virtual |
store optimal global alignment against a graph within a specified band in the Alignment object. Permissive banding auto-detects the width of band needed so that paths can travel through every node in the graph
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
store top scoring global alignments in the vector in descending score order up to a maximum number of alternate alignments (including the optimal alignment). if there are fewer than the maximum number of alignments in the return value, then the vector contains all possible alignments. the optimal alignment will be stored in both the vector and the original alignment object
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
store optimal alignment against a graph in the Alignment object with one end of the sequence guaranteed to align to a source/sink node. if xdrop is selected, use the xdrop heuristic, which does not guarantee an optimal alignment.
pinning left means that the alignment starts with the first base of the read sequence and the first base of a source node sequence, pinning right means that the alignment starts with the final base of the read sequence and the final base of a sink node sequence
Gives the full length bonus only on the non-pinned end of the alignment.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
store the top scoring pinned alignments in the vector in descending score order up to a maximum number of alignments (including the optimal one). if there are fewer than the maximum number in the return value, then it includes all alignments with a positive score. the optimal alignment will be stored in both the vector and in the main alignment object
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
xdrop aligner, but with a precomputed topological order on the graph, which need not include all of the graph's handles and which may contain both orientations of a handle
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
xdrop aligner
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
protected |
|
protected |
vector< int32_t > vg::GSSWAligner::compute_all_mapping_qualities | ( | const vector< double > & | scores, |
const vector< double > * | multiplicities = nullptr |
||
) | const |
Computes mapping quality for all of a vector of scores. Optionally includes a vector of implicit counts >= 1 for the scores, but only 1 count can apply toward the mapping quality.
int32_t vg::GSSWAligner::compute_first_mapping_quality | ( | const vector< double > & | scores, |
bool | fast_approximation, | ||
const vector< double > * | multiplicities = nullptr |
||
) | const |
Computes mapping quality for the first score in a vector of scores. Optionally includes a vector of implicit counts >= 1 for the scores, but only 1 count can apply toward the mapping quality.
int32_t vg::GSSWAligner::compute_group_mapping_quality | ( | const vector< double > & | scores, |
const vector< size_t > & | group, | ||
const vector< double > * | multiplicities = nullptr |
||
) | const |
Computes mapping quality for a group of scores in a vector of scores (group given by indexes). Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if each member of the group has a count of 1.
void vg::GSSWAligner::compute_mapping_quality | ( | vector< Alignment > & | alignments, |
int | max_mapping_quality, | ||
bool | fast_approximation, | ||
double | cluster_mq, | ||
bool | use_cluster_mq, | ||
int | overlap_count, | ||
double | mq_estimate, | ||
double | maybe_mq_threshold, | ||
double | identity_weight | ||
) | const |
stores -10 * log_10(P_err) in alignment mapping_quality field where P_err is the probability that the alignment is not the correct one (assuming that one of the alignments in the vector is correct). alignments must have been created with this Aligner for quality score to be valid
int32_t vg::GSSWAligner::compute_max_mapping_quality | ( | const vector< double > & | scores, |
bool | fast_approximation, | ||
const vector< double > * | multiplicities = nullptr |
||
) | const |
Computes mapping quality for the optimal score in a vector of scores. Optionally includes a vector of implicit counts >= 1 for the scores, but only 1 count can apply toward the mapping quality.
void vg::GSSWAligner::compute_paired_mapping_quality | ( | pair< vector< Alignment >, vector< Alignment >> & | alignment_pairs, |
const vector< double > & | frag_weights, | ||
int | max_mapping_quality1, | ||
int | max_mapping_quality2, | ||
bool | fast_approximation, | ||
double | cluster_mq, | ||
bool | use_cluster_mq, | ||
int | overlap_count1, | ||
int | overlap_count2, | ||
double | mq_estimate1, | ||
double | mq_estimate2, | ||
double | maybe_mq_threshold, | ||
double | identity_weight | ||
) | const |
same function for paired reads, mapping qualities are stored in both alignments in the pair
|
protected |
double vg::GSSWAligner::estimate_max_possible_mapping_quality | ( | int | length, |
double | min_diffs, | ||
double | next_min_diffs | ||
) | const |
|
protected |
|
static |
Same as maximum_mapping_quality_approx except always computes mapping quality for the first score
|
static |
Same as maximum_mapping_quality_exact except always computes mapping quality for the first score
|
protected |
|
protected |
|
protected |
|
protected |
size_t vg::GSSWAligner::longest_detectable_gap | ( | const Alignment & | alignment | ) | const |
The longest gap detectable from any read position without soft-clipping.
size_t vg::GSSWAligner::longest_detectable_gap | ( | const Alignment & | alignment, |
const string::const_iterator & | read_pos | ||
) | const |
The longest gap detectable from a read position without soft-clipping.
size_t vg::GSSWAligner::longest_detectable_gap | ( | size_t | read_length | ) | const |
The longest gap detectable from any read position without soft-clipping, for a generic read.
size_t vg::GSSWAligner::longest_detectable_gap | ( | size_t | read_length, |
size_t | read_pos | ||
) | const |
The longest gap detectable from a read position without soft-clipping, for a generic read.
double vg::GSSWAligner::mapping_quality_score_diff | ( | double | mapping_quality | ) | const |
Returns the difference between the optimal and second-best alignment scores that would result in this mapping quality using the fast mapping quality approximation
double vg::GSSWAligner::max_possible_mapping_quality | ( | int | length | ) | const |
|
static |
Given a nonempty vector of nonnegative scaled alignment scores, approximate the mapping quality of the maximal score in the vector. Sets max_idx_out to the index of that score in the vector. Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if its multiplicity is 1.
|
static |
Given a nonempty vector of nonnegative scaled alignment scores, compute the mapping quality of the maximal score in the vector. Sets max_idx_out to the index of that score in the vector. Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if its multiplicity is 1.
|
protected |
|
virtual |
Without necessarily rescoring the entire alignment, return the score of the given alignment with bonuses removed. Assumes that bonuses are actually included in the score. Needs to know if the alignment was pinned-end or not, and, if so, which end was pinned.
|
virtual |
Use the score values in the aligner to score the given alignment assuming that there are no gaps between Mappings in the Path
|
virtual |
Use the score values in the aligner to score the given alignment, scoring gaps caused by jumping between nodes using a custom gap length estimation function (which takes the from position, the to position, and a search limit in bp that happens to be the read length).
May include full length bonus or not. TODO: bool flags are bad.
|
pure virtual |
Compute the score of an exact match in the given alignment, from the given offset, of the given length.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Compute the score of an exact match of the given sequence with the given qualities. Qualities may be ignored by some implementations.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Compute the score of an exact match of the given range of sequence with the given qualities. Qualities may be ignored by some implementations.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Implemented in vg::QualAdjAligner, and vg::Aligner.
int32_t vg::GSSWAligner::score_gap | ( | size_t | gap_length | ) | const |
Returns the score of an insert or deletion of the given length.
|
pure virtual |
Compute the score of a mismatch of the given range of sequence with the given qualities. Qualities may be ignored by some implementations. Note that the return value is SIGNED, and almost certainly NEGATIVE, because mismatches are bad.
Implemented in vg::QualAdjAligner, and vg::Aligner.
|
pure virtual |
Compute the score of a path against the given range of subsequence with the given qualities.
Implemented in vg::QualAdjAligner, and vg::Aligner.
double vg::GSSWAligner::score_to_unnormalized_likelihood_ln | ( | double | score | ) | const |
Convert a score to an unnormalized log likelihood for the sequence. Requires log_base to have been set.
|
protected |
|
protected |
|
protected |
DeletionAligner vg::GSSWAligner::deletion_aligner |
int8_t vg::GSSWAligner::full_length_bonus |
int8_t vg::GSSWAligner::gap_extension |
int8_t vg::GSSWAligner::gap_open |
double vg::GSSWAligner::log_base = 0.0 |
int8_t vg::GSSWAligner::match |
int8_t vg::GSSWAligner::mismatch |
int8_t* vg::GSSWAligner::nt_table = nullptr |
int8_t* vg::GSSWAligner::score_matrix = nullptr |