tools for working with variation graphs
Public Member Functions | Static Public Member Functions | Public Attributes | Protected Member Functions | List of all members
vg::GSSWAligner Class Reference (abstract)

#include <aligner.hpp>

Inheritance diagram for vg::GSSWAligner:
vg::BaseAligner vg::Aligner vg::QualAdjAligner

Public Member Functions

double max_possible_mapping_quality (int length) const
double estimate_max_possible_mapping_quality (int length, double min_diffs, double next_min_diffs) const
virtual void align_pinned (Alignment &alignment, const HandleGraph &g, bool pin_left, bool xdrop=false, uint16_t xdrop_max_gap_length=default_xdrop_max_gap_length) const =0
virtual void align_pinned_multi (Alignment &alignment, vector< Alignment > &alt_alignments, const HandleGraph &g, bool pin_left, int32_t max_alt_alns) const =0
virtual void align_global_banded (Alignment &alignment, const HandleGraph &g, int32_t band_padding=0, bool permissive_banding=true) const =0
virtual void align_global_banded_multi (Alignment &alignment, vector< Alignment > &alt_alignments, const HandleGraph &g, int32_t max_alt_alns, int32_t band_padding=0, bool permissive_banding=true) const =0
virtual void align_xdrop (Alignment &alignment, const HandleGraph &g, const vector< MaximalExactMatch > &mems, bool reverse_complemented, uint16_t max_gap_length=default_xdrop_max_gap_length) const =0
 xdrop aligner More...
virtual void align_xdrop (Alignment &alignment, const HandleGraph &g, const vector< handle_t > &order, const vector< MaximalExactMatch > &mems, bool reverse_complemented, uint16_t max_gap_length=default_xdrop_max_gap_length) const =0
virtual int32_t score_exact_match (const Alignment &aln, size_t read_offset, size_t length) const =0
virtual int32_t score_exact_match (const string &sequence, const string &base_quality) const =0
virtual int32_t score_exact_match (string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator base_qual_begin) const =0
virtual int32_t score_mismatch (string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator base_qual_begin) const =0
virtual int32_t score_full_length_bonus (bool left_side, string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator base_qual_begin) const =0
virtual int32_t score_full_length_bonus (bool left_side, const Alignment &alignment) const =0
virtual int32_t score_partial_alignment (const Alignment &alignment, const HandleGraph &graph, const path_t &path, string::const_iterator seq_begin, bool no_read_end_scoring=false) const =0
 Compute the score of a path against the given range of subsequence with the given qualities. More...
int32_t score_gap (size_t gap_length) const
 Returns the score of an insert or deletion of the given length. More...
void compute_mapping_quality (vector< Alignment > &alignments, int max_mapping_quality, bool fast_approximation, double cluster_mq, bool use_cluster_mq, int overlap_count, double mq_estimate, double maybe_mq_threshold, double identity_weight) const
void compute_paired_mapping_quality (pair< vector< Alignment >, vector< Alignment >> &alignment_pairs, const vector< double > &frag_weights, int max_mapping_quality1, int max_mapping_quality2, bool fast_approximation, double cluster_mq, bool use_cluster_mq, int overlap_count1, int overlap_count2, double mq_estimate1, double mq_estimate2, double maybe_mq_threshold, double identity_weight) const
 same function for paired reads, mapping qualities are stored in both alignments in the pair More...
int32_t compute_first_mapping_quality (const vector< double > &scores, bool fast_approximation, const vector< double > *multiplicities=nullptr) const
int32_t compute_max_mapping_quality (const vector< double > &scores, bool fast_approximation, const vector< double > *multiplicities=nullptr) const
int32_t compute_group_mapping_quality (const vector< double > &scores, const vector< size_t > &group, const vector< double > *multiplicities=nullptr) const
vector< int32_t > compute_all_mapping_qualities (const vector< double > &scores, const vector< double > *multiplicities=nullptr) const
double mapping_quality_score_diff (double mapping_quality) const
double score_to_unnormalized_likelihood_ln (double score) const
size_t longest_detectable_gap (const Alignment &alignment, const string::const_iterator &read_pos) const
 The longest gap detectable from a read position without soft-clipping. More...
size_t longest_detectable_gap (size_t read_length, size_t read_pos) const
 The longest gap detectable from a read position without soft-clipping, for a generic read. More...
size_t longest_detectable_gap (const Alignment &alignment) const
 The longest gap detectable from any read position without soft-clipping. More...
size_t longest_detectable_gap (size_t read_length) const
 The longest gap detectable from any read position without soft-clipping, for a generic read. More...
virtual int32_t score_discontiguous_alignment (const Alignment &aln, const function< size_t(pos_t, pos_t, size_t)> &estimate_distance, bool strip_bonuses=false) const
virtual int32_t score_contiguous_alignment (const Alignment &aln, bool strip_bonuses=false) const
virtual int32_t remove_bonuses (const Alignment &aln, bool pinned=false, bool pin_left=false) const
- Public Member Functions inherited from vg::BaseAligner
virtual void align (Alignment &alignment, const HandleGraph &g, bool traceback_aln) const =0

Static Public Member Functions

static double maximum_mapping_quality_exact (const vector< double > &scaled_scores, size_t *max_idx_out, const vector< double > *multiplicities=nullptr)
static double maximum_mapping_quality_approx (const vector< double > &scaled_scores, size_t *max_idx_out, const vector< double > *multiplicities=nullptr)
static double first_mapping_quality_exact (const vector< double > &scaled_scores, const vector< double > *multiplicities=nullptr)
static double first_mapping_quality_approx (const vector< double > &scaled_scores, const vector< double > *multiplicities=nullptr)

Public Attributes

DeletionAligner deletion_aligner
int8_t * nt_table = nullptr
int8_t * score_matrix = nullptr
int8_t match
int8_t mismatch
int8_t gap_open
int8_t gap_extension
int8_t full_length_bonus
double log_base = 0.0

Protected Member Functions

 GSSWAligner ()=default
 GSSWAligner (const int8_t *_score_matrix, int8_t _gap_open, int8_t _gap_extension, int8_t _full_length_bonus, double _gc_content)
 ~GSSWAligner ()
gssw_graph * create_gssw_graph (const HandleGraph &g) const
unordered_set< id_t > identify_pinning_points (const HandleGraph &graph) const
void unreverse_graph_mapping (gssw_graph_mapping *gm) const
void unreverse_graph (gssw_graph *graph) const
void gssw_mapping_to_alignment (gssw_graph *graph, gssw_graph_mapping *gm, Alignment &alignment, bool pinned, bool pin_left) const
string graph_cigar (gssw_graph_mapping *gm) const
double group_mapping_quality_exact (const vector< double > &scaled_scores, const vector< size_t > &group, const vector< double > *multiplicities=nullptr) const
double estimate_next_best_score (int length, double min_diffs) const
double recover_log_base (const int8_t *score_matrix, double gc_content, double tol) const
bool verify_valid_log_odds_score_matrix (const int8_t *score_matrix, const double *nt_freqs) const
double alignment_score_partition_function (double lambda, const int8_t *score_matrix, const double *nt_freqs) const
vector< double > all_mapping_qualities_exact (const vector< double > &scaled_scores, const vector< double > *multiplicities=nullptr) const

Detailed Description

The basic GSSW-based core aligner implementation, which can then be quality-adjusted or not.

Constructor & Destructor Documentation

◆ GSSWAligner() [1/2]

vg::GSSWAligner::GSSWAligner ( )

◆ GSSWAligner() [2/2]

vg::GSSWAligner::GSSWAligner ( const int8_t *  _score_matrix,
int8_t  _gap_open,
int8_t  _gap_extension,
int8_t  _full_length_bonus,
double  _gc_content 
)

◆ ~GSSWAligner()

vg::GSSWAligner::~GSSWAligner ( )

Member Function Documentation

◆ align_global_banded()

virtual void vg::GSSWAligner::align_global_banded ( Alignment alignment,
const HandleGraph g,
int32_t  band_padding = 0,
bool  permissive_banding = true 
) const
pure virtual

Store the optimal global alignment against a graph, within a specified band, in the Alignment object. Permissive banding auto-detects the width of band needed so that paths can travel through every node in the graph.

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ align_global_banded_multi()

virtual void vg::GSSWAligner::align_global_banded_multi ( Alignment alignment,
vector< Alignment > &  alt_alignments,
const HandleGraph g,
int32_t  max_alt_alns,
int32_t  band_padding = 0,
bool  permissive_banding = true 
) const
pure virtual

store top scoring global alignments in the vector in descending score order up to a maximum number of alternate alignments (including the optimal alignment). if there are fewer than the maximum number of alignments in the return value, then the vector contains all possible alignments. the optimal alignment will be stored in both the vector and the original alignment object

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ align_pinned()

virtual void vg::GSSWAligner::align_pinned ( Alignment alignment,
const HandleGraph g,
bool  pin_left,
bool  xdrop = false,
uint16_t  xdrop_max_gap_length = default_xdrop_max_gap_length 
) const
pure virtual

store optimal alignment against a graph in the Alignment object with one end of the sequence guaranteed to align to a source/sink node. if xdrop is selected, use the xdrop heuristic, which does not guarantee an optimal alignment.

pinning left means that the alignment starts with the first base of the read sequence and the first base of a source node sequence, pinning right means that the alignment starts with the final base of the read sequence and the final base of a sink node sequence

Gives the full length bonus only on the non-pinned end of the alignment.

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ align_pinned_multi()

virtual void vg::GSSWAligner::align_pinned_multi ( Alignment alignment,
vector< Alignment > &  alt_alignments,
const HandleGraph g,
bool  pin_left,
int32_t  max_alt_alns 
) const
pure virtual

store the top scoring pinned alignments in the vector in descending score order up to a maximum number of alignments (including the optimal one). if there are fewer than the maximum number in the return value, then it includes all alignments with a positive score. the optimal alignment will be stored in both the vector and in the main alignment object

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ align_xdrop() [1/2]

virtual void vg::GSSWAligner::align_xdrop ( Alignment alignment,
const HandleGraph g,
const vector< handle_t > &  order,
const vector< MaximalExactMatch > &  mems,
bool  reverse_complemented,
uint16_t  max_gap_length = default_xdrop_max_gap_length 
) const
pure virtual

xdrop aligner, but with a precomputed topological order on the graph, which need not include all of the graph's handles and which may contain both orientations of a handle

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ align_xdrop() [2/2]

virtual void vg::GSSWAligner::align_xdrop ( Alignment alignment,
const HandleGraph g,
const vector< MaximalExactMatch > &  mems,
bool  reverse_complemented,
uint16_t  max_gap_length = default_xdrop_max_gap_length 
) const
pure virtual

xdrop aligner

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ alignment_score_partition_function()

double vg::GSSWAligner::alignment_score_partition_function ( double  lambda,
const int8_t *  score_matrix,
const double *  nt_freqs 
) const

◆ all_mapping_qualities_exact()

vector< double > vg::GSSWAligner::all_mapping_qualities_exact ( const vector< double > &  scaled_scores,
const vector< double > *  multiplicities = nullptr 
) const

◆ compute_all_mapping_qualities()

vector< int32_t > vg::GSSWAligner::compute_all_mapping_qualities ( const vector< double > &  scores,
const vector< double > *  multiplicities = nullptr 
) const

Computes mapping quality for all of a vector of scores. Optionally includes a vector of implicit counts >= 1 for the scores, but only 1 count can apply toward the mapping quality.

◆ compute_first_mapping_quality()

int32_t vg::GSSWAligner::compute_first_mapping_quality ( const vector< double > &  scores,
bool  fast_approximation,
const vector< double > *  multiplicities = nullptr 
) const

Computes mapping quality for the first score in a vector of scores. Optionally includes a vector of implicit counts >= 1 for the scores, but only 1 count can apply toward the mapping quality.

◆ compute_group_mapping_quality()

int32_t vg::GSSWAligner::compute_group_mapping_quality ( const vector< double > &  scores,
const vector< size_t > &  group,
const vector< double > *  multiplicities = nullptr 
) const

Computes mapping quality for a group of scores in a vector of scores (group given by indexes). Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if each member of the group has a count of 1.

◆ compute_mapping_quality()

void vg::GSSWAligner::compute_mapping_quality ( vector< Alignment > &  alignments,
int  max_mapping_quality,
bool  fast_approximation,
double  cluster_mq,
bool  use_cluster_mq,
int  overlap_count,
double  mq_estimate,
double  maybe_mq_threshold,
double  identity_weight 
) const

stores -10 * log_10(P_err) in alignment mapping_quality field where P_err is the probability that the alignment is not the correct one (assuming that one of the alignments in the vector is correct). alignments must have been created with this Aligner for quality score to be valid

◆ compute_max_mapping_quality()

int32_t vg::GSSWAligner::compute_max_mapping_quality ( const vector< double > &  scores,
bool  fast_approximation,
const vector< double > *  multiplicities = nullptr 
) const

Computes mapping quality for the optimal score in a vector of scores. Optionally includes a vector of implicit counts >= 1 for the scores, but only 1 count can apply toward the mapping quality.

◆ compute_paired_mapping_quality()

void vg::GSSWAligner::compute_paired_mapping_quality ( pair< vector< Alignment >, vector< Alignment >> &  alignment_pairs,
const vector< double > &  frag_weights,
int  max_mapping_quality1,
int  max_mapping_quality2,
bool  fast_approximation,
double  cluster_mq,
bool  use_cluster_mq,
int  overlap_count1,
int  overlap_count2,
double  mq_estimate1,
double  mq_estimate2,
double  maybe_mq_threshold,
double  identity_weight 
) const

same function for paired reads, mapping qualities are stored in both alignments in the pair

◆ create_gssw_graph()

gssw_graph * vg::GSSWAligner::create_gssw_graph ( const HandleGraph g) const

◆ estimate_max_possible_mapping_quality()

double vg::GSSWAligner::estimate_max_possible_mapping_quality ( int  length,
double  min_diffs,
double  next_min_diffs 
) const

◆ estimate_next_best_score()

double vg::GSSWAligner::estimate_next_best_score ( int  length,
double  min_diffs 
) const

◆ first_mapping_quality_approx()

double vg::GSSWAligner::first_mapping_quality_approx ( const vector< double > &  scaled_scores,
const vector< double > *  multiplicities = nullptr 
)

Same as maximum_mapping_quality_approx except always computes mapping quality for the first score

◆ first_mapping_quality_exact()

double vg::GSSWAligner::first_mapping_quality_exact ( const vector< double > &  scaled_scores,
const vector< double > *  multiplicities = nullptr 
)

Same as maximum_mapping_quality_exact except always computes mapping quality for the first score

◆ graph_cigar()

string vg::GSSWAligner::graph_cigar ( gssw_graph_mapping *  gm) const

◆ group_mapping_quality_exact()

double vg::GSSWAligner::group_mapping_quality_exact ( const vector< double > &  scaled_scores,
const vector< size_t > &  group,
const vector< double > *  multiplicities = nullptr 
) const

◆ gssw_mapping_to_alignment()

void vg::GSSWAligner::gssw_mapping_to_alignment ( gssw_graph *  graph,
gssw_graph_mapping *  gm,
Alignment alignment,
bool  pinned,
bool  pin_left 
) const

◆ identify_pinning_points()

unordered_set< vg::id_t > vg::GSSWAligner::identify_pinning_points ( const HandleGraph graph) const

◆ longest_detectable_gap() [1/4]

size_t vg::GSSWAligner::longest_detectable_gap ( const Alignment alignment) const

The longest gap detectable from any read position without soft-clipping.

◆ longest_detectable_gap() [2/4]

size_t vg::GSSWAligner::longest_detectable_gap ( const Alignment alignment,
const string::const_iterator &  read_pos 
) const

The longest gap detectable from a read position without soft-clipping.

◆ longest_detectable_gap() [3/4]

size_t vg::GSSWAligner::longest_detectable_gap ( size_t  read_length) const

The longest gap detectable from any read position without soft-clipping, for a generic read.

◆ longest_detectable_gap() [4/4]

size_t vg::GSSWAligner::longest_detectable_gap ( size_t  read_length,
size_t  read_pos 
) const

The longest gap detectable from a read position without soft-clipping, for a generic read.

◆ mapping_quality_score_diff()

double vg::GSSWAligner::mapping_quality_score_diff ( double  mapping_quality) const

Returns the difference between the optimal and second-best alignment scores that would result in this mapping quality using the fast mapping quality approximation

◆ max_possible_mapping_quality()

double vg::GSSWAligner::max_possible_mapping_quality ( int  length) const

◆ maximum_mapping_quality_approx()

double vg::GSSWAligner::maximum_mapping_quality_approx ( const vector< double > &  scaled_scores,
size_t *  max_idx_out,
const vector< double > *  multiplicities = nullptr 
)

Given a nonempty vector of nonnegative scaled alignment scores, approximate the mapping quality of the maximal score in the vector. Sets max_idx_out to the index of that score in the vector. Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if its multiplicity is 1.

◆ maximum_mapping_quality_exact()

double vg::GSSWAligner::maximum_mapping_quality_exact ( const vector< double > &  scaled_scores,
size_t *  max_idx_out,
const vector< double > *  multiplicities = nullptr 
)

Given a nonempty vector of nonnegative scaled alignment scores, compute the mapping quality of the maximal score in the vector. Sets max_idx_out to the index of that score in the vector. Optionally includes a vector of implicit counts >= 1 for the scores, but the mapping quality is always calculated as if its multiplicity is 1.

◆ recover_log_base()

double vg::GSSWAligner::recover_log_base ( const int8_t *  score_matrix,
double  gc_content,
double  tol 
) const

◆ remove_bonuses()

int32_t vg::GSSWAligner::remove_bonuses ( const Alignment aln,
bool  pinned = false,
bool  pin_left = false 
) const

Without necessarily rescoring the entire alignment, return the score of the given alignment with bonuses removed. Assumes that bonuses are actually included in the score. Needs to know if the alignment was pinned-end or not, and, if so, which end was pinned.

◆ score_contiguous_alignment()

int32_t vg::GSSWAligner::score_contiguous_alignment ( const Alignment aln,
bool  strip_bonuses = false 
) const

Use the score values in the aligner to score the given alignment assuming that there are no gaps between Mappings in the Path

◆ score_discontiguous_alignment()

int32_t vg::GSSWAligner::score_discontiguous_alignment ( const Alignment aln,
const function< size_t(pos_t, pos_t, size_t)> &  estimate_distance,
bool  strip_bonuses = false 
) const

Use the score values in the aligner to score the given alignment, scoring gaps caused by jumping between nodes using a custom gap length estimation function (which takes the from position, the to position, and a search limit in bp that happens to be the read length).

May include full length bonus or not. TODO: bool flags are bad.

◆ score_exact_match() [1/3]

virtual int32_t vg::GSSWAligner::score_exact_match ( const Alignment aln,
size_t  read_offset,
size_t  length 
) const
pure virtual

Compute the score of an exact match in the given alignment, from the given offset, of the given length.

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ score_exact_match() [2/3]

virtual int32_t vg::GSSWAligner::score_exact_match ( const string &  sequence,
const string &  base_quality 
) const
pure virtual

Compute the score of an exact match of the given sequence with the given qualities. Qualities may be ignored by some implementations.

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ score_exact_match() [3/3]

virtual int32_t vg::GSSWAligner::score_exact_match ( string::const_iterator  seq_begin,
string::const_iterator  seq_end,
string::const_iterator  base_qual_begin 
) const
pure virtual

Compute the score of an exact match of the given range of sequence with the given qualities. Qualities may be ignored by some implementations.

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ score_full_length_bonus() [1/2]

virtual int32_t vg::GSSWAligner::score_full_length_bonus ( bool  left_side,
const Alignment alignment 
) const
pure virtual

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ score_full_length_bonus() [2/2]

virtual int32_t vg::GSSWAligner::score_full_length_bonus ( bool  left_side,
string::const_iterator  seq_begin,
string::const_iterator  seq_end,
string::const_iterator  base_qual_begin 
) const
pure virtual

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ score_gap()

int32_t vg::GSSWAligner::score_gap ( size_t  gap_length) const

Returns the score of an insert or deletion of the given length.

◆ score_mismatch()

virtual int32_t vg::GSSWAligner::score_mismatch ( string::const_iterator  seq_begin,
string::const_iterator  seq_end,
string::const_iterator  base_qual_begin 
) const
pure virtual

Compute the score of a mismatch of the given range of sequence with the given qualities. Qualities may be ignored by some implementations. Note that the return value is SIGNED, and almost certainly NEGATIVE, because mismatches are bad.

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ score_partial_alignment()

virtual int32_t vg::GSSWAligner::score_partial_alignment ( const Alignment alignment,
const HandleGraph graph,
const path_t path,
string::const_iterator  seq_begin,
bool  no_read_end_scoring = false 
) const
pure virtual

Compute the score of a path against the given range of subsequence with the given qualities.

Implemented in vg::QualAdjAligner, and vg::Aligner.

◆ score_to_unnormalized_likelihood_ln()

double vg::GSSWAligner::score_to_unnormalized_likelihood_ln ( double  score) const

Convert a score to an unnormalized log likelihood for the sequence. Requires log_base to have been set.

◆ unreverse_graph()

void vg::GSSWAligner::unreverse_graph ( gssw_graph *  graph) const

◆ unreverse_graph_mapping()

void vg::GSSWAligner::unreverse_graph_mapping ( gssw_graph_mapping *  gm) const

◆ verify_valid_log_odds_score_matrix()

bool vg::GSSWAligner::verify_valid_log_odds_score_matrix ( const int8_t *  score_matrix,
const double *  nt_freqs 
) const

Member Data Documentation

◆ deletion_aligner

DeletionAligner vg::GSSWAligner::deletion_aligner

◆ full_length_bonus

int8_t vg::GSSWAligner::full_length_bonus

◆ gap_extension

int8_t vg::GSSWAligner::gap_extension

◆ gap_open

int8_t vg::GSSWAligner::gap_open

◆ log_base

double vg::GSSWAligner::log_base = 0.0

◆ match

int8_t vg::GSSWAligner::match

◆ mismatch

int8_t vg::GSSWAligner::mismatch

◆ nt_table

int8_t* vg::GSSWAligner::nt_table = nullptr

◆ score_matrix

int8_t* vg::GSSWAligner::score_matrix = nullptr

The documentation for this class was generated from the following files: