vg
tools for working with variation graphs
|
#include <multipath_mapper.hpp>
Public Types | |
using | memcluster_t = pair< vector< pair< const MaximalExactMatch *, pos_t > >, double > |
We often pass around clusters of MEMs and their graph positions, paired with a multiplicity. More... | |
using | clustergraph_t = tuple< unique_ptr< bdsg::HashGraph >, memcluster_t, size_t > |
using | match_fanouts_t = unordered_map< const MaximalExactMatch *, deque< pair< string::const_iterator, char > >> |
using | candidate_id_t = tuple< bool, int64_t, const MaximalExactMatch *, pos_t > |
Public Member Functions | |
MultipathMapper (PathPositionHandleGraph *graph, gcsa::GCSA *gcsa_index, gcsa::LCPArray *lcp_array, haplo::ScoreProvider *haplo_score_provider=nullptr, SnarlManager *snarl_manager=nullptr, SnarlDistanceIndex *distance_index=nullptr) | |
~MultipathMapper () | |
void | multipath_map (const Alignment &alignment, vector< multipath_alignment_t > &multipath_alns_out) |
Map read in alignment to graph and make multipath alignments. More... | |
bool | multipath_map_paired (const Alignment &alignment1, const Alignment &alignment2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer) |
void | reduce_to_single_path (const multipath_alignment_t &multipath_aln, vector< Alignment > &alns_out, size_t max_number) const |
void | set_automatic_min_clustering_length (double random_mem_probability=0.5) |
void | calibrate_mismapping_detection (size_t num_simulations, const vector< size_t > &simulated_read_lengths) |
void | determine_distance_correlation () |
Experimental: skeleton code for predicting path distance from minimum distance. More... | |
void | set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
void | set_min_softclip_length_for_splice (size_t length) |
How big of a softclip should lead us to attempt spliced alignment? More... | |
void | set_log_odds_against_splice (double log_odds) |
What should the prior odds against a spliced alignment be? More... | |
void | set_intron_length_distribution (const vector< double > &intron_mixture_weights, const vector< pair< double, double >> &intron_component_params) |
Use a non-default intron length distribution. More... | |
void | set_max_merge_supression_length () |
Decide how long of a tail alignment we want before we allow its subpath to be merged. More... | |
void | set_read_1_adapter (const string &adapter) |
void | set_read_2_adapter (const string &adapter) |
virtual void | set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Set all the aligner scoring parameters and create the stored aligner instances. More... | |
virtual void | set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
virtual void | set_alignment_scores (const int8_t *score_matrix, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Public Member Functions inherited from vg::BaseMapper | |
BaseMapper (PathPositionHandleGraph *xidex, gcsa::GCSA *g, gcsa::LCPArray *a, haplo::ScoreProvider *haplo_score_provider=nullptr) | |
BaseMapper (void) | |
int | random_match_length (double chance_random) |
void | set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus, double haplotype_consistency_exponent=1) |
Override alignment score setting to support haplotype consistency exponent. More... | |
void | set_alignment_scores (istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus, double haplotype_consistency_exponent=1) |
Same, but loading a 4x4 substitution score matrix from a stream. More... | |
vector< MaximalExactMatch > | find_mems_deep (string::const_iterator seq_begin, string::const_iterator seq_end, double &lcp_avg, double &fraction_filtered, int max_mem_length=0, int min_mem_length=1, int reseed_length=0, bool use_lcp_reseed_heuristic=false, bool use_diff_based_fast_reseed=false, bool include_parent_in_sub_mem_count=false, bool record_max_lcp=false, int reseed_below_count=0) |
vector< MaximalExactMatch > | find_mems_simple (string::const_iterator seq_begin, string::const_iterator seq_end, int max_mem_length=0, int min_mem_length=1, int reseed_length=0) |
vector< MaximalExactMatch > | find_stripped_matches (string::const_iterator seq_begin, string::const_iterator seq_end, size_t strip_length, size_t max_match_length, size_t target_count) |
vector< MaximalExactMatch > | find_fanout_mems (string::const_iterator seq_begin, string::const_iterator seq_end, string::const_iterator qual_begin, int max_fans_out, char max_fanout_base_quality, vector< deque< pair< string::const_iterator, char >>> *mem_fanout_breaks=nullptr) |
vector< pos_t > | walk_fanout_path (string::const_iterator begin, string::const_iterator end, const deque< pair< string::const_iterator, char >> &fanout_breaks, gcsa::node_type pos) |
void | rescue_high_count_order_length_mems (vector< MaximalExactMatch > &mems, size_t max_rescue_hit_count) |
void | precollapse_order_length_runs (string::const_iterator seq_begin, vector< MaximalExactMatch > &mems) |
void | prefilter_redundant_sub_mems (vector< MaximalExactMatch > &mems, vector< pair< int, vector< size_t >>> &sub_mem_containment_graph) |
void | find_sub_mems (const vector< MaximalExactMatch > &mems, int parent_layer_begin, int parent_layer_end, int mem_idx, string::const_iterator next_mem_end, int min_mem_length, vector< pair< MaximalExactMatch, vector< size_t >>> &sub_mems_out) |
void | find_sub_mems_fast (const vector< MaximalExactMatch > &mems, int parent_layer_begin, int parent_layer_end, int mem_idx, string::const_iterator leftmost_guaranteed_disjoint_bound, string::const_iterator leftmost_seeding_bound, int min_sub_mem_length, vector< pair< MaximalExactMatch, vector< size_t >>> &sub_mems_out) |
gcsa::range_type | accelerate_mem_query (string::const_iterator begin, string::const_iterator &cursor) const |
set< pos_t > | sequence_positions (const string &seq) |
size_t | get_adaptive_min_reseed_length (size_t parent_mem_length) |
void | apply_haplotype_consistency_scores (const vector< Alignment * > &alns) |
Public Member Functions inherited from vg::AlignerClient | |
virtual void | set_alignment_scores (int8_t match, int8_t mismatch, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Set all the aligner scoring parameters and create the stored aligner instances. More... | |
virtual void | set_alignment_scores (std::istream &matrix_stream, int8_t gap_open, int8_t gap_extend, int8_t full_length_bonus) |
Public Member Functions inherited from vg::PairedEndMapper | |
void | set_fragment_length_distr_params (size_t maximum_sample_size=1000, size_t reestimation_frequency=1000, double robust_estimation_fraction=0.95) |
bool | has_fixed_fragment_length_distr () |
Returns true if fragment length distribution has been fixed. More... | |
void | force_fragment_length_distr (double mean, double stddev) |
Protected Types | |
enum | SpliceStrand { Undetermined, Forward, Reverse } |
Enum for the strand of a splice alignment's splice motifs. More... | |
Protected Member Functions | |
bool | attempt_unpaired_multipath_map_of_pair (const Alignment &alignment1, const Alignment &alignment2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< Alignment, Alignment >> &ambiguous_pair_buffer) |
bool | attempt_rescue (const multipath_alignment_t &multipath_aln, const Alignment &other_aln, bool rescue_forward, multipath_alignment_t &rescue_multipath_aln) |
bool | do_rescue_alignment (const multipath_alignment_t &multipath_aln, const Alignment &other_aln, bool rescue_forward, multipath_alignment_t &rescue_multipath_aln, double rescue_mean_length, double num_std_devs) const |
void | extract_rescue_graph (const multipath_alignment_t &multipath_aln, const Alignment &other_aln, bool rescue_forward, MutableHandleGraph *rescue_graph, double rescue_mean_length, double num_std_devs) const |
Use the algorithm implied by the mapper settings to extract a subgraph to perform a rescue alignment against. More... | |
void | align_to_cluster_graphs (const Alignment &alignment, vector< clustergraph_t > &cluster_graphs, vector< multipath_alignment_t > &multipath_alns_out, vector< double > &multiplicities_out, size_t num_mapping_attempts, const match_fanouts_t *fanouts=nullptr, vector< size_t > *cluster_idxs=nullptr) |
void | align_to_cluster_graph_pairs (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &pair_multiplicities, vector< pair< size_t, size_t >> &duplicate_pairs_out, const match_fanouts_t *fanouts1, const match_fanouts_t *fanouts2) |
bool | align_to_cluster_graphs_with_rescue (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< MaximalExactMatch > &mems1, vector< MaximalExactMatch > &mems2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &pair_distances_out, vector< double > &pair_multiplicities_out, const match_fanouts_t *fanouts1, const match_fanouts_t *fanouts2) |
void | attempt_rescue_for_secondaries (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< pair< size_t, size_t >> &duplicate_pairs, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &pair_multiplicities, const match_fanouts_t *fanouts1, const match_fanouts_t *fanouts2) |
void | merge_rescued_mappings (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &pair_multiplicities, vector< pair< multipath_alignment_t, multipath_alignment_t >> &rescued_multipath_aln_pairs, vector< pair< pair< size_t, size_t >, int64_t >> &rescued_cluster_pairs, vector< double > &rescued_multiplicities) const |
Merge the rescued mappings into the output vector and deduplicate pairs. More... | |
vector< memcluster_t > | get_clusters (const Alignment &alignment, const vector< MaximalExactMatch > &mems, OrientedDistanceMeasurer *distance_measurer=nullptr, const match_fanouts_t *fanouts=nullptr) const |
vector< pair< pair< size_t, size_t >, int64_t > > | get_cluster_pairs (const Alignment &alignment1, const Alignment &alignment2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, OrientedDistanceMeasurer *distance_measurer=nullptr) |
vector< clustergraph_t > | query_cluster_graphs (const Alignment &alignment, const vector< MaximalExactMatch > &mems, const vector< memcluster_t > &clusters) const |
pair< unique_ptr< bdsg::HashGraph >, bool > | extract_cluster_graph (const Alignment &alignment, const memcluster_t &mem_cluster) const |
pair< unique_ptr< bdsg::HashGraph >, bool > | extract_maximal_graph (const Alignment &alignment, const memcluster_t &mem_cluster) const |
pair< unique_ptr< bdsg::HashGraph >, bool > | extract_restrained_graph (const Alignment &alignment, const memcluster_t &mem_cluster) const |
vector< pair< int64_t, int64_t > > | covered_intervals (const Alignment &alignment, const clustergraph_t &cluster) const |
Returns the union of the intervals on the read that a cluster cover in sorted order. More... | |
void | split_multicomponent_alignments (vector< multipath_alignment_t > &multipath_alns_out, const Alignment *alignment=nullptr, vector< clustergraph_t > *cluster_graphs=nullptr, vector< size_t > *cluster_idxs=nullptr, vector< double > *multiplicities=nullptr) const |
void | split_multicomponent_alignments (const Alignment &alignment1, const Alignment &alignment2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &multiplicities) const |
void | simplify_complicated_multipath_alignment (multipath_alignment_t &multipath_aln) const |
If the alignment seems very complicated, try to simplify low-scoring parts out of it. More... | |
void | reassign_split_clusters (const Alignment &alignment, vector< clustergraph_t > &cluster_graphs, const vector< const multipath_alignment_t * > &split_mp_alns, const vector< size_t * > &cluster_assignments, const vector< size_t * > &all_cluster_assignments) const |
void | agglomerate_alignments (vector< multipath_alignment_t > &multipath_alns_out, vector< double > *multiplicities=nullptr) |
void | agglomerate_alignment_pairs (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &multiplicities) |
void | purge_unmapped_alignments (vector< multipath_alignment_t > &multipath_alns_out) |
void | purge_unmapped_alignments (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, bool proper_paired) |
void | agglomerate (size_t idx, multipath_alignment_t &agglomerating, const multipath_alignment_t &multipath_aln, vector< size_t > &agglomerated_group, unordered_set< pos_t > &agg_start_positions, unordered_set< pos_t > &agg_end_positions) const |
The internal agglomeration procedure. More... | |
bool | find_spliced_alignments (const Alignment &alignment, vector< multipath_alignment_t > &multipath_alns_out, vector< double > &multiplicities, vector< size_t > &cluster_idxs, const vector< MaximalExactMatch > &mems, vector< clustergraph_t > &cluster_graphs, const match_fanouts_t *fanouts=nullptr, const multipath_alignment_t *rescue_anchor=nullptr, bool rescue_left=false, double rescue_multiplicity=1.0) |
bool | find_spliced_alignments (const Alignment &alignment1, const Alignment &alignment2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< double > &pair_multiplicities, const vector< MaximalExactMatch > &mems1, const vector< MaximalExactMatch > &mems2, vector< clustergraph_t > &cluster_graphs1, vector< clustergraph_t > &cluster_graphs2, const match_fanouts_t *fanouts=nullptr) |
void | identify_aligned_splice_candidates (const Alignment &alignment, bool search_left, const pair< int64_t, int64_t > &primary_interval, const vector< multipath_alignment_t > &multipath_alns, const vector< size_t > &cluster_idxs, const vector< int64_t > ¤t_index, int64_t anchor, unordered_set< size_t > &clusters_used_out, vector< size_t > &mp_aln_candidates_out) const |
void | identify_aligned_splice_candidates (const Alignment &alignment, bool read_1, bool search_left, const pair< int64_t, int64_t > &primary_interval, const vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, const vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, const vector< int64_t > ¤t_index, int64_t anchor, unordered_set< size_t > &clusters_used_out, vector< size_t > &mp_aln_candidates_out) const |
void | identify_unaligned_splice_candidates (const Alignment &alignment, bool search_left, const pair< int64_t, int64_t > &primary_interval, const vector< MaximalExactMatch > &mems, const vector< clustergraph_t > &cluster_graphs, const unordered_set< size_t > &clusters_already_used, vector< size_t > &cluster_candidates_out, vector< pair< const MaximalExactMatch *, pos_t >> &hit_candidates_out) const |
void | align_to_splice_candidates (const Alignment &alignment, vector< clustergraph_t > &cluster_graphs, const vector< MaximalExactMatch > &mems, const vector< size_t > &cluster_candidates, const vector< pair< const MaximalExactMatch *, pos_t >> &hit_candidates, const pair< int64_t, int64_t > &primary_interval, bool searching_left, bool is_read_1, unordered_map< candidate_id_t, pair< multipath_alignment_t, double >> &unaligned_candidate_bank, vector< candidate_id_t > &candidates_out, const match_fanouts_t *mem_fanouts=nullptr) const |
Make alignments for the splice alignment cancidates from MEMs and unaligned clusters. More... | |
bool | test_splice_candidates (const Alignment &alignment, bool searching_left, multipath_alignment_t &anchor_mp_aln, double *anchor_multiplicity_out, SpliceStrand &strand, int64_t num_candidates, const function< const multipath_alignment_t &(int64_t)> &get_candidate, const function< double(int64_t)> &get_multiplicity, const function< multipath_alignment_t &&(int64_t)> &consume_candidate) const |
bool | attempt_rescue_for_splice_segment (const Alignment &alignment, const pair< int64_t, int64_t > &primary_interval, const multipath_alignment_t &rescue_anchor, bool rescue_left, multipath_alignment_t &rescued_out) const |
bool | find_rescuable_spliced_alignments (const Alignment &alignment, multipath_alignment_t &splice_anchor, double &anchor_multiplicity, SpliceStrand &strand, const multipath_alignment_t &rescue_anchor, double rescue_multiplicity, bool rescue_left, const pair< int64_t, int64_t > &primary_interval) const |
See if we can find a spliced alignment segment by aligning between the pair. More... | |
bool | retry_pairing_spliced_alignments (const Alignment &alignment1, const Alignment &alignment2, vector< multipath_alignment_t > &multipath_alns_1, vector< multipath_alignment_t > &multipath_alns_2, const vector< size_t > &cluster_idxs_1, const vector< size_t > &cluster_idxs_2, const vector< double > &multiplicities_1, const vector< double > &multiplicities_2, vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs_out, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs_out, vector< double > &pair_multiplicities_out) const |
void | multipath_align (const Alignment &alignment, clustergraph_t &cluster_graph, multipath_alignment_t &multipath_aln_out, const match_fanouts_t *fanouts) const |
bool | expand_for_softclips (clustergraph_t &cluster_graph, const multipath_alignment_t &multipath_aln) const |
void | make_nontrivial_multipath_alignment (const Alignment &alignment, const HandleGraph &subgraph, const function< pair< id_t, bool >(id_t)> &translator, multipath_alignment_t &multipath_aln_out) const |
void | strip_full_length_bonuses (multipath_alignment_t &multipath_aln) const |
Remove the full length bonus from all source or sink subpaths that received it. More... | |
vector< double > | mapping_likelihoods (vector< multipath_alignment_t > &multipath_alns) const |
Returns a vector of log-likelihoods for each mapping. More... | |
vector< double > | pair_mapping_likelihoods (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, const vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs) const |
Returns a vector of log-likelihoods for each pair mapping. More... | |
vector< int32_t > | compute_raw_mapping_qualities_from_scores (const vector< double > &scores, bool have_qualities, const vector< double > *multiplicities=nullptr) const |
void | sort_and_compute_mapping_quality (vector< multipath_alignment_t > &multipath_alns, vector< size_t > *cluster_idxs=nullptr, vector< double > *multiplicities=nullptr) const |
void | sort_and_compute_mapping_quality (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, vector< pair< size_t, size_t >> *duplicate_pairs_out=nullptr, vector< double > *pair_multiplicities=nullptr) const |
double | estimate_missed_rescue_multiplicity (size_t which_pair, const vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs, const vector< clustergraph_t > &cluster_graphs1, const vector< clustergraph_t > &cluster_graphs2, bool from_secondary_rescue) const |
double | cluster_multiplicity (const memcluster_t &cluster) const |
double | pair_cluster_multiplicity (const memcluster_t &cluster_1, const memcluster_t &cluster_2) const |
double | fragment_length_log_likelihood (int64_t length) const |
Computes the log-likelihood of a given fragment length in the trained distribution. More... | |
bool | likely_mismapping (const multipath_alignment_t &multipath_aln) |
Would an alignment this good be expected against a graph this big by chance alone. More... | |
bool | likely_misrescue (const multipath_alignment_t &multipath_aln) |
Would an alignment this good be expected against a graph this big by chance alone. More... | |
int64_t | pseudo_length (const multipath_alignment_t &multipath_aln) const |
A scaling of a score so that it approximately follows the distribution of the longest match in p-value test. More... | |
double | random_match_p_value (int64_t match_length, size_t read_length) |
The approximate p-value for a match length of the given size against the current graph. More... | |
match_fanouts_t | record_fanouts (const vector< MaximalExactMatch > &mems, vector< deque< pair< string::const_iterator, char >>> &fanouts) const |
Reorganizes the fan-out breaks into the format that MultipathAlignmentGraph wants it in. More... | |
unique_ptr< OrientedDistanceMeasurer > | get_distance_measurer (MemoizingGraph &memoizing_graph) const |
Get a distance measurer based on the configuartion of the mapper. More... | |
int64_t | distance_between (const multipath_alignment_t &multipath_aln_1, const multipath_alignment_t &multipath_aln_2, bool full_fragment=false, bool forward_strand=false) const |
int64_t | distance (const pos_t &pos_1, const pos_t &pos_2) const |
bool | are_consistent (const multipath_alignment_t &multipath_aln_1, const multipath_alignment_t &multipath_aln_2) const |
Are two multipath alignments consistently placed based on the learned fragment length distribution? More... | |
bool | is_consistent (int64_t distance) const |
Is this a consistent inter-pair distance based on the learned fragment length distribution? More... | |
bool | share_terminal_positions (const multipath_alignment_t &multipath_aln_1, const multipath_alignment_t &multipath_aln_2) const |
haploMath::RRMemo & | get_rr_memo (double recombination_penalty, size_t population_size) const |
Get a thread_local RRMemo with these parameters. More... | |
void | establish_strand_consistency (vector< pair< multipath_alignment_t, multipath_alignment_t >> &multipath_aln_pairs, vector< pair< pair< size_t, size_t >, int64_t >> &cluster_pairs) |
int64_t | pessimistic_gap (int64_t length, double multiplier) const |
A restrained estimate of the amount of gap we would like to align for a read tail. More... | |
vector< MaximalExactMatch > | find_mems (const Alignment &alignment, vector< deque< pair< string::const_iterator, char >>> *mem_fanout_breaks=nullptr) |
Protected Member Functions inherited from vg::AlignerClient | |
AlignerClient (double gc_content_estimate=vg::default_gc_content) | |
const GSSWAligner * | get_aligner (bool have_qualities=true) const |
const QualAdjAligner * | get_qual_adj_aligner () const |
const Aligner * | get_regular_aligner () const |
Static Protected Member Functions | |
static void | set_read_coverage (clustergraph_t &cluster_graph) |
Computes the number of read bases a cluster of MEM hits covers. More... | |
Protected Attributes | |
string | read_1_adapter = "" |
string | read_2_adapter = "" |
vector< size_t > | read_1_adapter_lps |
vector< size_t > | read_2_adapter_lps |
int64_t | min_softclip_length_for_splice = 20 |
int64_t | min_softclipped_score_for_splice = 25 |
double | no_splice_natural_log_odds = 22.55 |
int32_t | no_splice_log_odds = 16 |
DinucleotideMachine | dinuc_machine |
SpliceStats | splice_stats |
SnarlManager * | snarl_manager |
SnarlDistanceIndex * | distance_index |
unique_ptr< PathComponentIndex > | path_component_index |
Protected Attributes inherited from vg::PairedEndMapper | |
FragmentLengthDistribution | fragment_length_distr |
Holds the actual fragment length distribution and estimation information. More... | |
Static Protected Attributes | |
static const size_t | RESCUED = numeric_limits<size_t>::max() |
static thread_local unordered_map< pair< double, size_t >, haploMath::RRMemo > | rr_memos |
Memos used by population model. More... | |
static thread_local unordered_map< pair< int64_t, size_t >, double > | p_value_memo |
static thread_local unordered_map< double, vector< int64_t > > | pessimistic_gap_memo |
static const size_t | gap_memo_max_size = 1000 |
Additional Inherited Members | |
Static Public Member Functions inherited from vg::BaseMapper | |
static double | estimate_gc_content (const gcsa::GCSA *gcsa) |
Static Public Member Functions inherited from vg::AlignerClient | |
static int8_t * | parse_matrix (std::istream &matrix_stream) |
Allocates an array to hold a 4x4 substitution matrix and returns it. More... | |
Static Public Attributes inherited from vg::BaseMapper | |
static thread_local vector< size_t > | adaptive_reseed_length_memo |
using vg::MultipathMapper::candidate_id_t = tuple<bool, int64_t, const MaximalExactMatch*, pos_t> |
Unique identifier for an unaligned splicing candidate. Specified by:
using vg::MultipathMapper::clustergraph_t = tuple<unique_ptr<bdsg::HashGraph>, memcluster_t, size_t> |
This represents a graph for a cluster, and holds a pointer to the actual extracted graph, a list of assigned MEMs, and the number of bases of read coverage that that MEM cluster provides (which serves as a priority).
using vg::MultipathMapper::match_fanouts_t = unordered_map<const MaximalExactMatch*, deque<pair<string::const_iterator, char> >> |
Represents the mismatches that were allowed in "MEMs" from the fanout match algorithm
using vg::MultipathMapper::memcluster_t = pair<vector<pair<const MaximalExactMatch*, pos_t> >, double> |
We often pass around clusters of MEMs and their graph positions, paired with a multiplicity.
|
protected |
vg::MultipathMapper::MultipathMapper | ( | PathPositionHandleGraph * | graph, |
gcsa::GCSA * | gcsa_index, | ||
gcsa::LCPArray * | lcp_array, | ||
haplo::ScoreProvider * | haplo_score_provider = nullptr , |
||
SnarlManager * | snarl_manager = nullptr , |
||
SnarlDistanceIndex * | distance_index = nullptr |
||
) |
vg::MultipathMapper::~MultipathMapper | ( | ) |
|
protected |
The internal agglomeration procedure.
|
protected |
Combine all of the significant alignments into one pair. Requires alignments to be sorted by significance already
|
protected |
Combine all of the significant alignments into one. Requires alignments to be sorted by significance already
|
protected |
After clustering MEMs, extracting graphs, assigning hits to cluster graphs, and determining which cluster graph pairs meet the fragment length distance constraints, perform multipath alignment Produces topologically sorted multipath_alignment_ts.
|
protected |
After clustering MEMs, extracting graphs, and assigning hits to cluster graphs, perform multipath alignment. Produces topologically sorted multipath_alignment_ts.
|
protected |
Align the read ends independently, but also try to form rescue alignments for each from the other. Return true if output obeys pair consistency and false otherwise. Produces topologically sorted multipath_alignment_ts.
|
protected |
Make alignments for the splice alignment cancidates from MEMs and unaligned clusters.
|
protected |
Are two multipath alignments consistently placed based on the learned fragment length distribution?
|
protected |
Extracts a section of graph at a distance from the multipath_alignment_t based on the fragment length distribution and attempts to align the other paired read to it. If rescuing forward, assumes the provided multipath_alignment_t is the first read and vice versa if rescuing backward. Rescue constructs a conventional local alignment with gssw and converts the Alignment to a multipath_alignment_t. The multipath_alignment_t will be stored in the object passed by reference as an argument.
|
protected |
Use the rescue routine on strong suboptimal clusters to see if we can find a good secondary. Produces topologically sorted multipath_alignment_ts.
|
protected |
Try to rescue an anchor for a missing spliced alignment section between the reads in a pair
|
protected |
Before the fragment length distribution has been estimated, look for an unambiguous mapping of the reads using the single ended routine. If we find one record the fragment length and report the pair, if we don't find one, add the read pair to a buffer instead of the output vector. Returns true if we successfully find a measurable pair.
void vg::MultipathMapper::calibrate_mismapping_detection | ( | size_t | num_simulations, |
const vector< size_t > & | simulated_read_lengths | ||
) |
Map random sequences against the graph to calibrate a parameterized distribution that detects when mappings are likely to have occurred by chance
|
protected |
Estimates the number of equivalent mappings (including this one), which we may not have seen due to limits on the numbers of hits returns for a MEM
|
protected |
Compute a mapping quality from a list of scores, using the selected method. Optionally considers non-present duplicates of the scores encoded as multiplicities Depending on settings, may only return mapping qualities for a prefix of the scores
|
protected |
Returns the union of the intervals on the read that a cluster cover in sorted order.
void vg::MultipathMapper::determine_distance_correlation | ( | ) |
Experimental: skeleton code for predicting path distance from minimum distance.
|
protected |
Compute the approximate distance between two multipath alignments If either is unmapped, or the distance cannot be obtained, returns numeric_limits<int64_t>::max()
|
protected |
Make an alignment to a rescue graph and translate it back to the original node space Returns false if the alignment fails, but does not check statistical significance
|
protected |
Detects if each pair can be assigned to a consistent strand of a path, and if not removes them. Also inverts the distances in the cluster pairs vector according to the strand
|
protected |
Estimates the number of equivalent mappings (including this one), which we may not have seen due to unexplored rescues.
|
protected |
If any softclips could have arisen because not enough graph was extracted, extract extra graph in those areas. Returns true if the graph was expanded.
|
protected |
Return a graph (on the heap) that contains a cluster. The paired bool indicates whether the graph is known to be connected (but it is possible for the graph to be connected and have it return false)
|
protected |
Extract a graph that is guaranteed to contain all local alignments that include the MEMs of the cluster. The paired bool indicates whether the graph is known to be connected (but it is possible for the graph to be connected and have it return false)
|
protected |
Use the algorithm implied by the mapper settings to extract a subgraph to perform a rescue alignment against.
|
protected |
Extract a graph with an algorithm that tries to extract not much more than what is required to contain the cluster in a single connected component (can be slower than the maximal algorithm for alignments that require large indels), The paired bool indicates whether the graph is known to be connected (but it is possible for the graph to be connected and have it return false)
|
protected |
Return exact matches according to the object's parameters If using the fan-out algorithm, we can optionally leave fan-out MEMs in tact and return a vector of their breaks.
|
protected |
See if we can find a spliced alignment segment by aligning between the pair.
|
protected |
Look for spliced alignments among the results of various stages in the mapping algorithm Returns true if any spliced alignments were made
|
protected |
Look for spliced alignments among the results of various stages in the mapping algorithm for pairs Returns true if any spliced alignments were made
|
protected |
Computes the log-likelihood of a given fragment length in the trained distribution.
|
protected |
Use the oriented distance clusterer or the TVS clusterer to cluster pairs of clusters. Assumes that the fragment length distribution has been estimated and fixed.
|
protected |
Use the oriented distance clusterer or the TVS clusterer to cluster MEMs depending on parameters. If using oriented distance cluster, must alo provide an oriented distance measurer.
|
protected |
Get a distance measurer based on the configuartion of the mapper.
|
protected |
Get a thread_local RRMemo with these parameters.
|
protected |
Find candidates for spliced alignment sections for a given multipath alignment among the aligned cluster pairs
|
protected |
Find candidates for spliced alignment sections for a given multipath alignment among the aligned clusters
|
protected |
Find candidates for spliced alignment sections for a given multipath alignment among the unaligned clusters and MEMs
|
protected |
Is this a consistent inter-pair distance based on the learned fragment length distribution?
|
protected |
Would an alignment this good be expected against a graph this big by chance alone.
|
protected |
Would an alignment this good be expected against a graph this big by chance alone.
|
protected |
Removes the sections of an Alignment's path within snarls and re-aligns them with multiple traceback to create a multipath alignment with non-trivial topology. Guarantees that the resulting multipath_alignment_t is in topological order.
|
protected |
Returns a vector of log-likelihoods for each mapping.
Get all the linearizations we are going to work with, possibly with duplicates. The first alignment will be optimal.
|
protected |
Merge the rescued mappings into the output vector and deduplicate pairs.
|
protected |
Make a multipath alignment of the read against the indicated graph and add it to the list of multimappings. Does NOT necessarily produce a multipath_alignment_t in topological order.
void vg::MultipathMapper::multipath_map | ( | const Alignment & | alignment, |
vector< multipath_alignment_t > & | multipath_alns_out | ||
) |
Map read in alignment to graph and make multipath alignments.
bool vg::MultipathMapper::multipath_map_paired | ( | const Alignment & | alignment1, |
const Alignment & | alignment2, | ||
vector< pair< multipath_alignment_t, multipath_alignment_t >> & | multipath_aln_pairs_out, | ||
vector< pair< Alignment, Alignment >> & | ambiguous_pair_buffer | ||
) |
Map a paired read to the graph and make paired multipath alignments. Assumes reads are on the same strand of the DNA/RNA molecule. If the fragment length distribution is still being estimated and the pair cannot be mapped unambiguously, adds the reads to a buffer for ambiguous pairs and does not output any multipath alignments. Returns true if the output is properly paired, or false if it is independent end mappings.
|
protected |
Estimates the number of equivalent pair mappings (including this one), which we may not have seen due to limits on the numbers of hits returns for a MEM
|
protected |
Returns a vector of log-likelihoods for each pair mapping.
|
protected |
A restrained estimate of the amount of gap we would like to align for a read tail.
|
protected |
A scaling of a score so that it approximately follows the distribution of the longest match in p-value test.
|
protected |
Before returning, remove alignments that are likely noise and add a placeholder for an unmapped read if necessary
|
protected |
Before returning, remove alignments that are likely noise and add placeholders for unmapped reads if necessary
|
protected |
Extracts a subgraph around each cluster of MEMs that encompasses any graph position reachable (according to the Mapper's aligner) with local alignment anchored at the MEMs. If any subgraphs overlap, they are merged into one subgraph. Returns a vector of all the merged cluster subgraphs, their MEMs assigned from the mems vector according to the MEMs' hits, and their read coverages in bp. The caller must delete the VG objects produced!
|
protected |
The approximate p-value for a match length of the given size against the current graph.
|
protected |
Helper function to be called by split_multicomponent_alignments to reassign hits to the split clusters
|
protected |
Reorganizes the fan-out breaks into the format that MultipathAlignmentGraph wants it in.
void vg::MultipathMapper::reduce_to_single_path | ( | const multipath_alignment_t & | multipath_aln, |
vector< Alignment > & | alns_out, | ||
size_t | max_number | ||
) | const |
Given a mapped multipath_alignment_t, reduce it to up to max_number + 1 nonoverlapping single path alignments, with mapping qualities accounting for positional uncertainty between them. Even if the read is unmapped, there will always be at least one (possibly score 0) output alignment.
|
protected |
Check if any of the unpaired spliced alignments can make pairs now If any pairs are identified, can invalidate the input alignments
void vg::AlignerClient::set_alignment_scores |
Set the algner scoring parameters and create the stored aligner instances. The score matrix should by a 4 x 4 array in the order (ACGT). Other overloads of set_alignment_scores all call this one.
|
virtual |
Set the algner scoring parameters and create the stored aligner instances. The score matrix should by a 4 x 4 array in the order (ACGT)
Reimplemented from vg::AlignerClient.
void vg::AlignerClient::set_alignment_scores |
Set all the aligner scoring parameters and create the stored aligner instances.
void vg::AlignerClient::set_alignment_scores |
Set the algner scoring parameters and create the stored aligner instances. The stream should contain a 4 x 4 whitespace-separated substitution matrix (in the order ACGT)
void vg::MultipathMapper::set_automatic_min_clustering_length | ( | double | random_mem_probability = 0.5 | ) |
Sets the minimum clustering MEM length to the approximate length that a MEM would have to be to have at most the given probability of occurring in random sequence of the same size as the graph
void vg::MultipathMapper::set_intron_length_distribution | ( | const vector< double > & | intron_mixture_weights, |
const vector< pair< double, double >> & | intron_component_params | ||
) |
Use a non-default intron length distribution.
void vg::MultipathMapper::set_log_odds_against_splice | ( | double | log_odds | ) |
What should the prior odds against a spliced alignment be?
void vg::MultipathMapper::set_max_merge_supression_length | ( | ) |
Decide how long of a tail alignment we want before we allow its subpath to be merged.
void vg::MultipathMapper::set_min_softclip_length_for_splice | ( | size_t | length | ) |
How big of a softclip should lead us to attempt spliced alignment?
void vg::MultipathMapper::set_read_1_adapter | ( | const string & | adapter | ) |
Use adapter sequences to help identify soft-clips that should not be splice-aligned, sequences should be ~12 bp presented in the orientation that a trimmable sequence is found in the sequencing data (reverse complement to the actual sequence, since it is encountered on the other read)
void vg::MultipathMapper::set_read_2_adapter | ( | const string & | adapter | ) |
|
staticprotected |
Computes the number of read bases a cluster of MEM hits covers.
|
protected |
Return true if any of the initial positions of the source Subpaths are shared between the two multipath alignments
|
protected |
If the alignment seems very complicated, try to simplify low-scoring parts out of it.
|
protected |
Sorts mappings by score and store mapping quality of the optimal alignment in the multipath_alignment_t object Optionally also sorts a vector of indexes to keep track of the cluster-of-origin Allows multipath alignments where the best single path alignment is leaving the read unmapped. multipath_alignment_ts MUST be topologically sorted.
|
protected |
Sorts mappings by score and store mapping quality of the optimal alignment in the multipath_alignment_t object If there are ties between scores, breaks them by the expected distance between pairs as computed by the OrientedDistanceClusterer::cluster_pairs function (modified cluster_pairs vector) Allows multipath alignments where the best single path alignment is leaving the read unmapped. multipath_alignment_ts MUST be topologically sorted. Optionally considers non-present duplicates of the scores encoded as multiplicities
|
protected |
If there are any multipath_alignment_ts with multiple connected components, split them up and add them to the return vector, also measure the distance between them and add a record to the cluster pairs vector. Properly handles multipath_alignment_ts that are unmapped. Does not depend on or guarantee topological order in the multipath_alignment_ts.
|
protected |
If there are any multipath_alignment_ts with multiple connected components, split them up and add them to the return vector. Properly handles multipath_alignment_ts that are unmapped. Does not depend on or guarantee topological order in the multipath_alignment_ts.
|
protected |
Remove the full length bonus from all source or sink subpaths that received it.
|
protected |
Check whether splice segment candidates can form a statistically significant spliced alignment. Returns true if a spliced alignment is made
bool vg::MultipathMapper::agglomerate_multipath_alns = false |
size_t vg::MultipathMapper::alt_anchor_max_length_diff = 5 |
bool vg::MultipathMapper::always_check_population = false |
std::function<size_t(const Alignment&, const HandleGraph&)> vg::MultipathMapper::choose_band_padding |
bool vg::MultipathMapper::component_min_dist = false |
|
protected |
|
protected |
bool vg::MultipathMapper::do_spliced_alignment = false |
bool vg::MultipathMapper::dynamic_max_alt_alns = false |
size_t vg::MultipathMapper::force_haplotype_count = 0 |
size_t vg::MultipathMapper::fragment_length_warning_factor = 0 |
|
staticprotected |
bool vg::MultipathMapper::get_rescue_graph_from_paths = true |
bool vg::MultipathMapper::greedy_min_dist = false |
double vg::MultipathMapper::log_likelihood_approx_factor = 1.0 |
double vg::MultipathMapper::mapq_scaling_factor = 1.0 |
size_t vg::MultipathMapper::max_alignment_gap = 5000 |
size_t vg::MultipathMapper::max_alt_mappings = 1 |
size_t vg::MultipathMapper::max_branch_trim_length = 1 |
size_t vg::MultipathMapper::max_dagify_duplications = 10 |
size_t vg::MultipathMapper::max_expected_dist_approx_error = 8 |
double vg::MultipathMapper::max_exponential_rate_intercept = 0.612045 |
double vg::MultipathMapper::max_exponential_rate_slope = 0.000555181 |
double vg::MultipathMapper::max_exponential_shape_intercept = 12.136 |
double vg::MultipathMapper::max_exponential_shape_slope = 0.0113637 |
int vg::MultipathMapper::max_fanout_base_quality = 20 |
int vg::MultipathMapper::max_fans_out = 5 |
int64_t vg::MultipathMapper::max_intron_length = 1 << 18 |
double vg::MultipathMapper::max_mapping_p_value = 0.0001 |
size_t vg::MultipathMapper::max_motif_pairs = 1024 |
size_t vg::MultipathMapper::max_p_value_memo_size = 500 |
size_t vg::MultipathMapper::max_rescue_attempts = 32 |
double vg::MultipathMapper::max_rescue_p_value = 0.1 |
size_t vg::MultipathMapper::max_single_end_mappings_for_rescue = 64 |
int64_t vg::MultipathMapper::max_snarl_cut_size = 5 |
int64_t vg::MultipathMapper::max_softclip_overlap = 8 |
int64_t vg::MultipathMapper::max_splice_overhang = 3 |
int64_t vg::MultipathMapper::max_splice_ref_search_length = 32 |
double vg::MultipathMapper::max_suboptimal_path_score_ratio = 2.0 |
size_t vg::MultipathMapper::max_tail_merge_supress_length = 4 |
double vg::MultipathMapper::mem_coverage_min_ratio = 0.5 |
size_t vg::MultipathMapper::min_clustering_mem_length = 0 |
size_t vg::MultipathMapper::min_median_mem_coverage_for_split = 0 |
|
protected |
|
protected |
int64_t vg::MultipathMapper::min_splice_rescue_matches = 6 |
size_t vg::MultipathMapper::min_tail_anchor_length = 3 |
bool vg::MultipathMapper::no_clustering = false |
|
protected |
|
protected |
int32_t vg::MultipathMapper::num_alt_alns = 4 |
size_t vg::MultipathMapper::num_mapping_attempts = 48 |
size_t vg::MultipathMapper::order_length_repeat_hit_max = 0 |
|
staticprotected |
|
protected |
|
staticprotected |
double vg::MultipathMapper::pessimistic_gap_multiplier = 0.0 |
size_t vg::MultipathMapper::plausible_rescue_cluster_coverage_diff = 5 |
size_t vg::MultipathMapper::population_max_paths = 10 |
size_t vg::MultipathMapper::population_paths_hard_cap = 1000 |
double vg::MultipathMapper::prune_subpaths_multiplier = 2.0 |
|
protected |
|
protected |
|
protected |
|
protected |
double vg::MultipathMapper::recombination_penalty = 20.7 |
unordered_set<path_handle_t> vg::MultipathMapper::ref_path_handles |
bool vg::MultipathMapper::report_allelic_mapq = false |
bool vg::MultipathMapper::report_group_mapq = false |
double vg::MultipathMapper::rescue_graph_std_devs = 6.0 |
size_t vg::MultipathMapper::rescue_only_anchor_max = 16 |
size_t vg::MultipathMapper::rescue_only_min = 128 |
|
staticprotected |
bool vg::MultipathMapper::restrained_graph_extraction = false |
size_t vg::MultipathMapper::reversing_walk_length = 0 |
|
staticprotected |
Memos used by population model.
size_t vg::MultipathMapper::secondary_rescue_attempts = 4 |
double vg::MultipathMapper::secondary_rescue_score_diff = 1.0 |
int32_t vg::MultipathMapper::secondary_rescue_subopt_diff = 10 |
bool vg::MultipathMapper::simplify_topologies = false |
|
protected |
double vg::MultipathMapper::splice_rescue_graph_std_devs = 3.0 |
|
protected |
size_t vg::MultipathMapper::stripped_match_alg_max_length = 0 |
size_t vg::MultipathMapper::stripped_match_alg_strip_length = 16 |
size_t vg::MultipathMapper::stripped_match_alg_target_count = 5 |
bool vg::MultipathMapper::suppress_cluster_merging = false |
bool vg::MultipathMapper::suppress_mismapping_detection = false |
bool vg::MultipathMapper::suppress_multicomponent_splitting = false |
bool vg::MultipathMapper::suppress_p_value_memoization = false |
bool vg::MultipathMapper::suppress_tail_anchors = false |
bool vg::MultipathMapper::top_tracebacks = false |
double vg::MultipathMapper::truncation_multiplicity_mq_limit = 7.0 |
bool vg::MultipathMapper::use_fanout_match_alg = false |
bool vg::MultipathMapper::use_min_dist_clusterer = false |
bool vg::MultipathMapper::use_pessimistic_tail_alignment = false |
bool vg::MultipathMapper::use_population_mapqs = false |
bool vg::MultipathMapper::use_stripped_match_alg = false |
bool vg::MultipathMapper::use_tvs_clusterer = false |