vg
tools for working with variation graphs
|
#include <snarl_caller.hpp>
Classes | |
struct | PoissonCallInfo |
Public Member Functions | |
PoissonSupportSnarlCaller (const PathHandleGraph &graph, SnarlManager &snarl_manager, TraversalSupportFinder &support_finder, const algorithms::BinnedDepthIndex &depth_index, bool use_mapq) | |
virtual | ~PoissonSupportSnarlCaller () |
void | set_baseline_error (double small_variant_error, double large_variant_error) |
Set some parameters. More... | |
void | set_insertion_bias (double insertion_threshold, double small_insertion_bias, double large_insertion_bias) |
These are multipliers applied to the errors if the site has an insertion. More... | |
virtual pair< vector< int >, unique_ptr< CallInfo > > | genotype (const Snarl &snarl, const vector< SnarlTraversal > &traversals, int ref_trav_idx, int ploidy, const string &ref_path_name, pair< size_t, size_t > ref_range) |
Get the genotype of a site. More... | |
virtual void | update_vcf_info (const Snarl &snarl, const vector< SnarlTraversal > &traversals, const vector< int > &genotype, const unique_ptr< CallInfo > &call_info, const string &sample_name, vcflib::Variant &variant) |
Update INFO and FORMAT fields of the called variant. More... | |
virtual void | update_vcf_header (string &header) const |
Define any header fields needed by the above. More... | |
Public Member Functions inherited from vg::SupportBasedSnarlCaller | |
SupportBasedSnarlCaller (const PathHandleGraph &graph, SnarlManager &snarl_manager, TraversalSupportFinder &support_finder) | |
virtual | ~SupportBasedSnarlCaller () |
void | set_min_supports (double min_mad_for_call, double min_support_for_call, double min_site_support) |
Set some of the parameters. More... | |
TraversalSupportFinder & | get_support_finder () const |
Get the traversal support finder. More... | |
virtual int | get_min_total_support_for_call () const |
Get the minimum total support for call. More... | |
virtual function< bool(const SnarlTraversal &, int iteration)> | get_skip_allele_fn () const |
Use min_alt_path_support threshold as cutoff. More... | |
Public Member Functions inherited from vg::SnarlCaller | |
virtual | ~SnarlCaller () |
Protected Member Functions | |
double | genotype_likelihood (const vector< int > &genotype, const vector< SnarlTraversal > &traversals, const set< int > &trav_subset, const vector< int > &traversal_sizes, const vector< double > &traversal_mapqs, int ref_trav_idx, double exp_depth, double depth_err, int max_trav_size, int ref_trav_size) |
vector< int > | rank_by_support (const vector< Support > &supports) |
Rank supports. More... | |
Protected Attributes | |
double | baseline_error_small = 0.005 |
Baseline error rate for smaller variants. More... | |
double | baseline_error_large = 0.01 |
Baseline error rate for larger variants. More... | |
double | insertion_bias_large = 1. |
double | insertion_bias_small = 1. |
double | insertion_threshold = 5. |
size_t | top_k = 20 |
Consider up to the top-k traversals (based on support) for genotyping. More... | |
size_t | top_m = 100 |
double | depth_padding_factor = 1. |
Padding to apply to snarl ranges, relative to the longest traversal, when looking up binned depth. More... | |
const algorithms::BinnedDepthIndex & | depth_index |
Map path name to <mean, std_err> of depth coverage from the packer. More... | |
bool | use_mapq |
MAPQ information is available from the packer and we want to use it. More... | |
Protected Attributes inherited from vg::SupportBasedSnarlCaller | |
const PathHandleGraph & | graph |
SnarlManager & | snarl_manager |
TraversalSupportFinder & | support_finder |
Get support from traversals. More... | |
int | min_total_support_for_call = 2 |
size_t | min_mad_for_filter = 1 |
size_t | min_site_depth = 4 |
double | min_alt_path_support = 0.5 |
Additional Inherited Members | |
Static Protected Member Functions inherited from vg::SupportBasedSnarlCaller | |
static int | get_best_support (const vector< Support > &supports, const vector< int > &skips) |
Get the best support out of a list of supports, ignoring skips. More... | |
static double | support_val (const Support &support) |
Relic from old code. More... | |
Find the genotype of some traversals in a site using read support and a Poisson model based on expected depth. Inspired, in part, by Paragraph, which uses a similar approach for genotyping breakpoints.
vg::PoissonSupportSnarlCaller::PoissonSupportSnarlCaller | ( | const PathHandleGraph & | graph, |
SnarlManager & | snarl_manager, | ||
TraversalSupportFinder & | support_finder, | ||
const algorithms::BinnedDepthIndex & | depth_index, | ||
bool | use_mapq | ||
) |
|
virtual |
|
virtual |
Get the genotype of a site.
Implements vg::SnarlCaller.
|
protected |
Compute the likelihood of a genotype as a product of Poisson probabilities: P[allele1] * P[allele2] * P[uncalled alleles]. Homozygous alleles are split into two, with half support each. The (natural) logarithm is returned. If trav_subset is not empty, traversals outside that set (and genotype) will be ignored to save time.
|
protected |
Rank supports.
void vg::PoissonSupportSnarlCaller::set_baseline_error | ( | double | small_variant_error, |
double | large_variant_error | ||
) |
Set some parameters.
void vg::PoissonSupportSnarlCaller::set_insertion_bias | ( | double | insertion_threshold, |
double | small_insertion_bias, | ||
double | large_insertion_bias | ||
) |
These are multipliers applied to the errors if the site has an insertion.
|
virtual |
Define any header fields needed by the above.
Implements vg::SnarlCaller.
|
virtual |
Update INFO and FORMAT fields of the called variant.
Reimplemented from vg::SupportBasedSnarlCaller.
|
protected |
Baseline error rate for larger variants.
|
protected |
Baseline error rate for smaller variants.
Error rates are different for small and large variants, which depend more on base and mapping qualities respectively. The switch threshold is in TraversalSupportFinder. Error stats from the Packer object get added to these baselines when computing the scores.
|
protected |
Map path name to <mean, std_err> of depth coverage from the packer.
|
protected |
Padding to apply to snarl ranges, relative to the longest traversal, when looking up binned depth.
|
protected |
Multiply the error by this much in the presence of an insertion. (After some testing, this does not in fact seem to help much in practice; it is best just to boost the overall error above. Hence it is not in the CLI and is off by default.)
|
protected |
|
protected |
A site is an insertion if one (supported) allele is this many times bigger than another. Unlike above, the default comes from call_main.cpp (todo: straighten this out?).
|
protected |
Consider up to the top-k traversals (based on support) for genotyping.
|
protected |
Consider up to the top-m secondary traversals (based on support) for each top traversal (so at most top_k * top_m considered).
|
protected |
MAPQ information is available from the packer and we want to use it.