vg
tools for working with variation graphs
Public Member Functions | Private Attributes | List of all members
vg::MCMCGenotyper Class Reference

#include <mcmc_genotyper.hpp>

Public Member Functions

 MCMCGenotyper (SnarlManager &snarls, VG &graph, const int n_iterations, const int seed, const int burn_in, const int frequency)
 
unique_ptr< PhasedGenome > run_genotype (const vector< multipath_alignment_t > &reads, const double log_base) const
 
double log_target (unique_ptr< PhasedGenome > &phased_genome, const vector< multipath_alignment_t > &reads) const
 
tuple< int, const Snarl *, vector< NodeTraversal > > proposal_sample (unique_ptr< PhasedGenome > &current) const
 
int generate_discrete_uniform (minstd_rand0 &random_engine, int lower_bound, int upper_bound) const
 
double generate_continuous_uniform (const double a, const double b) const
 
unique_ptr< PhasedGenome > generate_initial_guess () const
 
unordered_map< pair< const Snarl *, const Snarl * >, int32_t > make_snarl_map (const vector< multipath_alignment_t > &reads, PhasedGenome &phased_genome) const
 
algorithms::Graph make_snarl_graph (unordered_map< pair< const Snarl *, const Snarl * >, int32_t > map) const
 
vector< unordered_set< size_t > > karger_stein (const vector< multipath_alignment_t > &reads, PhasedGenome &genome) const
 
unordered_set< size_t > alt_proposal_sample (vector< unordered_set< size_t >> &gamma, PhasedGenome &genome) const
 

Private Attributes

SnarlManager & snarls
 
VG & graph
 
const int n_iterations
 
const int seed
 
const int burn_in
 
const int frequency
 
minstd_rand0 random_engine
 

Detailed Description

This class is a genotyper that uses MCMC to find two optimal paths through the graph given a set of aligned reads.

Constructor & Destructor Documentation

◆ MCMCGenotyper()

vg::MCMCGenotyper::MCMCGenotyper ( SnarlManager &  snarls,
VG &  graph,
const int  n_iterations,
const int  seed,
const int  burn_in,
const int  frequency 
)

Member Function Documentation

◆ alt_proposal_sample()

unordered_set< size_t > vg::MCMCGenotyper::alt_proposal_sample ( vector< unordered_set< size_t >> &  gamma,
PhasedGenome &  genome 
) const

◆ generate_continuous_uniform()

double vg::MCMCGenotyper::generate_continuous_uniform ( const double  a,
const double  b 
) const

Given a range [a,b] will return a random number uniformly distributed within that range

◆ generate_discrete_uniform()

int vg::MCMCGenotyper::generate_discrete_uniform ( minstd_rand0 &  random_engine,
int  lower_bound,
int  upper_bound 
) const

Generates a number randomly using the discrete uniform distribution

◆ generate_initial_guess()

unique_ptr< PhasedGenome > vg::MCMCGenotyper::generate_initial_guess ( ) const

Generate a PhasedGenome to use as an initial value in M-H. Uses the two non-alt paths from the linear reference as haplotypes.

◆ karger_stein()

vector< unordered_set< size_t > > vg::MCMCGenotyper::karger_stein ( const vector< multipath_alignment_t > &  reads,
PhasedGenome &  genome 
) const

Make a snarl graph with edge weights scored by how well mapped reads support phasing of snarls. Use an alternative proposal distribution, using sets generated from the Karger-Stein min-cut algorithm, to escape bottlenecks, leading to rapid convergence.

◆ log_target()

double vg::MCMCGenotyper::log_target ( unique_ptr< PhasedGenome > &  phased_genome,
const vector< multipath_alignment_t > &  reads 
) const

Represents the posterior distribution function. Returns the posterior probability.

◆ make_snarl_graph()

algorithms::Graph vg::MCMCGenotyper::make_snarl_graph ( unordered_map< pair< const Snarl *, const Snarl * >, int32_t >  map) const

Generate a graph using the snarl map

◆ make_snarl_map()

unordered_map< pair< const Snarl *, const Snarl * >, int32_t > vg::MCMCGenotyper::make_snarl_map ( const vector< multipath_alignment_t > &  reads,
PhasedGenome &  phased_genome 
) const

Generate a map from a pair of snarls to an edge weight. Uses the snarls read API, the reads, and the optimal_score_on phased genome as a scoring scheme for the edge weight overlapping the snarl pair.

◆ proposal_sample()

tuple< int, const Snarl *, vector< NodeTraversal > > vg::MCMCGenotyper::proposal_sample ( unique_ptr< PhasedGenome > &  current) const

Generates a proposal sample over the desired distribution. Returns a sample from the proposal distribution.

◆ run_genotype()

unique_ptr< PhasedGenome > vg::MCMCGenotyper::run_genotype ( const vector< multipath_alignment_t > &  reads,
const double  log_base 
) const

Takes as input a collection of mapped reads stored as a vector of multipath alignments and uses MCMC to find two optimal paths through the graph. Output: phased genome

Member Data Documentation

◆ burn_in

const int vg::MCMCGenotyper::burn_in
private

◆ frequency

const int vg::MCMCGenotyper::frequency
private

◆ graph

VG& vg::MCMCGenotyper::graph
private

◆ n_iterations

const int vg::MCMCGenotyper::n_iterations
private

◆ random_engine

minstd_rand0 vg::MCMCGenotyper::random_engine
mutable private

◆ seed

const int vg::MCMCGenotyper::seed
private

◆ snarls

SnarlManager& vg::MCMCGenotyper::snarls
private

The documentation for this class was generated from the following files: