vg
tools for working with variation graphs
Public Member Functions | Protected Attributes | List of all members
vg::FlowCaller Class Reference

#include <graph_caller.hpp>

Inheritance diagram for vg::FlowCaller:
vg::GraphCaller vg::VCFOutputCaller vg::GAFOutputCaller

Public Member Functions

 FlowCaller (const PathPositionHandleGraph &graph, SupportBasedSnarlCaller &snarl_caller, SnarlManager &snarl_manager, const string &sample_name, TraversalFinder &traversal_finder, const vector< string > &ref_paths, const vector< size_t > &ref_path_offsets, const vector< int > &ref_path_ploidies, AlignmentEmitter *aln_emitter, bool traversals_only, bool gaf_output, size_t trav_padding, bool genotype_snarls, const pair< size_t, size_t > &allele_length_range)
 
virtual ~FlowCaller ()
 
virtual bool call_snarl (const Snarl &snarl)
 Call a given snarl, and print the output to out_stream. More...
 
virtual string vcf_header (const PathHandleGraph &graph, const vector< string > &contigs, const vector< size_t > &contig_length_overrides={}) const
 Write the vcf header (version and contigs and basic info) More...
 
- Public Member Functions inherited from vg::GraphCaller
 GraphCaller (SnarlCaller &snarl_caller, SnarlManager &snarl_manager)
 
virtual ~GraphCaller ()
 
virtual void call_top_level_snarls (const HandleGraph &graph, RecurseType recurse_type=RecurseOnFail)
 
virtual void call_top_level_chains (const HandleGraph &graph, size_t max_edges, size_t max_trivial, RecurseType recurise_type=RecurseOnFail)
 
void set_show_progress (bool show_progress)
 toggle progress messages More...
 
- Public Member Functions inherited from vg::VCFOutputCaller
 VCFOutputCaller (const string &sample_name)
 
virtual ~VCFOutputCaller ()
 
bool add_variant (vcflib::Variant &var) const
 
void write_variants (ostream &out_stream, const SnarlManager *snarl_manager=nullptr)
 
void vcf_fixup (vcflib::Variant &var) const
 Run vcffixup from vcflib. More...
 
void set_translation (const unordered_map< nid_t, pair< string, size_t >> *translation)
 Add a translation map. More...
 
void set_nested (bool nested)
 Assume writing nested snarls is enabled. More...
 
- Public Member Functions inherited from vg::GAFOutputCaller
 GAFOutputCaller (AlignmentEmitter *emitter, const string &sample_name, const vector< string > &ref_paths, size_t trav_padding)
 The emitter object is created and owned by external forces. More...
 
virtual ~GAFOutputCaller ()
 
void emit_gaf_traversals (const PathHandleGraph &graph, const string &snarl_name, const vector< SnarlTraversal > &travs, int64_t ref_trav_idx, const string &ref_path_name, int64_t ref_path_position, const TraversalSupportFinder *support_finder=nullptr)
 print the GAF traversals More...
 
void emit_gaf_variant (const PathHandleGraph &graph, const string &snarl_name, const vector< SnarlTraversal > &travs, const vector< int > &genotype, int64_t ref_trav_idx, const string &ref_path_name, int64_t ref_path_position, const TraversalSupportFinder *support_finder=nullptr)
 print the GAF genotype More...
 
SnarlTraversal pad_traversal (const PathHandleGraph &graph, const SnarlTraversal &trav) const
 pad a traversal with (first found) reference path, adding up to trav_padding to each side More...
 

Protected Attributes

const PathPositionHandleGraph & graph
 the graph More...
 
TraversalFinder & traversal_finder
 the traversal finder More...
 
vector< string > ref_paths
 keep track of the reference paths More...
 
unordered_set< string > ref_path_set
 
map< string, size_t > ref_offsets
 keep track of offsets in the reference paths More...
 
map< string, int > ref_ploidies
 keep track of the ploidies (todo: just one map for all path stuff!!) More...
 
size_t max_snarl_edges = 10000
 until we support nested snarls, cap snarl size we attempt to process More...
 
AlignmentEmitter * alignment_emitter
 
bool traversals_only
 toggle whether to genotype or just output the traversals More...
 
bool gaf_output
 toggle whether to output vcf or gaf More...
 
bool genotype_snarls
 
pair< size_t, size_t > allele_length_range
 
- Protected Attributes inherited from vg::GraphCaller
SnarlCaller & snarl_caller
 Our Genotyper. More...
 
SnarlManager & snarl_manager
 Our snarls. More...
 
bool show_progress
 Toggle progress messages. More...
 
- Protected Attributes inherited from vg::VCFOutputCaller
vcflib::VariantCallFile output_vcf
 output vcf More...
 
string sample_name
 Sample name. More...
 
vector< vector< pair< pair< string, size_t >, string > > > output_variants
 
size_t max_uncalled_alleles = 5
 print up to this many uncalled alleles when doing ref-genotypes in -a mode More...
 
const unordered_map< nid_t, pair< string, size_t > > * translation
 
bool include_nested
 
- Protected Attributes inherited from vg::GAFOutputCaller
AlignmentEmitter * emitter
 
string gaf_sample_name
 Sample name. More...
 
size_t trav_padding = 0
 
unordered_set< string > ref_paths
 Reference paths are used to pad out traversals. If there are none, then first path found is used. More...
 

Additional Inherited Members

- Public Types inherited from vg::GraphCaller
enum  RecurseType { RecurseOnFail, RecurseAlways, RecurseNever }
 
- Protected Member Functions inherited from vg::GraphCaller
vector< Chain > break_chain (const HandleGraph &graph, const Chain &chain, size_t max_edges, size_t max_trivial)
 Break up a chain into bits that we want to call using size heuristics. More...
 
- Protected Member Functions inherited from vg::VCFOutputCaller
void add_allele_path_to_info (const HandleGraph *graph, vcflib::Variant &v, int allele, const Traversal &trav, bool reversed, bool one_based) const
 add a traversal to the VCF info field in the format of a GFA W-line or GAF path More...
 
void add_allele_path_to_info (vcflib::Variant &v, int allele, const SnarlTraversal &trav, bool reversed, bool one_based) const
 legacy version of above More...
 
string trav_string (const HandleGraph &graph, const SnarlTraversal &trav) const
 convert a traversal into an allele string More...
 
bool emit_variant (const PathPositionHandleGraph &graph, SnarlCaller &snarl_caller, const Snarl &snarl, const vector< SnarlTraversal > &called_traversals, const vector< int > &genotype, int ref_trav_idx, const unique_ptr< SnarlCaller::CallInfo > &call_info, const string &ref_path_name, int ref_offset, bool genotype_snarls, int ploidy, function< string(const vector< SnarlTraversal > &, const vector< int > &, int, int, int)> trav_to_string=nullptr)
 
tuple< int64_t, int64_t, bool, step_handle_t, step_handle_t > get_ref_interval (const PathPositionHandleGraph &graph, const Snarl &snarl, const string &ref_path_name) const
 
pair< string, int64_t > get_ref_position (const PathPositionHandleGraph &graph, const Snarl &snarl, const string &ref_path_name, int64_t ref_path_offset) const
 used for making gaf traversal names More...
 
void flatten_common_allele_ends (vcflib::Variant &variant, bool backward, size_t len_override) const
 
string print_snarl (const HandleGraph *grpah, const handle_t &snarl_start, const handle_t &snarl_end, bool in_brackets=false) const
 
string print_snarl (const Snarl &snarl, bool in_brackets=false) const
 legacy version of above More...
 
void scan_snarl (const string &allele_string, function< void(const string &, Snarl &)> callback) const
 
void update_nesting_info_tags (const SnarlManager *snarl_manager)
 
- Static Protected Attributes inherited from vg::VCFOutputCaller
static const int64_t max_vcf_line_length = 2000000000
 

Detailed Description

FlowCaller : Uses any traversal finder (e.g., FlowTraversalFinder) to find traversals, and calls those based on how much support they have.
Should work on any graph but will not report cyclic traversals. Does not (yet, anyway) support nested calling, so the entire site is processed in one shot. Designed to replace LegacyCaller, as it should miss fewer obviously good traversals, and is not dependent on old protobuf-based structures.

Constructor & Destructor Documentation

◆ FlowCaller()

vg::FlowCaller::FlowCaller ( const PathPositionHandleGraph &  graph,
SupportBasedSnarlCaller &  snarl_caller,
SnarlManager &  snarl_manager,
const string &  sample_name,
TraversalFinder &  traversal_finder,
const vector< string > &  ref_paths,
const vector< size_t > &  ref_path_offsets,
const vector< int > &  ref_path_ploidies,
AlignmentEmitter *  aln_emitter,
bool  traversals_only,
bool  gaf_output,
size_t  trav_padding,
bool  genotype_snarls,
const pair< size_t, size_t > &  allele_length_range 
)

◆ ~FlowCaller()

vg::FlowCaller::~FlowCaller ( )
virtual

Member Function Documentation

◆ call_snarl()

bool vg::FlowCaller::call_snarl ( const Snarl &  snarl)
virtual

Call a given snarl, and print the output to out_stream.

Implements vg::GraphCaller.

◆ vcf_header()

string vg::FlowCaller::vcf_header ( const PathHandleGraph &  graph,
const vector< string > &  contigs,
const vector< size_t > &  contig_length_overrides = {} 
) const
virtual

Write the vcf header (version and contigs and basic info)

Reimplemented from vg::VCFOutputCaller.

Member Data Documentation

◆ alignment_emitter

AlignmentEmitter* vg::FlowCaller::alignment_emitter
protected

alignment emitter. if not null, traversals will be output here and no genotyping will be done

◆ allele_length_range

pair<size_t, size_t> vg::FlowCaller::allele_length_range
protected

clamp calling to alleles of a given length range. More specifically, a snarl is only called if 1) its largest allele is >= allele_length_range.first and 2) all alleles are < allele_length_range.second

◆ gaf_output

bool vg::FlowCaller::gaf_output
protected

toggle whether to output vcf or gaf

◆ genotype_snarls

bool vg::FlowCaller::genotype_snarls
protected

toggle whether to genotype every snarl (by default, uncalled snarls are skipped, and coordinates are flattened out to minimize variant size – this turns all that off)

◆ graph

const PathPositionHandleGraph& vg::FlowCaller::graph
protected

the graph

◆ max_snarl_edges

size_t vg::FlowCaller::max_snarl_edges = 10000
protected

until we support nested snarls, cap snarl size we attempt to process

◆ ref_offsets

map<string, size_t> vg::FlowCaller::ref_offsets
protected

keep track of offsets in the reference paths

◆ ref_path_set

unordered_set<string> vg::FlowCaller::ref_path_set
protected

◆ ref_paths

vector<string> vg::FlowCaller::ref_paths
protected

keep track of the reference paths

◆ ref_ploidies

map<string, int> vg::FlowCaller::ref_ploidies
protected

keep track of the ploidies (todo: just one map for all path stuff!!)

◆ traversal_finder

TraversalFinder& vg::FlowCaller::traversal_finder
protected

the traversal finder

◆ traversals_only

bool vg::FlowCaller::traversals_only
protected

toggle whether to genotype or just output the traversals


The documentation for this class was generated from the following files: