vg
tools for working with variation graphs
Public Member Functions | Protected Member Functions | Protected Attributes | Static Protected Attributes | List of all members
vg::VCFOutputCaller Class Reference

#include <graph_caller.hpp>

Inheritance diagram for vg::VCFOutputCaller:
vg::Deconstructor vg::FlowCaller vg::LegacyCaller vg::MCMCCaller vg::NestedFlowCaller vg::VCFGenotyper

Public Member Functions

 VCFOutputCaller (const string &sample_name)
 
virtual ~VCFOutputCaller ()
 
virtual string vcf_header (const PathHandleGraph &graph, const vector< string > &contigs, const vector< size_t > &contig_length_overrides) const
 Write the vcf header (version and contigs and basic info) More...
 
bool add_variant (vcflib::Variant &var) const
 
void write_variants (ostream &out_stream, const SnarlManager *snarl_manager=nullptr)
 
void vcf_fixup (vcflib::Variant &var) const
 Run vcffixup from vcflib. More...
 
void set_translation (const unordered_map< nid_t, pair< string, size_t >> *translation)
 Add a translation map. More...
 
void set_nested (bool nested)
 Assume writing nested snarls is enabled. More...
 

Protected Member Functions

void add_allele_path_to_info (const HandleGraph *graph, vcflib::Variant &v, int allele, const Traversal &trav, bool reversed, bool one_based) const
 add a traversal to the VCF info field in the format of a GFA W-line or GAF path More...
 
void add_allele_path_to_info (vcflib::Variant &v, int allele, const SnarlTraversal &trav, bool reversed, bool one_based) const
 legacy version of above More...
 
string trav_string (const HandleGraph &graph, const SnarlTraversal &trav) const
 convert a traversal into an allele string More...
 
bool emit_variant (const PathPositionHandleGraph &graph, SnarlCaller &snarl_caller, const Snarl &snarl, const vector< SnarlTraversal > &called_traversals, const vector< int > &genotype, int ref_trav_idx, const unique_ptr< SnarlCaller::CallInfo > &call_info, const string &ref_path_name, int ref_offset, bool genotype_snarls, int ploidy, function< string(const vector< SnarlTraversal > &, const vector< int > &, int, int, int)> trav_to_string=nullptr)
 
tuple< int64_t, int64_t, bool, step_handle_t, step_handle_t > get_ref_interval (const PathPositionHandleGraph &graph, const Snarl &snarl, const string &ref_path_name) const
 
pair< string, int64_t > get_ref_position (const PathPositionHandleGraph &graph, const Snarl &snarl, const string &ref_path_name, int64_t ref_path_offset) const
 used for making gaf traversal names More...
 
void flatten_common_allele_ends (vcflib::Variant &variant, bool backward, size_t len_override) const
 
string print_snarl (const HandleGraph *grpah, const handle_t &snarl_start, const handle_t &snarl_end, bool in_brackets=false) const
 
string print_snarl (const Snarl &snarl, bool in_brackets=false) const
 legacy version of above More...
 
void scan_snarl (const string &allele_string, function< void(const string &, Snarl &)> callback) const
 
void update_nesting_info_tags (const SnarlManager *snarl_manager)
 

Protected Attributes

vcflib::VariantCallFile output_vcf
 output vcf More...
 
string sample_name
 Sample name. More...
 
vector< vector< pair< pair< string, size_t >, string > > > output_variants
 
size_t max_uncalled_alleles = 5
 print up to this many uncalled alleles when doing ref-genotypes in -a mode More...
 
const unordered_map< nid_t, pair< string, size_t > > * translation
 
bool include_nested
 

Static Protected Attributes

static const int64_t max_vcf_line_length = 2000000000
 

Detailed Description

Helper class that VCF writers can inherit from for some common code to output sorted VCF.

Constructor & Destructor Documentation

◆ VCFOutputCaller()

vg::VCFOutputCaller::VCFOutputCaller ( const string &  sample_name)

◆ ~VCFOutputCaller()

vg::VCFOutputCaller::~VCFOutputCaller ( )
virtual

Member Function Documentation

◆ add_allele_path_to_info() [1/2]

void vg::VCFOutputCaller::add_allele_path_to_info ( const HandleGraph *  graph,
vcflib::Variant &  v,
int  allele,
const Traversal &  trav,
bool  reversed,
bool  one_based 
) const
protected

add a traversal to the VCF info field in the format of a GFA W-line or GAF path

◆ add_allele_path_to_info() [2/2]

void vg::VCFOutputCaller::add_allele_path_to_info ( vcflib::Variant &  v,
int  allele,
const SnarlTraversal &  trav,
bool  reversed,
bool  one_based 
) const
protected

legacy version of above

◆ add_variant()

bool vg::VCFOutputCaller::add_variant ( vcflib::Variant &  var) const

Add a variant to our buffer. Returns false if the variant line length exceeds VCFOutputCaller::max_vcf_line_length.

◆ emit_variant()

bool vg::VCFOutputCaller::emit_variant ( const PathPositionHandleGraph &  graph,
SnarlCaller &  snarl_caller,
const Snarl &  snarl,
const vector< SnarlTraversal > &  called_traversals,
const vector< int > &  genotype,
int  ref_trav_idx,
const unique_ptr< SnarlCaller::CallInfo > &  call_info,
const string &  ref_path_name,
int  ref_offset,
bool  genotype_snarls,
int  ploidy,
function< string(const vector< SnarlTraversal > &, const vector< int > &, int, int, int)>  trav_to_string = nullptr 
)
protected

Print a VCF variant. The return value is taken from add_variant (see above).

◆ flatten_common_allele_ends()

void vg::VCFOutputCaller::flatten_common_allele_ends ( vcflib::Variant &  variant,
bool  backward,
size_t  len_override 
) const
protected

Clean up the alleles so that they do not share common prefixes / suffixes. If len_override is given, just do that many bases without thinking.

◆ get_ref_interval()

tuple< int64_t, int64_t, bool, step_handle_t, step_handle_t > vg::VCFOutputCaller::get_ref_interval ( const PathPositionHandleGraph &  graph,
const Snarl &  snarl,
const string &  ref_path_name 
) const
protected

Get the interval of a snarl from our reference path using the PathPositionHandleGraph interface. The bool is true if the snarl is backward on the path. The first returned value is -1 if no traversal is found.

◆ get_ref_position()

pair< string, int64_t > vg::VCFOutputCaller::get_ref_position ( const PathPositionHandleGraph &  graph,
const Snarl &  snarl,
const string &  ref_path_name,
int64_t  ref_path_offset 
) const
protected

used for making gaf traversal names

◆ print_snarl() [1/2]

string vg::VCFOutputCaller::print_snarl ( const HandleGraph *  grpah,
const handle_t &  snarl_start,
const handle_t &  snarl_end,
bool  in_brackets = false 
) const
protected

Print a snarl in a consistent form like >3435<12222. If in_brackets is set to true, do (>3435<12222) instead (this is only used for the nested caller).

◆ print_snarl() [2/2]

string vg::VCFOutputCaller::print_snarl ( const Snarl &  snarl,
bool  in_brackets = false 
) const
protected

legacy version of above

◆ scan_snarl()

void vg::VCFOutputCaller::scan_snarl ( const string &  allele_string,
function< void(const string &, Snarl &)>  callback 
) const
protected

Do the opposite of above. So a string that looks like AACT(>12<17)TTT would invoke the callback three times with ("AACT", Snarl), ("", Snarl(12,-17)), ("TTT", Snarl(12,-17)). The parameters are to be treated as unions: a sequence fragment if non-empty, otherwise a snarl.

◆ set_nested()

void vg::VCFOutputCaller::set_nested ( bool  nested)

Assume writing nested snarls is enabled.

◆ set_translation()

void vg::VCFOutputCaller::set_translation ( const unordered_map< nid_t, pair< string, size_t >> *  translation)

Add a translation map.

◆ trav_string()

string vg::VCFOutputCaller::trav_string ( const HandleGraph &  graph,
const SnarlTraversal &  trav 
) const
protected

convert a traversal into an allele string

◆ update_nesting_info_tags()

void vg::VCFOutputCaller::update_nesting_info_tags ( const SnarlManager *  snarl_manager)
protected

◆ vcf_fixup()

void vg::VCFOutputCaller::vcf_fixup ( vcflib::Variant &  var) const

Run vcffixup from vcflib.

◆ vcf_header()

string vg::VCFOutputCaller::vcf_header ( const PathHandleGraph &  graph,
const vector< string > &  contigs,
const vector< size_t > &  contig_length_overrides 
) const
virtual

Write the vcf header (version and contigs and basic info)

Reimplemented in vg::NestedFlowCaller, vg::FlowCaller, vg::LegacyCaller, and vg::VCFGenotyper.

◆ write_variants()

void vg::VCFOutputCaller::write_variants ( ostream &  out_stream,
const SnarlManager *  snarl_manager = nullptr 
)

Sort then write variants in the buffer. snarl_manager is needed if include_nested is true.

Member Data Documentation

◆ include_nested

bool vg::VCFOutputCaller::include_nested
protected

◆ max_uncalled_alleles

size_t vg::VCFOutputCaller::max_uncalled_alleles = 5
protected

print up to this many uncalled alleles when doing ref-genotypes in -a mode

◆ max_vcf_line_length

const int64_t vg::VCFOutputCaller::max_vcf_line_length = 2000000000
static protected

◆ output_variants

vector<vector<pair<pair<string, size_t>, string> > > vg::VCFOutputCaller::output_variants
mutable protected

Output buffers (1/thread) (for sorting). Variants are stored as strings (and position key pairs) because the vcflib::Variant in-memory struct is so huge.

◆ output_vcf

vcflib::VariantCallFile vg::VCFOutputCaller::output_vcf
mutableprotected

output vcf

◆ sample_name

string vg::VCFOutputCaller::sample_name
protected

Sample name.

◆ translation

const unordered_map<nid_t, pair<string, size_t> >* vg::VCFOutputCaller::translation
protected

The documentation for this class was generated from the following files: