#include <traversal_finder.hpp>
|
| VCFTraversalFinder (const PathHandleGraph &graph, SnarlManager &snarl_manager, vcflib::VariantCallFile &vcf, const vector< string > &ref_path_names={}, FastaReference *fasta_ref=nullptr, FastaReference *ins_ref=nullptr, function< bool(const SnarlTraversal &, int)> skip_alt=nullptr, size_t max_traversal_cutoff=50000) |
|
virtual | ~VCFTraversalFinder () |
|
pair< vector< pair< SnarlTraversal, vector< int > > >, vector< vcflib::Variant * > > | find_allele_traversals (Snarl site) |
|
virtual vector< SnarlTraversal > | find_traversals (const Snarl &site) |
|
vector< vcflib::Variant * > | get_variants_in_site (const Snarl &site) |
|
virtual | ~TraversalFinder ()=default |
|
virtual vector< Traversal > | find_traversals (const handle_t &snarl_start, const handle_t &snarl_end) |
|
|
void | create_variant_index (vcflib::VariantCallFile &vcf, FastaReference *ref_fasta=nullptr, FastaReference *ins_fasta=nullptr) |
|
void | delete_variant_index () |
|
void | brute_force_alt_traversals (const Snarl &site, const vector< vcflib::Variant * > &site_variants, path_handle_t ref_path, step_handle_t start_step, step_handle_t end_step, vector< pair< SnarlTraversal, vector< int > > > &output_traversals) |
|
pair< SnarlTraversal, bool > | get_alt_traversal (const Snarl &site, const vector< vcflib::Variant * > &site_variants, path_handle_t ref_path, step_handle_t start_step, step_handle_t end_step, const vector< int > &haplotype) |
|
pair< unordered_set< handle_t >, unordered_set< pair< handle_t, handle_t > > > | get_haplotype_alt_contents (const vector< vcflib::Variant * > &site_variants, const vector< int > &haplotype, path_handle_t ref_path) |
|
pair< SnarlTraversal, vector< edge_t > > | get_alt_path (vcflib::Variant *site_variant, int allele, path_handle_t ref_path) |
|
pair< SnarlTraversal, vector< edge_t > > | scan_for_deletion (vcflib::Variant *var, int allele, path_handle_t ref_path, step_handle_t first_path_step, step_handle_t last_path_step) |
|
vector< vector< int > > | get_pruned_alt_alleles (const Snarl &site, const vector< vcflib::Variant * > &site_variants, path_handle_t ref_path) |
|
bool | check_max_trav_cutoff (const vector< vector< int > > &alleles) |
|
pair< step_handle_t, bool > | step_in_path (handle_t handle, path_handle_t path_handle) const |
|
This TraversalFinder returns traversals and their corresponding genotypes from an input vcf. It relies on alt-paths in the graph (via construct -a) to map between the vcf and the graph.
◆ VCFTraversalFinder()
vg::VCFTraversalFinder::VCFTraversalFinder |
( |
const PathHandleGraph & |
graph, |
|
|
SnarlManager & |
snarl_manager, |
|
|
vcflib::VariantCallFile & |
vcf, |
|
|
const vector< string > & |
ref_path_names = {} , |
|
|
FastaReference * |
fasta_ref = nullptr , |
|
|
FastaReference * |
ins_ref = nullptr , |
|
|
function< bool(const SnarlTraversal &, int)> |
skip_alt = nullptr , |
|
|
size_t |
max_traversal_cutoff = 50000 |
|
) |
| |
Make a new VCFTraversalFinder. Builds the indexes needed to find all the variants in a site.
If the skip_alt() method is defined, it is run on the alt-path of each variant allele in the snarl. If it returns true, that alt-path will never be included in any traversals returned in find_traversals().
This is used to, for example, use read support to prune the number of traversals that are enumerated.
◆ ~VCFTraversalFinder()
vg::VCFTraversalFinder::~VCFTraversalFinder |
( |
| ) |
|
|
virtual |
◆ brute_force_alt_traversals()
Get a traversal for every possible haplotype (but reference) in the most naive way possible. This will blow up terribly for sites that contain more than a few variants. There's an obvious dynamic programming speedup, but the main issue is that the output size is exponential in the number of variants.
◆ check_max_trav_cutoff()
bool vg::VCFTraversalFinder::check_max_trav_cutoff |
( |
const vector< vector< int > > & |
alleles | ) |
|
|
protected |
Count the possible traversal paths. Return false if we ever get beyond our cutoff
◆ create_variant_index()
void vg::VCFTraversalFinder::create_variant_index |
( |
vcflib::VariantCallFile & |
vcf, |
|
|
FastaReference * |
ref_fasta = nullptr , |
|
|
FastaReference * |
ins_fasta = nullptr |
|
) |
| |
|
protected |
Load up all the variants into our node index
◆ delete_variant_index()
void vg::VCFTraversalFinder::delete_variant_index |
( |
| ) |
|
|
protected |
◆ find_allele_traversals()
pair< vector< pair< SnarlTraversal, vector< int > > >, vector< vcflib::Variant * > > vg::VCFTraversalFinder::find_allele_traversals |
( |
Snarl |
site | ) |
|
Find traversals for the site. Each traversal is returned in a pair with its haplotype. The haplotype refers to the list of variants (also returned).
◆ find_traversals()
Return a list of traversals for the site. The same traversals as above, only the haplotype information is not included.
Implements vg::TraversalFinder.
◆ get_alt_path()
Get one alt-path out of the graph in the form of a snarl traversal. If the path is a deletion, the edges corresponding to the deletion are also returned. Note that it is indeed possible for one alt path (and therefore one vcf allele) to correspond to several deletion edges in the graph due to normalization during construction.
◆ get_alt_traversal()
Get a traversal for a given haplotype. It gets all the nodes and edges from the alt paths, and greedily walks over them whenever possible (traversing the reference otherwise). If there is no traversal that can satisfy the haplotype, then the returned bool is set to false.
◆ get_haplotype_alt_contents()
pair< unordered_set< handle_t >, unordered_set< pair< handle_t, handle_t > > > vg::VCFTraversalFinder::get_haplotype_alt_contents |
( |
const vector< vcflib::Variant * > & |
site_variants, |
|
|
const vector< int > & |
haplotype, |
|
|
path_handle_t |
ref_path |
|
) |
| |
|
protected |
Get a set of all alt path nodes and deletion edges for a haplotype.
◆ get_pruned_alt_alleles()
vector< vector< int > > vg::VCFTraversalFinder::get_pruned_alt_alleles |
( |
const Snarl & |
site, |
|
|
const vector< vcflib::Variant * > & |
site_variants, |
|
|
path_handle_t |
ref_path |
|
) |
| |
|
protected |
Prune our search space using the skip_alt method. Will return a list of pruned VCF alleles.
For example, if the input has A --> T and G --> C,A, the input alleles are <0,1>, <0,1,2>. If there's no support for the G->C on the second one, the output would be <0,1>, <0,2>.
◆ get_variants_in_site()
vector< vcflib::Variant * > vg::VCFTraversalFinder::get_variants_in_site |
( |
const Snarl & |
site | ) |
|
Get all the variants that are contained in a site
◆ scan_for_deletion()
An alt path for a deletion is the deleted reference path. But sometimes vg construct doesn't write a deletion edge that exactly jumps over the alt path. In these cases, we need to search the graph for one. This does a brute-force check of all deletion edges in the vicinity for one that's the same size as the one we're looking for.
It tries to find a set of nearby deletions that match the desired length. Todo: check the sequence as well. Also todo: It'd be really nice if construct -fa would make the deletion-edge easily inferrable from the alt path. It really shouldn't be necessary to hunt around. Returns: <deletion traversal, list of deletion edges>
◆ step_in_path()
Lookup a node in the reference path (mimics old PathIndex)
◆ graph
◆ include_endpoints
bool vg::VCFTraversalFinder::include_endpoints = true |
|
protected |
Include snarl endpoints in traversals.
◆ max_deletion_scan_nodes
size_t vg::VCFTraversalFinder::max_deletion_scan_nodes = 50 |
|
protected |
How far to scan when looking for deletions.
◆ max_prune_iterations
size_t vg::VCFTraversalFinder::max_prune_iterations = 2 |
|
protected |
Maximum number of pruning iterations.
◆ max_traversal_cutoff
size_t vg::VCFTraversalFinder::max_traversal_cutoff |
|
protected |
If a snarl has more than this many traversals, return nothing and print a warning. Dense and large deletions will make this happen from time to time. In practice, skip_alt (above) can be used to prune down the search space by selecting alleles to ignore.
◆ node_to_variant
unordered_map<id_t, list<vcflib::Variant*> > vg::VCFTraversalFinder::node_to_variant |
|
protected |
Store variants indexed by an arbitrary node in one of their associated alt paths. We can then use this to find all variants in a top-level snarl
◆ path_finder
Use this to check if our snarl runs through a reference path (may be overkill, but can be used for sanity checking)
◆ skip_alt
function<bool(const SnarlTraversal& alt_path, int iteration)> vg::VCFTraversalFinder::skip_alt |
|
protected |
Use this method to prune the search space by selecting alt-alleles to skip by considering their paths (in SnarlTraversal format). It will try again and again until enough traversals are pruned, with iteration keeping track of how many tries (so it should become stricter as iteration increases).
◆ snarl_manager
The documentation for this class was generated from the following files: