vg
tools for working with variation graphs
Classes | Functions | Variables
haplotypes_main.cpp File Reference
#include "subcommand.hpp"
#include "../hash_map.hpp"
#include "../recombinator.hpp"
#include <cmath>
#include <fstream>
#include <functional>
#include <iostream>
#include <thread>
#include <vector>
#include <unordered_map>
#include <getopt.h>
#include <omp.h>
#include <gbwtgraph/index.h>

Classes

struct  HaplotypesConfig
 
struct  ReferenceInterval
 

Functions

size_t haplotypes_default_threads ()
 
constexpr size_t haplotypes_default_k ()
 
constexpr size_t haplotypes_default_w ()
 
constexpr size_t haplotypes_default_subchain_length ()
 
constexpr size_t haplotypes_default_n ()
 
constexpr size_t haplotypes_default_candidates ()
 
constexpr size_t haplotypes_default_coverage ()
 
constexpr double haplotypes_default_discount ()
 
constexpr double haplotypes_default_adjustment ()
 
constexpr double haplotypes_default_absent ()
 
void preprocess_graph (const gbwtgraph::GBZ &gbz, Haplotypes &haplotypes, HaplotypesConfig &config)
 
void sample_haplotypes (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, const HaplotypesConfig &config)
 
void map_variants (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, const HaplotypesConfig &config)
 
void extract_haplotypes (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, const HaplotypesConfig &config)
 
void classify_kmers (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, const HaplotypesConfig &config)
 
int main_haplotypes (int argc, char **argv)
 
void help_haplotypes (char **argv, bool developer_options)
 
void validate_haplotypes (const Haplotypes &haplotypes, const gbwtgraph::GBWTGraph &graph, const gbwt::FastLocate &r_index, const HaplotypePartitioner::minimizer_index_type &minimizer_index, size_t expected_chains, HaplotypePartitioner::Verbosity verbosity)
 
bool ends_with (const std::string &str, const std::string &suffix)
 
std::string get_name (const std::string &graph_name, const std::string &extension)
 
size_t threads_to_jobs (size_t threads)
 
void validate_subgraph (const gbwtgraph::GBWTGraph &graph, const gbwtgraph::GBWTGraph &subgraph, HaplotypePartitioner::Verbosity verbosity)
 
gbwt::size_type path_for_contig (const gbwtgraph::GBZ &gbz, gbwt::size_type contig_id, const std::string &contig_name)
 
std::pair< gbwt::size_type, size_t > seq_chain_for_path (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, gbwt::size_type path_id, const std::string &contig_name)
 
std::vector< ReferenceInterval > subchain_intervals (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, gbwt::size_type sequence_id, size_t chain_id, bool reverse)
 
void validate_error (const std::string &header, const std::string &message)
 
template<typename T >
std::string expected_got (T expected, T got)
 
template<typename T >
std::string pair_to_string (std::pair< T, T > value)
 
void validate_error_chain (size_t chain_id, const std::string &message)
 
void validate_error_subchain (size_t chain_id, size_t subchain_id, const std::string &message)
 
void validate_error_sequence (size_t chain_id, size_t subchain_id, size_t sequence_id, const std::string &message)
 
std::string validate_unary_path (const HandleGraph &graph, handle_t from, handle_t to)
 
bool trace_path (const gbwt::GBWT &index, gbwt::node_type start, gbwt::size_type offset, gbwt::node_type end)
 
std::string get_haplotype (const gbwtgraph::GBWTGraph &graph, Haplotypes::sequence_type sequence, gbwt::node_type from, gbwt::node_type to, size_t k)
 
void validate_chain (const Haplotypes::TopLevelChain &chain, const gbwtgraph::GBWTGraph &graph, const gbwt::FastLocate &r_index, const HaplotypePartitioner::minimizer_index_type &minimizer_index, size_t chain_id, HaplotypePartitioner::Verbosity verbosity)
 
std::string subchain_to_string (size_t chain_id, size_t subchain_id, const Haplotypes::Subchain &subchain)
 
void validate_nodes (const gbwtgraph::GBWTGraph &graph, const gbwtgraph::GBWTGraph &subgraph)
 
void validate_edges (const gbwtgraph::GBWTGraph &graph, const gbwtgraph::GBWTGraph &subgraph)
 

Variables

constexpr size_t DEFAULT_MAX_THREADS = 16
 

Detailed Description

Defines the "vg haplotypes" subcommand, which samples haplotypes by kmer counts in the reads.

Function Documentation

◆ classify_kmers()

void classify_kmers ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
const HaplotypesConfig &  config 
)

◆ ends_with()

bool ends_with ( const std::string &  str,
const std::string &  suffix 
)

◆ expected_got()

template<typename T >
std::string expected_got ( T  expected,
T  got 
)

◆ extract_haplotypes()

void extract_haplotypes ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
const HaplotypesConfig &  config 
)

◆ get_haplotype()

std::string get_haplotype ( const gbwtgraph::GBWTGraph &  graph,
Haplotypes::sequence_type  sequence,
gbwt::node_type  from,
gbwt::node_type  to,
size_t  k 
)

◆ get_name()

std::string get_name ( const std::string &  graph_name,
const std::string &  extension 
)

◆ haplotypes_default_absent()

constexpr double haplotypes_default_absent ( )
constexpr

◆ haplotypes_default_adjustment()

constexpr double haplotypes_default_adjustment ( )
constexpr

◆ haplotypes_default_candidates()

constexpr size_t haplotypes_default_candidates ( )
constexpr

◆ haplotypes_default_coverage()

constexpr size_t haplotypes_default_coverage ( )
constexpr

◆ haplotypes_default_discount()

constexpr double haplotypes_default_discount ( )
constexpr

◆ haplotypes_default_k()

constexpr size_t haplotypes_default_k ( )
constexpr

◆ haplotypes_default_n()

constexpr size_t haplotypes_default_n ( )
constexpr

◆ haplotypes_default_subchain_length()

constexpr size_t haplotypes_default_subchain_length ( )
constexpr

◆ haplotypes_default_threads()

size_t haplotypes_default_threads ( )

◆ haplotypes_default_w()

constexpr size_t haplotypes_default_w ( )
constexpr

◆ help_haplotypes()

void help_haplotypes ( char **  argv,
bool  developer_options 
)

◆ main_haplotypes()

int main_haplotypes ( int  argc,
char **  argv 
)

◆ map_variants()

void map_variants ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
const HaplotypesConfig &  config 
)

◆ pair_to_string()

template<typename T >
std::string pair_to_string ( std::pair< T, T >  value)

◆ path_for_contig()

gbwt::size_type path_for_contig ( const gbwtgraph::GBZ &  gbz,
gbwt::size_type  contig_id,
const std::string &  contig_name 
)

◆ preprocess_graph()

void preprocess_graph ( const gbwtgraph::GBZ &  gbz,
Haplotypes &  haplotypes,
HaplotypesConfig &  config 
)

◆ sample_haplotypes()

void sample_haplotypes ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
const HaplotypesConfig &  config 
)

◆ seq_chain_for_path()

std::pair<gbwt::size_type, size_t> seq_chain_for_path ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
gbwt::size_type  path_id,
const std::string &  contig_name 
)

◆ subchain_intervals()

std::vector<ReferenceInterval> subchain_intervals ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
gbwt::size_type  sequence_id,
size_t  chain_id,
bool  reverse 
)

◆ subchain_to_string()

std::string subchain_to_string ( size_t  chain_id,
size_t  subchain_id,
const Haplotypes::Subchain &  subchain 
)

◆ threads_to_jobs()

size_t threads_to_jobs ( size_t  threads)

◆ trace_path()

bool trace_path ( const gbwt::GBWT &  index,
gbwt::node_type  start,
gbwt::size_type  offset,
gbwt::node_type  end 
)

◆ validate_chain()

void validate_chain ( const Haplotypes::TopLevelChain &  chain,
const gbwtgraph::GBWTGraph &  graph,
const gbwt::FastLocate &  r_index,
const HaplotypePartitioner::minimizer_index_type &  minimizer_index,
size_t  chain_id,
HaplotypePartitioner::Verbosity  verbosity 
)

◆ validate_edges()

void validate_edges ( const gbwtgraph::GBWTGraph &  graph,
const gbwtgraph::GBWTGraph &  subgraph 
)

◆ validate_error()

void validate_error ( const std::string &  header,
const std::string &  message 
)

◆ validate_error_chain()

void validate_error_chain ( size_t  chain_id,
const std::string &  message 
)

◆ validate_error_sequence()

void validate_error_sequence ( size_t  chain_id,
size_t  subchain_id,
size_t  sequence_id,
const std::string &  message 
)

◆ validate_error_subchain()

void validate_error_subchain ( size_t  chain_id,
size_t  subchain_id,
const std::string &  message 
)

◆ validate_haplotypes()

void validate_haplotypes ( const Haplotypes &  haplotypes,
const gbwtgraph::GBWTGraph &  graph,
const gbwt::FastLocate &  r_index,
const HaplotypePartitioner::minimizer_index_type &  minimizer_index,
size_t  expected_chains,
HaplotypePartitioner::Verbosity  verbosity 
)

◆ validate_nodes()

void validate_nodes ( const gbwtgraph::GBWTGraph &  graph,
const gbwtgraph::GBWTGraph &  subgraph 
)

◆ validate_subgraph()

void validate_subgraph ( const gbwtgraph::GBWTGraph &  graph,
const gbwtgraph::GBWTGraph &  subgraph,
HaplotypePartitioner::Verbosity  verbosity 
)

◆ validate_unary_path()

std::string validate_unary_path ( const HandleGraph &  graph,
handle_t  from,
handle_t  to 
)

Variable Documentation

◆ DEFAULT_MAX_THREADS

constexpr size_t DEFAULT_MAX_THREADS = 16
constexpr