vg
tools for working with variation graphs
Classes | Namespaces | Functions | Variables
haplotypes_main.cpp File Reference
#include "subcommand.hpp"
#include "../hash_map.hpp"
#include "../recombinator.hpp"
#include "../algorithms/extract_subchain.hpp"
#include <cmath>
#include <fstream>
#include <functional>
#include <iostream>
#include <thread>
#include <vector>
#include <unordered_map>
#include <getopt.h>
#include <omp.h>
#include <gbwtgraph/index.h>

Classes

struct  HaplotypesConfig
 
struct  ReferenceInterval
 

Namespaces

 haplotypes_defaults
 

Functions

size_t haplotypes_defaults::threads ()
 
constexpr size_t haplotypes_defaults::k ()
 
constexpr size_t haplotypes_defaults::w ()
 
constexpr size_t haplotypes_defaults::subchain_length ()
 
constexpr size_t haplotypes_defaults::n ()
 
constexpr size_t haplotypes_defaults::candidates ()
 
constexpr size_t haplotypes_defaults::coverage ()
 
constexpr double haplotypes_defaults::discount ()
 
constexpr double haplotypes_defaults::adjustment ()
 
constexpr double haplotypes_defaults::absent ()
 
constexpr double haplotypes_defaults::badness ()
 
void preprocess_graph (const gbwtgraph::GBZ &gbz, Haplotypes &haplotypes, HaplotypesConfig &config)
 
void set_reference_samples (gbwtgraph::GBZ &gbz, const HaplotypesConfig &config)
 
void sample_haplotypes (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, const HaplotypesConfig &config)
 
void subchain_statistics (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, const HaplotypesConfig &config)
 
int main_haplotypes (int argc, char **argv)
 
void help_haplotypes (char **argv, bool developer_options)
 
void validate_haplotypes (const Haplotypes &haplotypes, const gbwtgraph::GBWTGraph &graph, const gbwt::FastLocate &r_index, const HaplotypePartitioner::minimizer_index_type &minimizer_index, size_t expected_chains, HaplotypePartitioner::Verbosity verbosity)
 
bool ends_with (const std::string &str, const std::string &suffix)
 
std::string get_name (const std::string &graph_name, const std::string &extension)
 
size_t threads_to_jobs (size_t threads)
 
void validate_subgraph (const gbwtgraph::GBWTGraph &graph, const gbwtgraph::GBWTGraph &subgraph, HaplotypePartitioner::Verbosity verbosity)
 
gbwt::size_type seq_for_chain (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, gbwt::size_type path_id, size_t chain_id)
 
std::pair< std::vector< ReferenceInterval >, size_t > subchain_intervals (const gbwtgraph::GBZ &gbz, const Haplotypes &haplotypes, gbwt::size_type sequence_id, size_t chain_id)
 
gbwt::size_type path_for_sample_contig (const gbwtgraph::GBZ &gbz, const std::string &sample_name, const std::string &contig_name)
 
void validate_error (const std::string &header, const std::string &message)
 
template<typename T >
std::string expected_got (T expected, T got)
 
template<typename T >
std::string pair_to_string (std::pair< T, T > value)
 
void validate_error_chain (size_t chain_id, const std::string &message)
 
void validate_error_subchain (size_t chain_id, size_t subchain_id, const std::string &message)
 
void validate_error_sequence (size_t chain_id, size_t subchain_id, size_t sequence_id, const std::string &message)
 
std::string validate_unary_path (const HandleGraph &graph, handle_t from, handle_t to)
 
bool trace_path (const gbwt::GBWT &index, const gbwt::FragmentMap &fragment_map, const hash_set< nid_t > &subchain_nodes, gbwt::size_type sequence_id, gbwt::node_type start, gbwt::size_type offset, gbwt::node_type end)
 
std::vector< std::string > get_haplotype (const gbwtgraph::GBWTGraph &graph, const gbwt::FragmentMap &fragment_map, Haplotypes::sequence_type sequence, gbwt::node_type from, gbwt::node_type to, size_t k)
 
void validate_chain (const Haplotypes::TopLevelChain &chain, const gbwtgraph::GBWTGraph &graph, const gbwt::FragmentMap &fragment_map, const gbwt::FastLocate &r_index, const HaplotypePartitioner::minimizer_index_type &minimizer_index, size_t chain_id, HaplotypePartitioner::Verbosity verbosity)
 
std::string subchain_to_string (size_t chain_id, size_t subchain_id, const Haplotypes::Subchain &subchain)
 
void validate_nodes (const gbwtgraph::GBWTGraph &graph, const gbwtgraph::GBWTGraph &subgraph)
 
void validate_edges (const gbwtgraph::GBWTGraph &graph, const gbwtgraph::GBWTGraph &subgraph)
 

Variables

constexpr size_t haplotypes_defaults::DEFAULT_MAX_THREADS = 16
 

Detailed Description

Defines the "vg haplotypes" subcommand, which samples haplotypes by kmer counts in the reads.

TODO: Tests for --linear-structure, --extra-fragments, and fragmented haplotypes.

Function Documentation

◆ ends_with()

bool ends_with ( const std::string &  str,
const std::string &  suffix 
)

◆ expected_got()

template<typename T >
std::string expected_got ( T  expected,
T  got 
)

◆ get_haplotype()

std::vector<std::string> get_haplotype ( const gbwtgraph::GBWTGraph &  graph,
const gbwt::FragmentMap &  fragment_map,
Haplotypes::sequence_type  sequence,
gbwt::node_type  from,
gbwt::node_type  to,
size_t  k 
)

◆ get_name()

std::string get_name ( const std::string &  graph_name,
const std::string &  extension 
)

◆ help_haplotypes()

void help_haplotypes ( char **  argv,
bool  developer_options 
)

◆ main_haplotypes()

int main_haplotypes ( int  argc,
char **  argv 
)

◆ pair_to_string()

template<typename T >
std::string pair_to_string ( std::pair< T, T >  value)

◆ path_for_sample_contig()

gbwt::size_type path_for_sample_contig ( const gbwtgraph::GBZ &  gbz,
const std::string &  sample_name,
const std::string &  contig_name 
)

◆ preprocess_graph()

void preprocess_graph ( const gbwtgraph::GBZ &  gbz,
Haplotypes &  haplotypes,
HaplotypesConfig &  config 
)

◆ sample_haplotypes()

void sample_haplotypes ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
const HaplotypesConfig &  config 
)

◆ seq_for_chain()

gbwt::size_type seq_for_chain ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
gbwt::size_type  path_id,
size_t  chain_id 
)

◆ set_reference_samples()

void set_reference_samples ( gbwtgraph::GBZ &  gbz,
const HaplotypesConfig &  config 
)

◆ subchain_intervals()

std::pair<std::vector<ReferenceInterval>, size_t> subchain_intervals ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
gbwt::size_type  sequence_id,
size_t  chain_id 
)

◆ subchain_statistics()

void subchain_statistics ( const gbwtgraph::GBZ &  gbz,
const Haplotypes &  haplotypes,
const HaplotypesConfig &  config 
)

◆ subchain_to_string()

std::string subchain_to_string ( size_t  chain_id,
size_t  subchain_id,
const Haplotypes::Subchain &  subchain 
)

◆ threads_to_jobs()

size_t threads_to_jobs ( size_t  threads)

◆ trace_path()

bool trace_path ( const gbwt::GBWT &  index,
const gbwt::FragmentMap &  fragment_map,
const hash_set< nid_t > &  subchain_nodes,
gbwt::size_type  sequence_id,
gbwt::node_type  start,
gbwt::size_type  offset,
gbwt::node_type  end 
)

◆ validate_chain()

void validate_chain ( const Haplotypes::TopLevelChain &  chain,
const gbwtgraph::GBWTGraph &  graph,
const gbwt::FragmentMap &  fragment_map,
const gbwt::FastLocate &  r_index,
const HaplotypePartitioner::minimizer_index_type &  minimizer_index,
size_t  chain_id,
HaplotypePartitioner::Verbosity  verbosity 
)

◆ validate_edges()

void validate_edges ( const gbwtgraph::GBWTGraph &  graph,
const gbwtgraph::GBWTGraph &  subgraph 
)

◆ validate_error()

void validate_error ( const std::string &  header,
const std::string &  message 
)

◆ validate_error_chain()

void validate_error_chain ( size_t  chain_id,
const std::string &  message 
)

◆ validate_error_sequence()

void validate_error_sequence ( size_t  chain_id,
size_t  subchain_id,
size_t  sequence_id,
const std::string &  message 
)

◆ validate_error_subchain()

void validate_error_subchain ( size_t  chain_id,
size_t  subchain_id,
const std::string &  message 
)

◆ validate_haplotypes()

void validate_haplotypes ( const Haplotypes &  haplotypes,
const gbwtgraph::GBWTGraph &  graph,
const gbwt::FastLocate &  r_index,
const HaplotypePartitioner::minimizer_index_type &  minimizer_index,
size_t  expected_chains,
HaplotypePartitioner::Verbosity  verbosity 
)

◆ validate_nodes()

void validate_nodes ( const gbwtgraph::GBWTGraph &  graph,
const gbwtgraph::GBWTGraph &  subgraph 
)

◆ validate_subgraph()

void validate_subgraph ( const gbwtgraph::GBWTGraph &  graph,
const gbwtgraph::GBWTGraph &  subgraph,
HaplotypePartitioner::Verbosity  verbosity 
)

◆ validate_unary_path()

std::string validate_unary_path ( const HandleGraph &  graph,
handle_t  from,
handle_t  to 
)