vg
tools for working with variation graphs
Functions | Variables
minimizer_main.cpp File Reference
#include "subcommand.hpp"
#include <vg/io/vpkg.hpp>
#include <algorithm>
#include <iostream>
#include <vector>
#include <getopt.h>
#include <omp.h>
#include "../gbwtgraph_helper.hpp"
#include "../gbwt_helper.hpp"
#include "../index_registry.hpp"
#include "../utility.hpp"
#include "../handle.hpp"
#include "../snarl_distance_index.hpp"
#include <gbwtgraph/index.h>

Functions

int get_default_threads ()
 
size_t estimate_hash_table_size (const gbwtgraph::GBZ &gbz, bool progress)
 
void help_minimizer (char **argv)
 
int main_minimizer (int argc, char **argv)
 
size_t trailing_zeros (size_t value)
 

Variables

constexpr int DEFAULT_MAX_THREADS = 16
 
constexpr size_t DEFAULT_THRESHOLD = 500
 
constexpr size_t DEFAULT_ITERATIONS = 3
 
constexpr size_t MAX_ITERATIONS = gbwtgraph::MinimizerHeader::FLAG_WEIGHT_MASK >> gbwtgraph::MinimizerHeader::FLAG_WEIGHT_OFFSET
 
constexpr size_t HASH_TABLE_MIN_WIDTH = 10
 
constexpr size_t HASH_TABLE_MAX_WIDTH = 36
 

Detailed Description

Defines the "vg minimizer" subcommand, which builds the minimizer index.

The index contains the lexicographically smallest kmer in each window of w successive kmers and their reverse complements. A kmer that contains characters other than A, C, G, and T will not be indexed.

The index contains either all minimizers or only the haplotype-consistent ones. Indexing all minimizers from complex graph regions can take a long time (e.g. tens of hours vs. 5-10 minutes for 1000GP), because many windows have the same minimizer. As the total number of minimizers is manageable (e.g. 1.5x more for 1000GP), it should be possible to develop a better algorithm for finding the minimizers.

A quick idea for indexing the entire graph:

Function Documentation

◆ estimate_hash_table_size()

size_t estimate_hash_table_size ( const gbwtgraph::GBZ &  gbz,
bool  progress 
)

◆ get_default_threads()

int get_default_threads ( )

◆ help_minimizer()

void help_minimizer ( char **  argv)

◆ main_minimizer()

int main_minimizer ( int  argc,
char **  argv 
)

◆ trailing_zeros()

size_t trailing_zeros ( size_t  value)

Variable Documentation

◆ DEFAULT_ITERATIONS

constexpr size_t DEFAULT_ITERATIONS = 3
constexpr

◆ DEFAULT_MAX_THREADS

constexpr int DEFAULT_MAX_THREADS = 16
constexpr

◆ DEFAULT_THRESHOLD

constexpr size_t DEFAULT_THRESHOLD = 500
constexpr

◆ HASH_TABLE_MAX_WIDTH

constexpr size_t HASH_TABLE_MAX_WIDTH = 36
constexpr

◆ HASH_TABLE_MIN_WIDTH

constexpr size_t HASH_TABLE_MIN_WIDTH = 10
constexpr

◆ MAX_ITERATIONS

constexpr size_t MAX_ITERATIONS = gbwtgraph::MinimizerHeader::FLAG_WEIGHT_MASK >> gbwtgraph::MinimizerHeader::FLAG_WEIGHT_OFFSET
constexpr