vg
tools for working with variation graphs
|
#include "subcommand.hpp"
#include <vg/io/vpkg.hpp>
#include <algorithm>
#include <iostream>
#include <vector>
#include <getopt.h>
#include <omp.h>
#include "../gbwtgraph_helper.hpp"
#include "../gbwt_helper.hpp"
#include "../index_registry.hpp"
#include "../utility.hpp"
#include "../handle.hpp"
#include "../snarl_distance_index.hpp"
#include <gbwtgraph/index.h>
Functions | |
int | get_default_threads () |
size_t | estimate_hash_table_size (const gbwtgraph::GBZ &gbz, bool progress) |
void | help_minimizer (char **argv) |
int | main_minimizer (int argc, char **argv) |
size_t | trailing_zeros (size_t value) |
Variables | |
constexpr int | DEFAULT_MAX_THREADS = 16 |
constexpr size_t | DEFAULT_THRESHOLD = 500 |
constexpr size_t | DEFAULT_ITERATIONS = 3 |
constexpr size_t | MAX_ITERATIONS = gbwtgraph::MinimizerHeader::FLAG_WEIGHT_MASK >> gbwtgraph::MinimizerHeader::FLAG_WEIGHT_OFFSET |
constexpr size_t | HASH_TABLE_MIN_WIDTH = 10 |
constexpr size_t | HASH_TABLE_MAX_WIDTH = 36 |
Defines the "vg minimizer" subcommand, which builds the minimizer index.
For each window of w successive kmers and their reverse complements, the index contains the lexicographically smallest kmer (the minimizer). Kmers containing characters other than A, C, G, and T are not indexed.
The index contains either all minimizers or only the haplotype-consistent minimizers. Indexing all minimizers from complex graph regions can take a long time (e.g. tens of hours vs 5-10 minutes for 1000GP), because many windows have the same minimizer. As the total number of minimizers is manageable (e.g. 1.5x more for 1000GP), it should be possible to develop a better algorithm for finding the minimizers.
A quick idea for indexing the entire graph:
size_t estimate_hash_table_size(const gbwtgraph::GBZ &gbz, bool progress)
int get_default_threads()
void help_minimizer(char **argv)
int main_minimizer(int argc, char **argv)
size_t trailing_zeros(size_t value)
|
constexpr |
|
constexpr |
|
constexpr |
|
constexpr |
|
constexpr |
|
constexpr |