vg
tools for working with variation graphs
|
#include <stream_index.hpp>
Public Types | |
using | cursor_t = vg::io::ProtobufIterator< Message > |
Public Types inherited from vg::StreamIndexBase | |
using | bin_t = make_unsigned< id_t >::type |
using | window_t = make_unsigned< id_t >::type |
Public Member Functions | |
StreamIndex ()=default | |
void | find (cursor_t &cursor, id_t node_id, const function< void(const Message &)> handle_result) const |
Call the given callback with all messages in the index that visit the given node. More... | |
void | find (cursor_t &cursor, id_t min_node, id_t max_node, const function< void(const Message &)> handle_result) const |
Call the given callback with all messages in the index that visit a node in the given inclusive range. More... | |
void | find (cursor_t &cursor, const vector< pair< id_t, id_t >> &ranges, const function< void(const Message &)> handle_result, bool only_fully_contained=false) const |
void | index (cursor_t &cursor) |
Given a cursor at the beginning of a sorted, readable file, index the file. More... | |
void | add_group (const vector< Message > &msgs, int64_t virtual_start, int64_t virtual_past_end) |
vector< pair< int64_t, int64_t > > | find (id_t node_id) const |
void | find (id_t node_id, const function< bool(int64_t, int64_t)> scan_callback) const |
void | find (id_t min_node, id_t max_node, const function< bool(int64_t, int64_t)> scan_callback) const |
void | add_group (id_t min_id, id_t max_id, int64_t virtual_start, int64_t virtual_past_end) |
Public Member Functions inherited from vg::StreamIndexBase | |
StreamIndexBase ()=default | |
void | load (istream &from) |
void | save (ostream &to) const |
Save an index to a file. More... | |
vector< pair< int64_t, int64_t > > | find (id_t node_id) const |
void | find (id_t node_id, const function< bool(int64_t, int64_t)> scan_callback) const |
void | find (id_t min_node, id_t max_node, const function< bool(int64_t, int64_t)> scan_callback) const |
void | scan_backward (const function< bool(int64_t, int64_t)> scan_callback) const |
void | add_group (id_t min_id, id_t max_id, int64_t virtual_start, int64_t virtual_past_end) |
bool | used_bins_of_range (id_t min_id, id_t max_id, const function< bool(bin_t)> &iteratee) const |
Protected Member Functions | |
void | for_each_id (const Message &msg, const function< bool(const id_t &)> iteratee) const |
Additional Inherited Members | |
Static Public Member Functions inherited from vg::StreamIndexBase | |
static BitString | bin_to_prefix (bin_t bin) |
Get the ID prefix bits corresponding to a bin. More... | |
static BitString | id_to_prefix (id_t id) |
Get the given ID as a bit string. More... | |
static bin_t | common_bin (id_t a, id_t b) |
Get the most specific bin that contains both of the given node IDs. More... | |
static window_t | window_of_id (id_t id) |
Static Public Attributes inherited from vg::StreamIndexBase | |
const static uint32_t | MAX_INPUT_VERSION = 1 |
What's the maximum index version number we can read with this code? More... | |
const static uint32_t | OUTPUT_VERSION = 1 |
What's the version we serialize? More... | |
const static string | MAGIC_BYTES = "GAI!" |
Static Protected Member Functions inherited from vg::StreamIndexBase | |
static bool | is_in_range (const vector< pair< id_t, id_t >> &ranges, id_t id) |
Protected Attributes inherited from vg::StreamIndexBase | |
unordered_map< bin_t, vector< pair< int64_t, int64_t > > > | bin_to_ranges |
BitStringTree< bin_t > | bins_by_id_prefix |
map< window_t, int64_t > | window_to_start |
id_t | last_group_min_id = numeric_limits<id_t>::min() |
Static Protected Attributes inherited from vg::StreamIndexBase | |
const static size_t | WINDOW_SHIFT = 8 |
An index that provides a higher-level API in terms of the actual messages being indexed. This is the main entry point for users in most cases.
All find operations are thread-safe with respect to each other. Running an add concurrently with another add, or with a find, is prohibited.
using vg::StreamIndex< Message >::cursor_t = vg::io::ProtobufIterator<Message> |
vg::StreamIndex< Message >::StreamIndex | ( | ) | default |
auto vg::StreamIndex< Message >::add_group | ( | const vector< Message > & | msgs, |
int64_t | virtual_start, | ||
int64_t | virtual_past_end | ||
) |
Add a group articulated as a vector of messages, between the given virtual offsets. Must be called in virtual offset order for successive groups.
auto vg::StreamIndexBase::add_group |
Add a group into the index, based on its minimum and maximum (inclusive) used node IDs. Must be called for all groups in virtual offset order.
auto vg::StreamIndex< Message >::find | ( | cursor_t & | cursor, |
const vector< pair< id_t, id_t >> & | ranges, | ||
const function< void(const Message &)> | handle_result, | ||
bool | only_fully_contained = false | ||
) | const |
Call the given callback with all the messages in the index that visit a node in any of the given sorted, coalesced inclusive ranges. Emits each message at most once. If only_fully_contained is set, only messages where all the involved nodes are in one of the ranges will match.
auto vg::StreamIndex< Message >::find | ( | cursor_t & | cursor, |
id_t | min_node, | ||
id_t | max_node, | ||
const function< void(const Message &)> | handle_result | ||
) | const |
Call the given callback with all messages in the index that visit a node in the given inclusive range.
auto vg::StreamIndex< Message >::find | ( | cursor_t & | cursor, |
id_t | node_id, | ||
const function< void(const Message &)> | handle_result | ||
) | const |
Call the given callback with all messages in the index that visit the given node.
auto vg::StreamIndexBase::find |
Find all the ranges of run virtual offsets to check for reads visiting the given inclusive node ID range. Relies on a scanning callback, which will be called repeatedly with the start and past-the-end virtual offsets of runs which may contain groups touching the given node ID range. When called, the callback should scan the run and return either true if it wants the next run, or false if it encountered a group with an out-of-range start and wants to stop iteration. Runs will be emitted in order, and truncated on the left to either the appropriate lower bound from the linear index, or the past-the-end of the previous run scanned.
auto vg::StreamIndexBase::find |
Find all the ranges of run virtual offsets from the first position that might be relevant for the given node ID to the ends of all the bins it is in. Trims ranges by the linear index on the low end, and returns a series of potentially abutting but non-overlapping virtual offset ranges. Does not stop early, because it has no access to the actual reads to tell when it should stop looking at runs in a bin. As a result, you will also get ranges covering all runs in a bin that follow the runs you are interested in.
auto vg::StreamIndexBase::find |
Find all the ranges of run virtual offsets to check for reads visiting the given node ID. Relies on a scanning callback, which will be called repeatedly with the start and past-the-end virtual offsets of runs which may contain groups touching the given node ID. When called, the callback should scan the run and return either true if it wants the next run, or false if it encountered a group with an out-of-range start and wants to stop iteration. Runs will be emitted in order, and truncated on the left to either the appropriate lower bound from the linear index, or the past-the-end of the previous run scanned (which should be moot, because runs should not overlap in the index).
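auto vg::StreamIndex< Message >::for_each_id | ( | const Message & | msg, |
const function< bool(const id_t &)> | iteratee | ||
) | const | protected |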
Call the given iteratee for each node ID relevant to the given message. IDs may repeat. If the iteratee returns false, stop iteration. Calls the iteratee with 0 only if there are no relevant node IDs or the message is relevant to queries for unplaced content.
auto vg::StreamIndex< Message >::index | ( | cursor_t & | cursor | ) |
Given a cursor at the beginning of a sorted, readable file, index the file.