vg
tools for working with variation graphs
|
#include <zip_code.hpp>
Classes | |
struct | chain_code_t |
struct | decoder_t |
struct | node_code_t |
struct | snarl_code_t |
Public Types | |
enum | code_type_t { NODE = 1, CHAIN, REGULAR_SNARL, IRREGULAR_SNARL, CYCLIC_SNARL, ROOT_SNARL, ROOT_CHAIN, ROOT_NODE, EMPTY } |
typedef std::uint64_t | code_type |
Public Member Functions | |
void | fill_in_zipcode (const SnarlDistanceIndex &distance_index, const vg::pos_t &pos, bool fill_in_decoder=true) |
void | fill_in_zipcode_from_payload (const gbwtgraph::Payload &payload) |
gbwtgraph::Payload | get_payload_from_zip () const |
size_t | byte_count () const |
How many bytes were used to store this zipcode? More... | |
bool | operator== (const ZipCode &other) const |
Equality operator. More... | |
std::vector< size_t > | to_vector () const |
Dump to a normal vector. More... | |
void | from_vector (const std::vector< size_t > &values) |
Load from a normal vector. More... | |
node_code_t | unpack_node_code (size_t zipcode_level) const |
chain_code_t | unpack_chain_code (size_t zipcode_level) const |
snarl_code_t | unpack_snarl_code (size_t zipcode_level) const |
void | fill_in_full_decoder () |
Go through the entire zipcode and fill in the decoder. More... | |
bool | fill_in_next_decoder () |
size_t | max_depth () const |
What is the maximum depth of this zipcode? More... | |
size_t | decoder_length () const |
How many codes in the zipcode have been decoded? More... | |
ZipCode::code_type_t | get_code_type (const size_t &depth) const |
What type of snarl tree node is at the given depth (index into the zipcode) More... | |
size_t | get_length (const size_t &depth, bool get_chain_component_length=false) const |
size_t | get_rank_in_snarl (const size_t &depth) const |
Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl. More... | |
size_t | get_snarl_child_count (const size_t &depth, const SnarlDistanceIndex *distance_index=nullptr) const |
Get the number of children in a snarl. Throw an exception if it isn't a snarl. More... | |
size_t | get_offset_in_chain (const size_t &depth, const SnarlDistanceIndex *distance_index=nullptr) const |
size_t | get_chain_component (const size_t &depth) const |
size_t | get_last_chain_component (const size_t &depth, bool get_end=false) const |
bool | get_is_looping_chain (const size_t &depth) const |
bool | get_is_reversed_in_parent (const size_t &depth) const |
Is the snarl tree node backwards relative to its parent. More... | |
net_handle_t | get_net_handle (const size_t &depth, const SnarlDistanceIndex *distance_index) const |
net_handle_t | get_net_handle_slow (nid_t id, const size_t &depth, const SnarlDistanceIndex *distance_index, const net_handle_t *child_handle=nullptr) const |
size_t | get_distance_index_address (const size_t &depth) const |
size_t | get_distance_to_snarl_bound (const size_t &depth, bool snarl_start, bool left_side) const |
The minimum distance from start or end of the snarl to the left or right side of the child. More... | |
bool | is_externally_start_end_connected (const size_t &depth) const |
bool | is_externally_start_start_connected (const size_t &depth) const |
bool | is_externally_end_end_connected (const size_t &depth) const |
void | dump (std::ostream &out) const |
MIPayload | get_payload_from_zipcode (nid_t id, const SnarlDistanceIndex &distance_index, hash_map< size_t, net_handle_t > *component_to_net_handle=nullptr) const |
net_identifier_t | get_identifier (size_t depth) const |
Static Public Member Functions | |
static size_t | minimum_distance_between (ZipCode &zip1, const pos_t &pos1, ZipCode &zip2, const pos_t &pos2, const SnarlDistanceIndex &distance_index, size_t distance_limit=std::numeric_limits< size_t >::max(), bool undirected_distance=false, const HandleGraph *graph=nullptr) |
static bool | is_farther_than (const ZipCode &zip1, const ZipCode &zip2, const size_t &limit) |
static tuple< size_t, size_t, size_t > | get_top_level_chain_offset () |
const static bool | is_equal (const ZipCode &zip1, const ZipCode &zip2, const size_t &depth) |
const static net_identifier_t | get_parent_identifier (const net_identifier_t &child) |
Public Attributes | |
varint_vector_t | zipcode |
std::vector< decoder_t > | decoder |
bool | finished_decoding = false |
Private Member Functions | |
node_code_t | get_node_code (const net_handle_t &node, const SnarlDistanceIndex &distance_index) |
chain_code_t | get_chain_code (const net_handle_t &chain, const SnarlDistanceIndex &distance_index) |
snarl_code_t | get_regular_snarl_code (const net_handle_t &snarl, const net_handle_t &snarl_child, const SnarlDistanceIndex &distance_index) |
snarl_code_t | get_irregular_snarl_code (const net_handle_t &snarl, const net_handle_t &snarl_child, const SnarlDistanceIndex &distance_index) |
Static Private Attributes | |
const static size_t | ROOT_SNARL_SIZE = 2 |
const static size_t | ROOT_IS_CHAIN_OFFSET = 0 |
const static size_t | ROOT_IDENTIFIER_OFFSET = 1 |
const static size_t | ROOT_CHAIN_SIZE = 4 |
const static size_t | ROOT_CHAIN_COMPONENT_COUNT_OFFSET = 2 |
const static size_t | ROOT_NODE_OR_CHAIN_CONNECTIVITY_OR_LENGTH_OFFSET = 3 |
const static size_t | ROOT_NODE_SIZE = 4 |
const static size_t | ROOT_NODE_LENGTH_OFFSET = 2 |
const static size_t | CHAIN_SIZE = 3 |
Offsets for chain codes. More... | |
const static size_t | CHAIN_RANK_IN_SNARL_OFFSET = 0 |
const static size_t | CHAIN_LENGTH_OFFSET = 1 |
const static size_t | CHAIN_COMPONENT_COUNT_OFFSET = 2 |
const static size_t | REGULAR_SNARL_SIZE = 6 |
Offsets for snarl codes. More... | |
const static size_t | IRREGULAR_SNARL_SIZE = 10 |
const static size_t | SNARL_IS_REGULAR_OFFSET = 0 |
const static size_t | SNARL_OFFSET_IN_CHAIN_OFFSET = 1 |
const static size_t | SNARL_LENGTH_OFFSET = 2 |
const static size_t | SNARL_CHILD_COUNT_OFFSET = 3 |
const static size_t | SNARL_CHAIN_COMPONENT_OFFSET = 4 |
const static size_t | REGULAR_SNARL_IS_REVERSED_OFFSET = 5 |
const static size_t | IRREGULAR_SNARL_RECORD_OFFSET = 5 |
const static size_t | IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET = 6 |
const static size_t | IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET = 7 |
const static size_t | IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET = 8 |
const static size_t | IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET = 9 |
const static size_t | NODE_SIZE = 4 |
Offsets for nodes. More... | |
const static size_t | NODE_OFFSET_OFFSET = 0 |
const static size_t | NODE_LENGTH_OFFSET = 1 |
const static size_t | NODE_IS_REVERSED_OFFSET = 2 |
const static size_t | NODE_CHAIN_COMPONENT_OFFSET = 3 |
typedef std::uint64_t vg::ZipCode::code_type |
The type of codes that can be stored in the zipcode. Trivial chains that are children of snarls get saved as a chain with no child node. EMPTY doesn't actually mean anything; it's used to catch errors. Snarls can be regular, irregular, or cyclic. Regular snarls are bubbles. Irregular snarls are snarls that aren't bubbles but are DAGs. Cyclic snarls are non-DAGs. They are stored the same as irregular snarls; only the type is different.
Enumerator | |
---|---|
NODE | |
CHAIN | |
REGULAR_SNARL | |
IRREGULAR_SNARL | |
CYCLIC_SNARL | |
ROOT_SNARL | |
ROOT_CHAIN | |
ROOT_NODE | |
EMPTY |
|
inline |
How many bytes were used to store this zipcode?
|
inline |
How many codes in the zipcode have been decoded?
void vg::ZipCode::dump | ( | std::ostream & | out | ) | const |
Dump a ZipCode to a stream so that it can be reconstructed for a unit test from the resulting information.
void vg::ZipCode::fill_in_full_decoder | ( | ) |
Go through the entire zipcode and fill in the decoder.
bool vg::ZipCode::fill_in_next_decoder | ( | ) |
Fill in one more item in the decoder. Returns true if this is the last thing in the zipcode and false if there is more to decode.
void vg::ZipCode::fill_in_zipcode | ( | const SnarlDistanceIndex & | distance_index, |
const vg::pos_t & | pos, | ||
bool | fill_in_decoder = true |
||
) |
void vg::ZipCode::fill_in_zipcode_from_payload | ( | const gbwtgraph::Payload & | payload | ) |
void vg::ZipCode::from_vector | ( | const std::vector< size_t > & | values | ) |
Load from a normal vector.
|
inlineprivate |
size_t vg::ZipCode::get_chain_component | ( | const size_t & | depth | ) | const |
Get the chain component of a chain child. For snarls, this will be the component of the start node
ZipCode::code_type_t vg::ZipCode::get_code_type | ( | const size_t & | depth | ) | const |
What type of snarl tree node is at the given depth (index into the zipcode)
size_t vg::ZipCode::get_distance_index_address | ( | const size_t & | depth | ) | const |
Get the information that was stored to get the address in the distance index. This is the connected component number for a root structure, or the address of an irregular snarl. Throws an error for anything else. This is used for checking equality without looking at the distance index. Use get_net_handle for getting the actual handle.
size_t vg::ZipCode::get_distance_to_snarl_bound | ( | const size_t & | depth, |
bool | snarl_start, | ||
bool | left_side | ||
) | const |
The minimum distance from start or end of the snarl to the left or right side of the child.
net_identifier_t vg::ZipCode::get_identifier | ( | size_t | depth | ) | const |
Get an identifier for the snarl tree node at this depth. If the snarl tree node at this depth would be the node, also include the node id.
|
inlineprivate |
bool vg::ZipCode::get_is_looping_chain | ( | const size_t & | depth | ) | const |
bool vg::ZipCode::get_is_reversed_in_parent | ( | const size_t & | depth | ) | const |
Is the snarl tree node backwards relative to its parent.
size_t vg::ZipCode::get_last_chain_component | ( | const size_t & | depth, |
bool | get_end = false |
||
) | const |
Get the chain component of the last node in the chain. This behaves like the distance index get_chain_component: for looping chains it returns the last component if get_end is true, and 0 if it is false.
size_t vg::ZipCode::get_length | ( | const size_t & | depth, |
bool | get_chain_component_length = false |
||
) | const |
Get the length of a snarl tree node given the depth in the snarl tree. If get_chain_component_length is true, then return the length of the last component of the multi-component chain. If get_chain_component_length is false for a multi-component chain, return max().
net_handle_t vg::ZipCode::get_net_handle | ( | const size_t & | depth, |
const SnarlDistanceIndex * | distance_index | ||
) | const |
Get the handle of the thing at the given depth. This can only be used for Root-level structures or irregular snarls
net_handle_t vg::ZipCode::get_net_handle_slow | ( | nid_t | id, |
const size_t & | depth, | ||
const SnarlDistanceIndex * | distance_index, | ||
const net_handle_t * | child_handle = nullptr |
||
) | const |
Get the handle of the thing at the given depth. This can be used for anything but is slow, even for roots and irregular/cyclic snarls. It's a separate function to make sure I remember that it's slow. If the child handle is given, get the net handle as the parent of the child, if the address isn't stored.
|
inlineprivate |
size_t vg::ZipCode::get_offset_in_chain | ( | const size_t & | depth, |
const SnarlDistanceIndex * | distance_index = nullptr |
||
) | const |
Get the prefix sum of a child of a chain. This requires the distance index for irregular snarls (except for a top-level snarl). Throws an exception if the distance index is not given when it is needed. Doesn't use a given distance index if it isn't needed.
|
static |
gbwtgraph::Payload vg::ZipCode::get_payload_from_zip | ( | ) | const |
MIPayload vg::ZipCode::get_payload_from_zipcode | ( | nid_t | id, |
const SnarlDistanceIndex & | distance_index, | ||
hash_map< size_t, net_handle_t > * | component_to_net_handle = nullptr |
||
) | const |
Fill in a payload with values from the zipcode. Remember how to get the net handle from the connected component number.
size_t vg::ZipCode::get_rank_in_snarl | ( | const size_t & | depth | ) | const |
Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl.
|
inlineprivate |
size_t vg::ZipCode::get_snarl_child_count | ( | const size_t & | depth, |
const SnarlDistanceIndex * | distance_index = nullptr |
||
) | const |
Get the number of children in a snarl. Throw an exception if it isn't a snarl.
|
static |
|
static |
Are the two decoders pointing to the same snarl tree node at the given depth? This only checks if the values in the zipcode are the same at the given depth, so if the preceding snarl tree nodes are different, then this might actually refer to different things.
bool vg::ZipCode::is_externally_end_end_connected | ( | const size_t & | depth | ) | const |
bool vg::ZipCode::is_externally_start_end_connected | ( | const size_t & | depth | ) | const |
bool vg::ZipCode::is_externally_start_start_connected | ( | const size_t & | depth | ) | const |
|
static |
size_t vg::ZipCode::max_depth | ( | ) | const |
What is the maximum depth of this zipcode?
|
static |
|
inline |
Equality operator.
std::vector< size_t > vg::ZipCode::to_vector | ( | ) | const |
Dump to a normal vector.
ZipCode::chain_code_t vg::ZipCode::unpack_chain_code | ( | size_t | zipcode_level | ) | const |
ZipCode::node_code_t vg::ZipCode::unpack_node_code | ( | size_t | zipcode_level | ) | const |
ZipCode::snarl_code_t vg::ZipCode::unpack_snarl_code | ( | size_t | zipcode_level | ) | const |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
Offsets for chain codes.
std::vector<decoder_t> vg::ZipCode::decoder |
bool vg::ZipCode::finished_decoding = false |
Did we fill in the entire decoder? TODO: I'm making it fill in the decoder automatically because it seems to be faster that way, instead of waiting to see which parts are actually needed.
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
Offsets for nodes.
|
staticprivate |
|
staticprivate |
Offsets for snarl codes.
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
Offsets of values in a root chain or snarl code. Roots have a bool for is_chain and an identifier, which is the connected component number from the distance index.
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
|
staticprivate |
varint_vector_t vg::ZipCode::zipcode |