vg
tools for working with variation graphs
|
#include <phase_unfolder.hpp>
Public Types | |
typedef gbwt::SearchState | search_type |
typedef gbwt::vector_type | path_type |
typedef std::pair< search_type, path_type > | state_type |
Public Member Functions | |
PhaseUnfolder (const PathHandleGraph &path_graph, const gbwt::GBWT &gbwt_index, vg::id_t next_node) | |
void | unfold (MutableHandleGraph &graph, bool show_progress=false) |
void | restore_paths (MutableHandleGraph &graph, bool show_progress=false) const |
size_t | verify_paths (MutableHandleGraph &unfolded, bool show_progress=false) const |
void | write_mapping (const std::string &filename) const |
void | read_mapping (const std::string &filename) |
vg::id_t | get_mapping (vg::id_t node) const |
Static Public Member Functions | |
static edge_t | make_edge (const HandleGraph &graph, gbwt::node_type from, gbwt::node_type to) |
Private Member Functions | |
std::list< bdsg::HashGraph > | complement_components (MutableHandleGraph &graph, bool show_progress) |
size_t | unfold_component (MutableHandleGraph &component, MutableHandleGraph &graph, MutableHandleGraph &unfolded) |
void | generate_paths (MutableHandleGraph &component, vg::id_t from) |
void | generate_threads (MutableHandleGraph &component, vg::id_t from) |
void | create_state (vg::id_t node, bool is_reverse, bool starting) |
bool | extend_state (state_type state, vg::id_t node, bool is_reverse) |
void | extend_path (const path_type &path) |
void | insert_path (const path_type &path, bool from_border, bool to_border) |
Insert the path into the set in the canonical orientation. More... | |
gbwt::node_type | get_prefix (gbwt::node_type from, gbwt::node_type node) |
Get the id for the duplicate of 'node' after 'from'. More... | |
gbwt::node_type | get_suffix (gbwt::node_type node, gbwt::node_type to) |
Get the id for the duplicate of 'node' before 'to'. More... | |
Private Attributes | |
const PathHandleGraph & | path_graph |
XG and GBWT indexes for the original graph. More... | |
const gbwt::GBWT & | gbwt_index |
gcsa::NodeMapping | mapping |
Mapping from duplicated nodes to original ids. More... | |
hash_set< vg::id_t > | border |
Internal data structures for the current component. More... | |
std::stack< state_type > | states |
std::vector< path_type > | reference_paths |
pair_hash_map< std::pair< gbwt::node_type, gbwt::node_type >, gbwt::node_type > | prefixes |
pair_hash_map< std::pair< gbwt::node_type, gbwt::node_type >, gbwt::node_type > | suffixes |
pair_hash_set< std::pair< gbwt::node_type, gbwt::node_type > > | crossing_edges |
Transforms the pruned subregions of the input graph into collections of disconnected distinct traversal haplotypes. Use in combination with pruning to simplify the graph for GCSA2 indexing without losing observed variation. Requires the XG index of the original graph and an empty GBWT index or a GBWT index of the original graph. Note: PhaseUnfolder only considers paths of length >= 2.
typedef gbwt::vector_type vg::PhaseUnfolder::path_type |
typedef gbwt::SearchState vg::PhaseUnfolder::search_type |
typedef std::pair<search_type, path_type> vg::PhaseUnfolder::state_type |
vg::PhaseUnfolder::PhaseUnfolder | ( | const PathHandleGraph & | path_graph, |
const gbwt::GBWT & | gbwt_index, | ||
vg::id_t | next_node | ||
) |
Make a new PhaseUnfolder backed by the given XG and GBWT indexes. These indexes must represent the same original graph. 'next_node' should usually be max_node_id() + 1 in the original graph.
|
private |
Generate a complement graph consisting of the edges that are in the GBWT index but not in the input graph. Split the complement into disjoint components and return the components.
|
private |
Create or extend the state with the given node orientation, and insert it into the stack if it is supported by the GBWT index. Use 'starting' to determine whether the initial state is for the threads starting at the node or for the threads passing through the node.
|
private |
Try to extend the path at both ends until the border by using the reference paths. Insert the extended path into the set in the canonical orientation.
|
private |
|
private |
Generate all paths supported by the XG index passing through the given node until the border or until the path ends. Insert the generated paths into the set in the canonical orientation, and use them as reference paths for extending threads.
|
private |
Generate all paths supported by the GBWT index from the given node until the border. Extend paths that start/end at internal nodes using the reference paths. If the node is a border node, consider all threads passing through it. Otherwise consider only the threads starting from it, and do not output threads reaching a border.
Get the id of the corresponding node in the original graph.
|
private |
Get the id for the duplicate of 'node' after 'from'.
|
private |
Get the id for the duplicate of 'node' before 'to'.
|
private |
Insert the path into the set in the canonical orientation.
|
inlinestatic |
Create an edge between two node orientations.
void vg::PhaseUnfolder::read_mapping | ( | const std::string & | filename | ) |
Replace the existing node mapping with the one loaded from the file. This should be used before calling unfold(). The identifiers for new duplicated nodes will follow the ones in the loaded mapping.
void vg::PhaseUnfolder::restore_paths | ( | MutableHandleGraph & | graph, |
bool | show_progress = false |
||
) | const |
Restore the edges on XG paths. This is effectively the same as unfolding with an empty GBWT index, except that the inserted nodes will have their original identifiers.
void vg::PhaseUnfolder::unfold | ( | MutableHandleGraph & | graph, |
bool | show_progress = false |
||
) |
Unfold the pruned regions in the input graph.
|
private |
Generate all border-to-border paths in the component supported by the indexes. Unfold the paths by duplicating the inner nodes so that the paths become disjoint, except for their shared prefixes/suffixes.
size_t vg::PhaseUnfolder::verify_paths | ( | MutableHandleGraph & | unfolded, |
bool | show_progress = false |
||
) | const |
Verify that the graph contains the XG paths and the GBWT threads in the backing indexes. Returns the number of paths for which the verification failed. Uses OMP threads.
void vg::PhaseUnfolder::write_mapping | ( | const std::string & | filename | ) | const |
Write the mapping to the specified file with a header. The file will contain mappings from header.next_node - header.mapping_size (inclusive) to header.next_node (exclusive).
Internal data structures for the current component.
|
private |
|
private |
|
private |
Mapping from duplicated nodes to original ids.
|
private |
XG and GBWT indexes for the original graph.
|
private |
Tries for the unfolded prefixes and reverse suffixes. prefixes[(from, to)] is the mapping for 'to', and suffixes[(from, to)] is the mapping for 'from'.
|
private |
|
private |
|
private |