vg
tools for working with variation graphs
Public Types | Public Member Functions | Static Public Member Functions | Private Member Functions | Private Attributes | List of all members
vg::PhaseUnfolder Class Reference

#include <phase_unfolder.hpp>

Public Types

typedef gbwt::SearchState search_type
 
typedef gbwt::vector_type path_type
 
typedef std::pair< search_type, path_typestate_type
 

Public Member Functions

 PhaseUnfolder (const PathHandleGraph &path_graph, const gbwt::GBWT &gbwt_index, vg::id_t next_node)
 
void unfold (MutableHandleGraph &graph, bool show_progress=false)
 
void restore_paths (MutableHandleGraph &graph, bool show_progress=false) const
 
size_t verify_paths (MutableHandleGraph &unfolded, bool show_progress=false) const
 
void write_mapping (const std::string &filename) const
 
void read_mapping (const std::string &filename)
 
vg::id_t get_mapping (vg::id_t node) const
 

Static Public Member Functions

static edge_t make_edge (const HandleGraph &graph, gbwt::node_type from, gbwt::node_type to)
 

Private Member Functions

std::list< bdsg::HashGraph > complement_components (MutableHandleGraph &graph, bool show_progress)
 
size_t unfold_component (MutableHandleGraph &component, MutableHandleGraph &graph, MutableHandleGraph &unfolded)
 
void generate_paths (MutableHandleGraph &component, vg::id_t from)
 
void generate_threads (MutableHandleGraph &component, vg::id_t from)
 
void create_state (vg::id_t node, bool is_reverse, bool starting)
 
bool extend_state (state_type state, vg::id_t node, bool is_reverse)
 
void extend_path (const path_type &path)
 
void insert_path (const path_type &path, bool from_border, bool to_border)
 Insert the path into the set in the canonical orientation. More...
 
gbwt::node_type get_prefix (gbwt::node_type from, gbwt::node_type node)
 Get the id for the duplicate of 'node' after 'from'. More...
 
gbwt::node_type get_suffix (gbwt::node_type node, gbwt::node_type to)
 Get the id for the duplicate of 'node' before 'to'. More...
 

Private Attributes

const PathHandleGraphpath_graph
 XG and GBWT indexes for the original graph. More...
 
const gbwt::GBWT & gbwt_index
 
gcsa::NodeMapping mapping
 Mapping from duplicated nodes to original ids. More...
 
hash_set< vg::id_tborder
 Internal data structures for the current component. More...
 
std::stack< state_typestates
 
std::vector< path_typereference_paths
 
pair_hash_map< std::pair< gbwt::node_type, gbwt::node_type >, gbwt::node_type > prefixes
 
pair_hash_map< std::pair< gbwt::node_type, gbwt::node_type >, gbwt::node_type > suffixes
 
pair_hash_set< std::pair< gbwt::node_type, gbwt::node_type > > crossing_edges
 

Detailed Description

Transforms the pruned subregions of the input graph into collections of disconnected distinct traversal haplotypes. Use in combination with pruning to simplify the graph for GCSA2 indexing without losing observed variation. Requires the XG index of the original graph and an empty GBWT index or an GBWT index of the original graph. Note: PhaseUnfolder only considers paths of length >= 2.

Member Typedef Documentation

◆ path_type

typedef gbwt::vector_type vg::PhaseUnfolder::path_type

◆ search_type

typedef gbwt::SearchState vg::PhaseUnfolder::search_type

◆ state_type

Constructor & Destructor Documentation

◆ PhaseUnfolder()

vg::PhaseUnfolder::PhaseUnfolder ( const PathHandleGraph path_graph,
const gbwt::GBWT &  gbwt_index,
vg::id_t  next_node 
)

Make a new PhaseUnfolder backed by the given XG and GBWT indexes. These indexes must represent the same original graph. 'next_node' should usually be max_node_id() + 1 in the original graph.

Member Function Documentation

◆ complement_components()

std::list< bdsg::HashGraph > vg::PhaseUnfolder::complement_components ( MutableHandleGraph graph,
bool  show_progress 
)
private

Generate a complement graph consisting of the edges that are in the GBWT index but not in the input graph. Split the complement into disjoint components and return the components.

◆ create_state()

void vg::PhaseUnfolder::create_state ( vg::id_t  node,
bool  is_reverse,
bool  starting 
)
private

Create or extend the state with the given node orientation, and insert it into the stack if it is supported by the GBWT index. Use 'starting' to determine whether the initial state is for the threads starting at the node or for the threads passing through the node.

◆ extend_path()

void vg::PhaseUnfolder::extend_path ( const path_type path)
private

Try to extend the path at both ends until the border by using the reference paths. Insert the extended path into the set in the canonical orientation.

◆ extend_state()

bool vg::PhaseUnfolder::extend_state ( state_type  state,
vg::id_t  node,
bool  is_reverse 
)
private

◆ generate_paths()

void vg::PhaseUnfolder::generate_paths ( MutableHandleGraph component,
vg::id_t  from 
)
private

Generate all paths supported by the XG index passing through the given node until the border or until the path ends. Insert the generated paths into the set in the canonical orientation, and use them as reference paths for extending threads.

◆ generate_threads()

void vg::PhaseUnfolder::generate_threads ( MutableHandleGraph component,
vg::id_t  from 
)
private

Generate all paths supported by the GBWT index from the given node until the border. Extend paths that start/end at internal nodes using the reference paths. If the node is a border node, consider all threads passing through it. Otherwise consider only the threads starting from it, and do not output threads reaching a border.

◆ get_mapping()

vg::id_t vg::PhaseUnfolder::get_mapping ( vg::id_t  node) const

Get the id of the corresponding node in the original graph.

◆ get_prefix()

gbwt::node_type vg::PhaseUnfolder::get_prefix ( gbwt::node_type  from,
gbwt::node_type  node 
)
private

Get the id for the duplicate of 'node' after 'from'.

◆ get_suffix()

gbwt::node_type vg::PhaseUnfolder::get_suffix ( gbwt::node_type  node,
gbwt::node_type  to 
)
private

Get the id for the duplicate of 'node' before 'to'.

◆ insert_path()

void vg::PhaseUnfolder::insert_path ( const path_type path,
bool  from_border,
bool  to_border 
)
private

Insert the path into the set in the canonical orientation.

◆ make_edge()

static edge_t vg::PhaseUnfolder::make_edge ( const HandleGraph graph,
gbwt::node_type  from,
gbwt::node_type  to 
)
inlinestatic

Create an edge between two node orientations.

◆ read_mapping()

void vg::PhaseUnfolder::read_mapping ( const std::string &  filename)

Replace the existing node mapping with the one loaded from the file. This should be used before calling unfold(). The identifiers for new duplicated nodes will follow the ones in the loaded mapping.

◆ restore_paths()

void vg::PhaseUnfolder::restore_paths ( MutableHandleGraph graph,
bool  show_progress = false 
) const

Restore the edges on XG paths. This is effectively the same as unfolding with an empty GBWT index, except that the inserted nodes will have their original identifiers.

◆ unfold()

void vg::PhaseUnfolder::unfold ( MutableHandleGraph graph,
bool  show_progress = false 
)

Unfold the pruned regions in the input graph:

  • Determine the connected components of edges missing from the input graph, as determined by the XG paths and GBWT threads.
  • For each component, find all border-to-border paths and threads supported by the indexes. Then unfold the component by duplicating the nodes, so that the paths are disjoint, except for their shared prefixes and suffixes.
  • Extend the input graph with the unfolded components.

◆ unfold_component()

size_t vg::PhaseUnfolder::unfold_component ( MutableHandleGraph component,
MutableHandleGraph graph,
MutableHandleGraph unfolded 
)
private

Generate all border-to-border paths in the component supported by the indexes. Unfold the paths by duplicating the inner nodes so that the paths become disjoint, except for their shared prefixes/suffixes.

◆ verify_paths()

size_t vg::PhaseUnfolder::verify_paths ( MutableHandleGraph unfolded,
bool  show_progress = false 
) const

Verify that the graph contains the XG paths and the GBWT threads in the backing indexes. Returns the number of paths for which the verification failed. Uses OMP threads.

◆ write_mapping()

void vg::PhaseUnfolder::write_mapping ( const std::string &  filename) const

Write the mapping to the specified file with a header. The file will contain mappings from header.next_node - header.mapping_size (inclusive) to header.next_node (exclusive).

Member Data Documentation

◆ border

hash_set<vg::id_t> vg::PhaseUnfolder::border
private

Internal data structures for the current component.

◆ crossing_edges

pair_hash_set<std::pair<gbwt::node_type, gbwt::node_type> > vg::PhaseUnfolder::crossing_edges
private

◆ gbwt_index

const gbwt::GBWT& vg::PhaseUnfolder::gbwt_index
private

◆ mapping

gcsa::NodeMapping vg::PhaseUnfolder::mapping
private

Mapping from duplicated nodes to original ids.

◆ path_graph

const PathHandleGraph& vg::PhaseUnfolder::path_graph
private

XG and GBWT indexes for the original graph.

◆ prefixes

pair_hash_map<std::pair<gbwt::node_type, gbwt::node_type>, gbwt::node_type> vg::PhaseUnfolder::prefixes
private

Tries for the unfolded prefixes and reverse suffixes. prefixes[(from, to)] is the mapping for to, and suffixes[(from, to)] is the mapping for from.

◆ reference_paths

std::vector<path_type> vg::PhaseUnfolder::reference_paths
private

◆ states

std::stack<state_type> vg::PhaseUnfolder::states
private

◆ suffixes

pair_hash_map<std::pair<gbwt::node_type, gbwt::node_type>, gbwt::node_type> vg::PhaseUnfolder::suffixes
private

The documentation for this class was generated from the following files: