#include <snarl_seed_clusterer.hpp>

Classes
struct	Cluster
	Cluster information used in Giraffe. More...

struct	ClusterHead

struct	ClusteringProblem

struct	Seed
	Seed information used in Giraffe. More...

struct	SeedCache
	Seed information used for clustering. More...

struct	SnarlTreeNodeProblem

Public Member Functions
	SnarlDistanceIndexClusterer (const SnarlDistanceIndex &distance_index, const HandleGraph *graph)

	SnarlDistanceIndexClusterer (const SnarlDistanceIndex distance_index, const HandleGraph graph)

	SnarlDistanceIndexClusterer (const SnarlDistanceIndex &distance_index)

	SnarlDistanceIndexClusterer (const SnarlDistanceIndex *distance_index)

vector< Cluster >	cluster_seeds (const vector< Seed > &seeds, size_t read_distance_limit) const

vector< vector< Cluster > >	cluster_seeds (const vector< vector< Seed >> &all_seeds, size_t read_distance_limit, size_t fragment_distance_limit=0) const

Private Member Functions
tuple< vector< structures::UnionFind >, structures::UnionFind >	cluster_seeds_internal (vector< vector< SeedCache > * > &all_seeds, size_t read_distance_limit, size_t fragment_distance_limit=0) const

void	get_nodes (ClusteringProblem &clustering_problem, vector< vector< net_handle_t >> &chains_by_level) const

void	cluster_snarl_level (ClusteringProblem &clustering_problem) const

void	cluster_chain_level (ClusteringProblem &clustering_problem, size_t depth) const

void	cluster_one_node (ClusteringProblem &clustering_problem, SnarlTreeNodeProblem *node_problem) const

void	cluster_one_snarl (ClusteringProblem &clustering_problem, SnarlTreeNodeProblem *snarl_problem) const

void	cluster_one_chain (ClusteringProblem &clustering_problem, SnarlTreeNodeProblem *chain_problem, bool is_top_level_chain) const

void	add_seed_to_chain_problem (ClusteringProblem &clustering_problem, SnarlTreeNodeProblem *chain_problem, SnarlTreeNodeProblem::SnarlTreeChild &last_child, size_t &last_prefix_sum, size_t &last_length, size_t &last_chain_component_end, vector< ClusterHead > &cluster_heads_to_add_again, bool &found_first_node, pair< bool, bool > &found_first_node_by_read, const SnarlTreeNodeProblem::SnarlTreeChild &current_child, bool is_first_child, bool is_last_child, bool skip_distances_to_ends) const

void	add_snarl_to_chain_problem (ClusteringProblem &clustering_problem, SnarlTreeNodeProblem *chain_problem, SnarlTreeNodeProblem::SnarlTreeChild &last_child, size_t &last_prefix_sum, size_t &last_length, size_t &last_chain_component_end, vector< ClusterHead > &cluster_heads_to_add_again, bool &found_first_node, pair< bool, bool > &found_first_node_by_read, const SnarlTreeNodeProblem::SnarlTreeChild &current_child, bool is_first_child, bool is_last_child, bool skip_distances_to_ends) const

void	cluster_root (ClusteringProblem &clustering_problem) const

void	cluster_seeds_on_linear_structure (ClusteringProblem &clustering_problem, SnarlTreeNodeProblem *problem, size_t structure_length, bool include_prefix_sum, bool skip_distances_to_ends) const

void	compare_and_combine_cluster_on_child_structures (ClusteringProblem &clustering_problem, SnarlTreeNodeProblem child_problem1, SnarlTreeNodeProblem child_problem2, SnarlTreeNodeProblem *parent_problem, const vector< pair< size_t, size_t >> &child_distances, bool is_root, bool first_child) const

void	compare_and_combine_cluster_on_one_child (ClusteringProblem &clustering_problem, SnarlTreeNodeProblem *child_problem) const

Private Attributes
const SnarlDistanceIndex &	distance_index

const HandleGraph *	graph

Detailed Description

SnarlDistanceIndexClusterer is used for clustering seeds (positions on the graph) A "cluster" is a partition of seeds that is based on the minimum distance between them in the graph Consider a graph where each seed is a node and two seeds are connected if the minimum distance between them is smaller than a given distance limit. Each connected component of this graph is a cluster

The clustering algorithm is based on the snarl tree Clusters are formed on nodes of the snarl tree, which represent nodes/snarls/chains Each node/snarl/chain represents a subgraph of the variation graph A clustered snarl tree node contains all seeds that occur on its subgraph, and the seeds have been partitioned into clusters Each cluster knows the shortest distance from any seed it contains to both ends of the snarl tree node containing it Clustering is done progressively by walking up the snarl tree and forming clusters on each snarl tree node (only visiting nodes that have seeds on them) At each snarl tree node, assume that its children have already been clustered. The clusters of the children are compared to each other, and any pair that are close enough are combined to produce clusters on the parent The distances from each cluster to the ends of the parent are updated

The algorithm starts by assigning each seed to its node on the snarl tree Since nodes are all on chains, this fills in all the children of chains that are nodes It then walks up the snarl tree, level by level, and clusters each snarl tree node that contains seeds At a given level, first cluster each chain in the level. After clustering a chain, assign it to its parent snarl. Then, go through each of the snarls that have just been given children, and cluster the snarls. Each snarl then gets assigned to its parent chain This completes one level of the snarl tree. Each chain in the next level has just been populated by the snarls from this level, and already knew about its nodes from the first step, so it is ready to be clustered

Every time the clusterer is run, a ClusteringProblem is made to store information about the state of the clusterer The ClusteringProblem keeps track of which level of the snarl tree is currently being clustered, and keeps track of the children of the current and next level of the snarl tree. Each snarl tree node that contains seeds is represented by a SnarlTreeNodeProblem. The SnarlTreeNodeProblem represents the problem of clustering one snarl tree node. It knows the identities of its children and keeps track of its cluster heads

Constructor & Destructor Documentation

◆ SnarlDistanceIndexClusterer() [1/4]

vg::SnarlDistanceIndexClusterer::SnarlDistanceIndexClusterer	(	const SnarlDistanceIndex &	distance_index,
		const HandleGraph *	graph
	)

◆ SnarlDistanceIndexClusterer() [2/4]

vg::SnarlDistanceIndexClusterer::SnarlDistanceIndexClusterer	(	const SnarlDistanceIndex *	distance_index,
		const HandleGraph *	graph
	)

◆ SnarlDistanceIndexClusterer() [3/4]

vg::SnarlDistanceIndexClusterer::SnarlDistanceIndexClusterer ( const SnarlDistanceIndex & distance_index )

◆ SnarlDistanceIndexClusterer() [4/4]

vg::SnarlDistanceIndexClusterer::SnarlDistanceIndexClusterer ( const SnarlDistanceIndex * distance_index )

Member Function Documentation

◆ add_seed_to_chain_problem()

void vg::SnarlDistanceIndexClusterer::add_seed_to_chain_problem	(	ClusteringProblem &	clustering_problem,
		SnarlTreeNodeProblem *	chain_problem,
		SnarlTreeNodeProblem::SnarlTreeChild &	last_child,
		size_t &	last_prefix_sum,
		size_t &	last_length,
		size_t &	last_chain_component_end,
		vector< ClusterHead > &	cluster_heads_to_add_again,
		bool &	found_first_node,
		pair< bool, bool > &	found_first_node_by_read,
		const SnarlTreeNodeProblem::SnarlTreeChild &	current_child,
		bool	is_first_child,
		bool	is_last_child,
		bool	skip_distances_to_ends
	)		const

private

Go through the clusters on the chain up to this point and see if anything can be combined with the clusters on the child Also update the distances of the chain clusters to reach the end of this node

◆ add_snarl_to_chain_problem()

void vg::SnarlDistanceIndexClusterer::add_snarl_to_chain_problem	(	ClusteringProblem &	clustering_problem,
		SnarlTreeNodeProblem *	chain_problem,
		SnarlTreeNodeProblem::SnarlTreeChild &	last_child,
		size_t &	last_prefix_sum,
		size_t &	last_length,
		size_t &	last_chain_component_end,
		vector< ClusterHead > &	cluster_heads_to_add_again,
		bool &	found_first_node,
		pair< bool, bool > &	found_first_node_by_read,
		const SnarlTreeNodeProblem::SnarlTreeChild &	current_child,
		bool	is_first_child,
		bool	is_last_child,
		bool	skip_distances_to_ends
	)		const

private

First, go through the clusters of the current child and see what can be combined

Next, go through the clusters on the chain up to this point and see if anything can be combined with the clusters on the child

◆ cluster_chain_level()

void vg::SnarlDistanceIndexClusterer::cluster_chain_level	(	ClusteringProblem &	clustering_problem,
		size_t	depth
	)		const

private

◆ cluster_one_chain()

void vg::SnarlDistanceIndexClusterer::cluster_one_chain	(	ClusteringProblem &	clustering_problem,
		SnarlTreeNodeProblem *	chain_problem,
		bool	is_top_level_chain
	)		const

private

◆ cluster_one_node()

void vg::SnarlDistanceIndexClusterer::cluster_one_node	(	ClusteringProblem &	clustering_problem,
		SnarlTreeNodeProblem *	node_problem
	)		const

private

◆ cluster_one_snarl()

void vg::SnarlDistanceIndexClusterer::cluster_one_snarl	(	ClusteringProblem &	clustering_problem,
		SnarlTreeNodeProblem *	snarl_problem
	)		const

private

◆ cluster_root()

void vg::SnarlDistanceIndexClusterer::cluster_root ( ClusteringProblem & clustering_problem ) const

private

◆ cluster_seeds() [1/2]

vector< SnarlDistanceIndexClusterer::Cluster > vg::SnarlDistanceIndexClusterer::cluster_seeds	(	const vector< Seed > &	seeds,
		size_t	read_distance_limit
	)		const

◆ cluster_seeds() [2/2]

vector< vector< SnarlDistanceIndexClusterer::Cluster > > vg::SnarlDistanceIndexClusterer::cluster_seeds	(	const vector< vector< Seed >> &	all_seeds,
		size_t	read_distance_limit,
		size_t	fragment_distance_limit = `0`
	)		const

◆ cluster_seeds_internal()

tuple< vector< structures::UnionFind >, structures::UnionFind > vg::SnarlDistanceIndexClusterer::cluster_seeds_internal	(	vector< vector< SeedCache > * > &	all_seeds,
		size_t	read_distance_limit,
		size_t	fragment_distance_limit = `0`
	)		const

private

◆ cluster_seeds_on_linear_structure()

void vg::SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure	(	ClusteringProblem &	clustering_problem,
		SnarlTreeNodeProblem *	problem,
		size_t	structure_length,
		bool	include_prefix_sum,
		bool	skip_distances_to_ends
	)		const

private

◆ cluster_snarl_level()

void vg::SnarlDistanceIndexClusterer::cluster_snarl_level ( ClusteringProblem & clustering_problem ) const

private

◆ compare_and_combine_cluster_on_child_structures()

void vg::SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structures	(	ClusteringProblem &	clustering_problem,
		SnarlTreeNodeProblem *	child_problem1,
		SnarlTreeNodeProblem *	child_problem2,
		SnarlTreeNodeProblem *	parent_problem,
		const vector< pair< size_t, size_t >> &	child_distances,
		bool	is_root,
		bool	first_child
	)		const

private

◆ compare_and_combine_cluster_on_one_child()

void vg::SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child	(	ClusteringProblem &	clustering_problem,
		SnarlTreeNodeProblem *	child_problem
	)		const

private

◆ get_nodes()

void vg::SnarlDistanceIndexClusterer::get_nodes	(	ClusteringProblem &	clustering_problem,
		vector< vector< net_handle_t >> &	chains_by_level
	)		const

private

Member Data Documentation

◆ distance_index

const SnarlDistanceIndex& vg::SnarlDistanceIndexClusterer::distance_index

private

◆ graph

const HandleGraph* vg::SnarlDistanceIndexClusterer::graph

private

The documentation for this class was generated from the following files:

src/snarl_seed_clusterer.hpp
src/snarl_seed_clusterer.cpp

Classes

Public Member Functions

Private Member Functions

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ SnarlDistanceIndexClusterer() [1/4]

◆ SnarlDistanceIndexClusterer() [2/4]

◆ SnarlDistanceIndexClusterer() [3/4]

◆ SnarlDistanceIndexClusterer() [4/4]

Member Function Documentation

◆ add_seed_to_chain_problem()

◆ add_snarl_to_chain_problem()

◆ cluster_chain_level()

◆ cluster_one_chain()

◆ cluster_one_node()

◆ cluster_one_snarl()

◆ cluster_root()

◆ cluster_seeds() [1/2]

◆ cluster_seeds() [2/2]

◆ cluster_seeds_internal()

◆ cluster_seeds_on_linear_structure()

◆ cluster_snarl_level()

◆ compare_and_combine_cluster_on_child_structures()

◆ compare_and_combine_cluster_on_one_child()

◆ get_nodes()

Member Data Documentation

◆ distance_index

◆ graph