vg
tools for working with variation graphs
Public Member Functions | Static Public Member Functions | Private Member Functions | Private Attributes | Static Private Attributes | List of all members
vg::Packer Class Reference

#include <packer.hpp>

Public Member Functions

 Packer (const HandleGraph *graph=nullptr)
 Create a Packer (to read from a file) More...
 
 Packer (const HandleGraph *graph, bool record_bases, bool record_edges, bool record_edits, bool record_qualities, size_t bin_size=0, size_t coverage_bins=1, size_t data_width=8)
 
 ~Packer ()
 
void clear ()
 
void add (const Alignment &aln, int min_mapq=0, int min_baseq=0, int trim_ends=0)
 
void merge_from_files (const vector< string > &file_names)
 
void merge_from_dynamic (vector< Packer * > &packers)
 
void load_from_file (const string &file_name)
 
void save_to_file (const string &file_name)
 
void load (istream &in)
 
size_t serialize (std::ostream &out, sdsl::structure_tree_node *s=NULL, std::string name="")
 
void make_compact (void)
 
void make_dynamic (void)
 
size_t position_in_basis (const Position &pos) const
 
string pos_key (size_t i) const
 
string edit_value (const Edit &edit, bool revcomp) const
 
vector< Edit > edits_at_position (size_t i) const
 
size_t coverage_at_position (size_t i) const
 
void collect_coverage (const vector< Packer * > &packers)
 
ostream & as_table (ostream &out, bool show_edits, vector< vg::id_t > node_ids)
 
ostream & as_edge_table (ostream &out, vector< vg::id_t > node_ids)
 
ostream & as_quality_table (ostream &out, vector< vg::id_t > node_ids)
 
ostream & show_structure (ostream &out)
 
void write_edits (vector< ofstream * > &out) const
 
void write_edits (ostream &out, size_t bin) const
 
size_t get_bin_size (void) const
 
size_t get_n_bins (void) const
 
bool is_dynamic (void) const
 
const HandleGraph * get_graph () const
 
size_t coverage_size (void) const
 
void increment_coverage (size_t i)
 
void increment_coverage (size_t i, size_t v)
 
size_t edge_coverage (Edge &e) const
 
size_t edge_coverage (size_t i) const
 
size_t edge_vector_size (void) const
 
size_t edge_index (const Edge &e) const
 
void increment_edge_coverage (size_t i)
 
void increment_edge_coverage (size_t i, size_t v)
 
size_t total_node_quality (size_t i) const
 total node quality (faster from dynamic) More...
 
size_t average_node_quality (size_t i) const
 average node quality (faster from static) More...
 
size_t node_quality_vector_size (void) const
 
size_t node_index (nid_t node_id) const
 Return the 1-based node rank or 0 if node not in graph. More...
 
nid_t index_to_node (size_t i) const
 and back More...
 
void increment_node_quality (size_t i, size_t v)
 
bool has_qualities () const
 return true if there's at least one nonzero quality in the structure More...
 

Static Public Member Functions

static size_t estimate_data_width (size_t expected_coverage)
 Some helper functions to heuristically estimate input parameters for constructor. More...
 
static size_t estimate_batch_size (size_t num_threads)
 
static size_t estimate_bin_count (size_t num_threads)
 

Private Member Functions

pair< size_t, size_t > coverage_bin_offset (size_t i) const
 map from absolute position to positions in the binned arrays More...
 
pair< size_t, size_t > edge_coverage_bin_offset (size_t i) const
 
pair< size_t, size_t > node_quality_bin_offset (size_t i) const
 
size_t coverage_bin_size (size_t i) const
 get the size of a bin More...
 
size_t edge_coverage_bin_size (size_t i) const
 
size_t node_quality_bin_size (size_t i) const
 
void init_coverage_bin (size_t i)
 initialize coverage bins on demand More...
 
void init_edge_coverage_bin (size_t i)
 
void init_node_quality_bin (size_t i)
 
void ensure_edit_tmpfiles_open (void)
 
void close_edit_tmpfiles (void)
 
void remove_edit_tmpfiles (void)
 
size_t bin_for_position (size_t i) const
 
string escape_delim (const string &s, char d) const
 
string escape_delims (const string &s) const
 
string unescape_delim (const string &s, char d) const
 
string unescape_delims (const string &s) const
 
int compute_quality (const Alignment &aln, size_t position_in_read) const
 
int combine_qualities (int map_quality, int base_quality) const
 

Private Attributes

bool is_compacted = false
 
const HandleGraph * graph
 
size_t data_width
 
size_t cov_bin_size
 
size_t edge_cov_bin_size
 
size_t node_qual_bin_size
 
vector< gcsa::CounterArray * > coverage_dynamic
 
size_t num_bases_dynamic
 
std::mutex * base_locks
 
vector< gcsa::CounterArray * > edge_coverage_dynamic
 
size_t num_edges_dynamic
 
std::mutex * edge_locks
 
vector< gcsa::CounterArray * > node_quality_dynamic
 
size_t num_nodes_dynamic
 
std::mutex * node_quality_locks
 
vector< string > edit_tmpfile_names
 
vector< ofstream * > tmpfstreams
 
std::mutex * tmpfstream_locks
 
size_t n_bins = 1
 
size_t bin_size = 0
 
size_t edit_length = 0
 
size_t edit_count = 0
 
dac_vector coverage_civ
 
vlc_vector edge_coverage_civ
 
vlc_vector node_quality_civ
 
vector< csa_sada< enc_vector<>, 32, 32, sa_order_sa_sampling<>, isa_sampling<>, succinct_byte_alphabet<> > > edit_csas
 
char delim1 = '\xff'
 
char delim2 = '\xfe'
 
bool record_bases
 
bool record_edges
 
bool record_edits
 
bool record_qualities
 
vector< LRUCache< pair< int, int >, int > * > quality_cache
 

Static Private Attributes

static const int maximum_quality = 60
 
static const int lru_cache_size = 4096
 

Detailed Description

Packer collects coverage of a GAM using compressed indexes. Any combination of these 3 types of information can be stored.

Constructor & Destructor Documentation

◆ Packer() [1/2]

vg::Packer::Packer ( const HandleGraph * graph = nullptr)

Create a Packer (to read from a file)

◆ Packer() [2/2]

vg::Packer::Packer ( const HandleGraph * graph,
bool  record_bases,
bool  record_edges,
bool  record_edits,
bool  record_qualities,
size_t  bin_size = 0,
size_t  coverage_bins = 1,
size_t  data_width = 8 
)

Create a Packer (to write to).
graph : Must implement the VectorizableHandleGraph interface.
record_bases : Store the base coverage.
record_edges : Store the edge coverage.
record_edits : Store the edits.
record_qualities : Store the average MAPQ for each node rank.
bin_size : Bin coverage into bins.
coverage_bins : Use this many coverage objects. Using one / thread allows faster merge.
coverage_locks : Number of mutexes to use for each of node and edge coverage.
data_width : Number of bits per entry in the dynamic coverage vector. Higher values get stored in a map.

◆ ~Packer()

vg::Packer::~Packer ( )

Member Function Documentation

◆ add()

void vg::Packer::add ( const Alignment & aln,
int  min_mapq = 0,
int  min_baseq = 0,
int  trim_ends = 0 
)

Add coverage from given alignment to the indexes.
aln : given alignment.
min_mapq : ignore alignments with mapping_quality below this value.
min_baseq : ignore bases in the alignment if their read quality is below this value.
trim_ends : ignore first and last <trim_ends> bases.

◆ as_edge_table()

ostream & vg::Packer::as_edge_table ( ostream &  out,
vector< vg::id_t > node_ids 
)

◆ as_quality_table()

ostream & vg::Packer::as_quality_table ( ostream &  out,
vector< vg::id_t > node_ids 
)

◆ as_table()

ostream & vg::Packer::as_table ( ostream &  out,
bool  show_edits,
vector< vg::id_t > node_ids 
)

◆ average_node_quality()

size_t vg::Packer::average_node_quality ( size_t  i) const

average node quality (faster from static)

◆ bin_for_position()

size_t vg::Packer::bin_for_position ( size_t  i) const
private

◆ clear()

void vg::Packer::clear ( )

◆ close_edit_tmpfiles()

void vg::Packer::close_edit_tmpfiles ( void  )
private

◆ collect_coverage()

void vg::Packer::collect_coverage ( const vector< Packer * > &  packers)

◆ combine_qualities()

int vg::Packer::combine_qualities ( int  map_quality,
int  base_quality 
) const
private

◆ compute_quality()

int vg::Packer::compute_quality ( const Alignment & aln,
size_t  position_in_read 
) const
private

◆ coverage_at_position()

size_t vg::Packer::coverage_at_position ( size_t  i) const

◆ coverage_bin_offset()

pair< size_t, size_t > vg::Packer::coverage_bin_offset ( size_t  i) const
private

map from absolute position to positions in the binned arrays

◆ coverage_bin_size()

size_t vg::Packer::coverage_bin_size ( size_t  i) const
private

get the size of a bin

◆ coverage_size()

size_t vg::Packer::coverage_size ( void  ) const

◆ edge_coverage() [1/2]

size_t vg::Packer::edge_coverage ( Edge & e) const

◆ edge_coverage() [2/2]

size_t vg::Packer::edge_coverage ( size_t  i) const

◆ edge_coverage_bin_offset()

pair< size_t, size_t > vg::Packer::edge_coverage_bin_offset ( size_t  i) const
private

◆ edge_coverage_bin_size()

size_t vg::Packer::edge_coverage_bin_size ( size_t  i) const
private

◆ edge_index()

size_t vg::Packer::edge_index ( const Edge & e) const

Return the 1-based index of the given edge in our vectorization order, or 0 if the edge does not exist in the graph.

◆ edge_vector_size()

size_t vg::Packer::edge_vector_size ( void  ) const

◆ edit_value()

string vg::Packer::edit_value ( const Edit & edit,
bool  revcomp 
) const

◆ edits_at_position()

vector< Edit > vg::Packer::edits_at_position ( size_t  i) const

◆ ensure_edit_tmpfiles_open()

void vg::Packer::ensure_edit_tmpfiles_open ( void  )
private

◆ escape_delim()

string vg::Packer::escape_delim ( const string &  s,
char  d 
) const
private

◆ escape_delims()

string vg::Packer::escape_delims ( const string &  s) const
private

◆ estimate_batch_size()

size_t vg::Packer::estimate_batch_size ( size_t  num_threads)
static

◆ estimate_bin_count()

size_t vg::Packer::estimate_bin_count ( size_t  num_threads)
static

◆ estimate_data_width()

size_t vg::Packer::estimate_data_width ( size_t  expected_coverage)
static

Some helper functions to heuristically estimate input parameters for constructor.

◆ get_bin_size()

size_t vg::Packer::get_bin_size ( void  ) const

◆ get_graph()

const HandleGraph * vg::Packer::get_graph ( ) const

◆ get_n_bins()

size_t vg::Packer::get_n_bins ( void  ) const

◆ has_qualities()

bool vg::Packer::has_qualities ( ) const

return true if there's at least one nonzero quality in the structure

◆ increment_coverage() [1/2]

void vg::Packer::increment_coverage ( size_t  i)

◆ increment_coverage() [2/2]

void vg::Packer::increment_coverage ( size_t  i,
size_t  v 
)

◆ increment_edge_coverage() [1/2]

void vg::Packer::increment_edge_coverage ( size_t  i)

◆ increment_edge_coverage() [2/2]

void vg::Packer::increment_edge_coverage ( size_t  i,
size_t  v 
)

◆ increment_node_quality()

void vg::Packer::increment_node_quality ( size_t  i,
size_t  v 
)

◆ index_to_node()

nid_t vg::Packer::index_to_node ( size_t  i) const

and back

◆ init_coverage_bin()

void vg::Packer::init_coverage_bin ( size_t  i)
private

initialize coverage bins on demand

◆ init_edge_coverage_bin()

void vg::Packer::init_edge_coverage_bin ( size_t  i)
private

◆ init_node_quality_bin()

void vg::Packer::init_node_quality_bin ( size_t  i)
private

◆ is_dynamic()

bool vg::Packer::is_dynamic ( void  ) const

◆ load()

void vg::Packer::load ( istream &  in)

◆ load_from_file()

void vg::Packer::load_from_file ( const string &  file_name)

◆ make_compact()

void vg::Packer::make_compact ( void  )

◆ make_dynamic()

void vg::Packer::make_dynamic ( void  )

◆ merge_from_dynamic()

void vg::Packer::merge_from_dynamic ( vector< Packer * > &  packers)

◆ merge_from_files()

void vg::Packer::merge_from_files ( const vector< string > &  file_names)

◆ node_index()

size_t vg::Packer::node_index ( nid_t  node_id) const

Return the 1-based node rank or 0 if node not in graph.

◆ node_quality_bin_offset()

pair< size_t, size_t > vg::Packer::node_quality_bin_offset ( size_t  i) const
private

◆ node_quality_bin_size()

size_t vg::Packer::node_quality_bin_size ( size_t  i) const
private

◆ node_quality_vector_size()

size_t vg::Packer::node_quality_vector_size ( void  ) const

◆ pos_key()

string vg::Packer::pos_key ( size_t  i) const

◆ position_in_basis()

size_t vg::Packer::position_in_basis ( const Position & pos) const

◆ remove_edit_tmpfiles()

void vg::Packer::remove_edit_tmpfiles ( void  )
private

◆ save_to_file()

void vg::Packer::save_to_file ( const string &  file_name)

◆ serialize()

size_t vg::Packer::serialize ( std::ostream &  out,
sdsl::structure_tree_node *  s = NULL,
std::string  name = "" 
)

◆ show_structure()

ostream & vg::Packer::show_structure ( ostream &  out)

◆ total_node_quality()

size_t vg::Packer::total_node_quality ( size_t  i) const

total node quality (faster from dynamic)

◆ unescape_delim()

string vg::Packer::unescape_delim ( const string &  s,
char  d 
) const
private

◆ unescape_delims()

string vg::Packer::unescape_delims ( const string &  s) const
private

◆ write_edits() [1/2]

void vg::Packer::write_edits ( ostream &  out,
size_t  bin 
) const

◆ write_edits() [2/2]

void vg::Packer::write_edits ( vector< ofstream * > &  out) const

Member Data Documentation

◆ base_locks

std::mutex* vg::Packer::base_locks
private

◆ bin_size

size_t vg::Packer::bin_size = 0
private

◆ cov_bin_size

size_t vg::Packer::cov_bin_size
private

◆ coverage_civ

dac_vector vg::Packer::coverage_civ
private

◆ coverage_dynamic

vector<gcsa::CounterArray*> vg::Packer::coverage_dynamic
private

◆ data_width

size_t vg::Packer::data_width
private

◆ delim1

char vg::Packer::delim1 = '\xff'
private

◆ delim2

char vg::Packer::delim2 = '\xfe'
private

◆ edge_cov_bin_size

size_t vg::Packer::edge_cov_bin_size
private

◆ edge_coverage_civ

vlc_vector vg::Packer::edge_coverage_civ
private

◆ edge_coverage_dynamic

vector<gcsa::CounterArray*> vg::Packer::edge_coverage_dynamic
private

◆ edge_locks

std::mutex* vg::Packer::edge_locks
private

◆ edit_count

size_t vg::Packer::edit_count = 0
private

◆ edit_csas

vector<csa_sada<enc_vector<>, 32, 32, sa_order_sa_sampling<>, isa_sampling<>, succinct_byte_alphabet<> > > vg::Packer::edit_csas
private

◆ edit_length

size_t vg::Packer::edit_length = 0
private

◆ edit_tmpfile_names

vector<string> vg::Packer::edit_tmpfile_names
private

◆ graph

const HandleGraph* vg::Packer::graph
private

◆ is_compacted

bool vg::Packer::is_compacted = false
private

◆ lru_cache_size

const int vg::Packer::lru_cache_size = 4096
static private

◆ maximum_quality

const int vg::Packer::maximum_quality = 60
static private

◆ n_bins

size_t vg::Packer::n_bins = 1
private

◆ node_qual_bin_size

size_t vg::Packer::node_qual_bin_size
private

◆ node_quality_civ

vlc_vector vg::Packer::node_quality_civ
private

◆ node_quality_dynamic

vector<gcsa::CounterArray*> vg::Packer::node_quality_dynamic
private

◆ node_quality_locks

std::mutex* vg::Packer::node_quality_locks
private

◆ num_bases_dynamic

size_t vg::Packer::num_bases_dynamic
private

◆ num_edges_dynamic

size_t vg::Packer::num_edges_dynamic
private

◆ num_nodes_dynamic

size_t vg::Packer::num_nodes_dynamic
private

◆ quality_cache

vector<LRUCache<pair<int, int>, int>*> vg::Packer::quality_cache
mutable private

◆ record_bases

bool vg::Packer::record_bases
private

◆ record_edges

bool vg::Packer::record_edges
private

◆ record_edits

bool vg::Packer::record_edits
private

◆ record_qualities

bool vg::Packer::record_qualities
private

◆ tmpfstream_locks

std::mutex* vg::Packer::tmpfstream_locks
private

◆ tmpfstreams

vector<ofstream*> vg::Packer::tmpfstreams
private

The documentation for this class was generated from the following files: