vg
tools for working with variation graphs
|
Namespaces | |
algorithms | |
io | |
subcommand | |
temp_file | |
Enumerations | |
enum | alignment_emitter_flags_t { ALIGNMENT_EMITTER_FLAG_NONE = 0, ALIGNMENT_EMITTER_FLAG_HTS_RAW = 1, ALIGNMENT_EMITTER_FLAG_HTS_SPLICED = 2, ALIGNMENT_EMITTER_FLAG_HTS_PRUNE_SUSPICIOUS_ANCHORS = 4, ALIGNMENT_EMITTER_FLAG_VG_USE_SEGMENT_NAMES = 8 } |
enum | MappingQualityMethod { Approx, Exact, Adaptive, None } |
enum | SnarlType { UNCLASSIFIED = 0, ULTRABUBBLE = 1, UNARY = 2 } |
Enumeration of the classifications of snarls. More... | |
Functions | |
int32_t | score_gap (size_t gap_length, int32_t gap_open, int32_t gap_extension) |
Score a gap with the given open and extension scores. More... | |
int | hts_for_each (string &filename, function< void(Alignment &)> lambda, const PathPositionHandleGraph *graph) |
int | hts_for_each (string &filename, function< void(Alignment &)> lambda) |
int | hts_for_each_parallel (string &filename, function< void(Alignment &)> lambda, const PathPositionHandleGraph *graph) |
int | hts_for_each_parallel (string &filename, function< void(Alignment &)> lambda) |
bam_hdr_t * | hts_file_header (string &filename, string &header) |
bam_hdr_t * | hts_string_header (string &header, const map< string, int64_t > &path_length, const map< string, string > &rg_sample) |
bam_hdr_t * | hts_string_header (string &header, const vector< pair< string, int64_t >> &path_order_and_length, const map< string, string > &rg_sample) |
bool | get_next_alignment_from_fastq (gzFile fp, char *buffer, size_t len, Alignment &alignment) |
bool | get_next_interleaved_alignment_pair_from_fastq (gzFile fp, char *buffer, size_t len, Alignment &mate1, Alignment &mate2) |
bool | get_next_alignment_pair_from_fastqs (gzFile fp1, gzFile fp2, char *buffer, size_t len, Alignment &mate1, Alignment &mate2) |
size_t | fastq_unpaired_for_each_parallel (const string &filename, function< void(Alignment &)> lambda, uint64_t batch_size) |
size_t | fastq_paired_interleaved_for_each_parallel (const string &filename, function< void(Alignment &, Alignment &)> lambda, uint64_t batch_size) |
size_t | fastq_paired_two_files_for_each_parallel (const string &file1, const string &file2, function< void(Alignment &, Alignment &)> lambda, uint64_t batch_size) |
size_t | fastq_paired_interleaved_for_each_parallel_after_wait (const string &filename, function< void(Alignment &, Alignment &)> lambda, function< bool(void)> single_threaded_until_true, uint64_t batch_size) |
size_t | fastq_paired_two_files_for_each_parallel_after_wait (const string &file1, const string &file2, function< void(Alignment &, Alignment &)> lambda, function< bool(void)> single_threaded_until_true, uint64_t batch_size) |
size_t | fastq_unpaired_for_each (const string &filename, function< void(Alignment &)> lambda) |
size_t | fastq_paired_interleaved_for_each (const string &filename, function< void(Alignment &, Alignment &)> lambda) |
size_t | fastq_paired_two_files_for_each (const string &file1, const string &file2, function< void(Alignment &, Alignment &)> lambda) |
void | parse_rg_sample_map (char *hts_header, map< string, string > &rg_sample) |
Populate a mapping from read group to sample name, given the text BAM header. More... | |
void | parse_tid_path_handle_map (const bam_hdr_t *hts_header, const PathHandleGraph *graph, map< int, path_handle_t > &tid_path_handle) |
string | alignment_to_sam_internal (const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const vector< pair< int, char >> &cigar, const string &mateseq, const int32_t matepos, bool materev, const int32_t tlen, bool paired, const int32_t tlen_max) |
int32_t | determine_flag (const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const string &mateseq, const int32_t matepos, bool materev, const int32_t tlen, bool paired, const int32_t tlen_max) |
Returns the SAM bit-coded flag for alignment with. More... | |
string | alignment_to_sam (const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const vector< pair< int, char >> &cigar, const string &mateseq, const int32_t matepos, bool materev, const int32_t tlen, const int32_t tlen_max) |
string | alignment_to_sam (const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const vector< pair< int, char >> &cigar) |
bam1_t * | alignment_to_bam_internal (bam_hdr_t *header, const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const vector< pair< int, char >> &cigar, const string &mateseq, const int32_t matepos, bool materev, const int32_t tlen, bool paired, const int32_t tlen_max) |
bam1_t * | alignment_to_bam (bam_hdr_t *bam_header, const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const vector< pair< int, char >> &cigar, const string &mateseq, const int32_t matepos, bool materev, const int32_t tlen, const int32_t tlen_max) |
bam1_t * | alignment_to_bam (bam_hdr_t *bam_header, const Alignment &alignment, const string &refseq, const int32_t refpos, const bool refrev, const vector< pair< int, char >> &cigar) |
string | cigar_string (const vector< pair< int, char > > &cigar) |
string | mapping_string (const string &source, const Mapping &mapping) |
void | mapping_cigar (const Mapping &mapping, vector< pair< int, char >> &cigar) |
int64_t | cigar_mapping (const bam1_t *b, Mapping *mapping) |
void | mapping_against_path (Alignment &alignment, const bam1_t *b, const path_handle_t &path, const PathPositionHandleGraph *graph, bool on_reverse_strand) |
vector< pair< int, char > > | cigar_against_path (const Alignment &alignment, bool on_reverse_strand, int64_t &pos, size_t path_len, size_t softclip_suppress) |
void | simplify_cigar (vector< pair< int, char >> &cigar) |
pair< int32_t, int32_t > | compute_template_lengths (const int64_t &pos1, const vector< pair< int, char >> &cigar1, const int64_t &pos2, const vector< pair< int, char >> &cigar2) |
int32_t | sam_flag (const Alignment &alignment, bool on_reverse_strand, bool paired) |
Alignment | bam_to_alignment (const bam1_t *b, const map< string, string > &rg_sample, const map< int, path_handle_t > &tid_path_handle, const bam_hdr_t *bh, const PathPositionHandleGraph *graph) |
Alignment | bam_to_alignment (const bam1_t *b, const map< string, string > &rg_sample, const map< int, path_handle_t > &tid_path_handle) |
int | alignment_to_length (const Alignment &a) |
int | alignment_from_length (const Alignment &a) |
Alignment | strip_from_start (const Alignment &aln, size_t drop) |
Alignment | strip_from_end (const Alignment &aln, size_t drop) |
Alignment | trim_alignment (const Alignment &aln, const Position &pos1, const Position &pos2) |
vector< Alignment > | alignment_ends (const Alignment &aln, size_t len1, size_t len2) |
Alignment | alignment_middle (const Alignment &aln, int len) |
vector< Alignment > | reverse_complement_alignments (const vector< Alignment > &alns, const function< int64_t(int64_t)> &node_length) |
Alignment | reverse_complement_alignment (const Alignment &aln, const function< int64_t(id_t)> &node_length) |
void | reverse_complement_alignment_in_place (Alignment *aln, const function< int64_t(id_t)> &node_length) |
Alignment | merge_alignments (const vector< Alignment > &alns) |
Alignment & | extend_alignment (Alignment &a1, const Alignment &a2, bool debug) |
Alignment | merge_alignments (const Alignment &a1, const Alignment &a2, bool debug) |
void | translate_nodes (Alignment &a, const unordered_map< id_t, pair< id_t, bool > > &ids, const std::function< size_t(int64_t)> &node_length) |
void | flip_nodes (Alignment &a, const set< int64_t > &ids, const std::function< size_t(int64_t)> &node_length) |
int | non_match_start (const Alignment &alignment) |
int | non_match_end (const Alignment &alignment) |
int | softclip_start (const Alignment &alignment) |
int | softclip_end (const Alignment &alignment) |
int | softclip_trim (Alignment &alignment) |
int | query_overlap (const Alignment &aln1, const Alignment &aln2) |
int | edit_count (const Alignment &alignment) |
size_t | to_length_after_pos (const Alignment &aln, const Position &pos) |
size_t | from_length_after_pos (const Alignment &aln, const Position &pos) |
size_t | to_length_before_pos (const Alignment &aln, const Position &pos) |
size_t | from_length_before_pos (const Alignment &aln, const Position &pos) |
const string | hash_alignment (const Alignment &aln) |
Alignment | simplify (const Alignment &a, bool trim_internal_deletions) |
void | normalize_alignment (Alignment &alignment) |
Merge adjacent edits of the same type and convert all N matches to mismatches. More... | |
bool | uses_Us (const Alignment &alignment) |
void | convert_alignment_char (Alignment &alignment, char from, char to) |
void | convert_Us_to_Ts (Alignment &alignment) |
Replaces any U's in the sequence or the Path with T's. More... | |
void | convert_Ts_to_Us (Alignment &alignment) |
Replaces any T's in the sequence or the Path with U's. More... | |
map< id_t, int > | alignment_quality_per_node (const Alignment &aln) |
string | middle_signature (const Alignment &aln, int len) |
pair< string, string > | middle_signature (const Alignment &aln1, const Alignment &aln2, int len) |
string | signature (const Alignment &aln) |
pair< string, string > | signature (const Alignment &aln1, const Alignment &aln2) |
void | parse_bed_regions (istream &bedstream, const PathPositionHandleGraph *graph, vector< Alignment > *out_alignments) |
void | parse_bed_regions (istream &bedstream, const PathPositionHandleGraph *graph, const std::function< void(Alignment &)> &callback) |
void | parse_gff_regions (istream &gffstream, const PathPositionHandleGraph *graph, vector< Alignment > *out_alignments) |
void | parse_gff_regions (istream &gffstream, const PathPositionHandleGraph *graph, const std::function< void(Alignment &)> &callback) |
Position | alignment_start (const Alignment &aln) |
Position | alignment_end (const Alignment &aln) |
map< string,vector< pair< size_t, bool > > > | alignment_refpos_to_path_offsets (const Alignment &aln) |
return the path offsets as cached in the alignment More... | |
void | alignment_set_distance_to_correct (Alignment &aln, const Alignment &base, const unordered_map< string, string > *translation) |
void | alignment_set_distance_to_correct (Alignment &aln, const map< string, vector< pair< size_t, bool > > > &base_offsets, const unordered_map< string, string > *translation) |
AlignmentValidity | alignment_is_valid (const Alignment &aln, const HandleGraph *hgraph, bool check_sequence) |
Alignment | target_alignment (const PathPositionHandleGraph *graph, const path_handle_t &path, size_t pos1, size_t pos2, const string &feature, bool is_reverse, Mapping &cigar_mapping) |
Alignment | target_alignment (const PathPositionHandleGraph *graph, const path_handle_t &path, size_t pos1, size_t pos2, const string &feature, bool is_reverse) |
int | fastq_for_each (string &filename, function< void(Alignment &)> lambda) |
void | write_alignment_to_file (const Alignment &aln, const string &filename) |
void | mapping_cigar (const Mapping &mapping, vector< pair< int, char > > &cigar) |
void | cigar_mapping (const bam1_t *b, Mapping &mapping) |
void | append_cigar_operation (const int length, const char operation, vector< pair< int, char >> &cigar) |
void | alignment_set_distance_to_correct (Alignment &aln, const map< string, vector< pair< size_t, bool >>> &base_offsets, const unordered_map< string, string > *translation=nullptr) |
template<typename Annotated > | |
bool | has_annotation (const Annotated &annotated, const string &name) |
Returns true if the Protobuf object has an annotation with this name. More... | |
template<typename AnnotationType , typename Annotated > | |
AnnotationType | get_annotation (const Annotated &annotated, const string &name) |
template<typename AnnotationType , typename Annotated > | |
AnnotationType | get_annotation (Annotated *annotated, const string &name) |
template<typename AnnotationType , typename Annotated > | |
void | set_annotation (Annotated *annotated, const string &name, const AnnotationType &annotation) |
template<typename AnnotationType , typename Annotated > | |
void | set_annotation (Annotated &annotated, const string &name, const AnnotationType &annotation) |
template<typename Annotated > | |
void | clear_annotation (Annotated *annotated, const string &name) |
Clear the annotation with the given name. More... | |
template<typename Annotated > | |
void | clear_annotation (Annotated &annotated, const string &name) |
Clear the annotation with the given name. More... | |
template<typename Annotated > | |
void | for_each_basic_annotation (const Annotated &annotated, const function< void(const string &)> null_lambda, const function< void(const string &, double)> double_lambda, const function< void(const string &, bool)> bool_lambda, const function< void(const string &, const string &)> string_lambda) |
template<typename T > | |
T | value_cast (const google::protobuf::Value &value) |
Cast a Protobuf generic Value to any type. More... | |
template<typename T > | |
google::protobuf::Value | value_cast (const T &wrap) |
Cast any type to a generic Protobuf value. More... | |
template<> | |
bool | value_cast< bool > (const google::protobuf::Value &value) |
template<> | |
double | value_cast< double > (const google::protobuf::Value &value) |
template<> | |
string | value_cast< string > (const google::protobuf::Value &value) |
template<> | |
google::protobuf::Value | value_cast< bool > (const bool &wrap) |
template<> | |
google::protobuf::Value | value_cast< double > (const double &wrap) |
template<> | |
google::protobuf::Value | value_cast< string > (const string &wrap) |
template<> | |
google::protobuf::Value | value_cast< size_t > (const size_t &wrap) |
template<> | |
google::protobuf::Value | value_cast< int > (const int &wrap) |
template<typename Container > | |
Container | value_cast (const google::protobuf::Value &value) |
Cast a Protobuf generic Value to any type. More... | |
template<typename Container > | |
google::protobuf::Value | value_cast (const Container &wrap) |
void | augment (MutablePathMutableHandleGraph *graph, const string &gam_path, const string &aln_format, vector< Translation > *out_translations, const string &gam_out_path, bool embed_paths, bool break_at_ends, bool remove_softclips, bool filter_out_of_graph_alignments, double min_baseq, double min_mapq, Packer *packer, size_t min_bp_coverage, double max_frac_n, bool edges_only) |
void | augment (MutablePathMutableHandleGraph *graph, vector< Path > &path_vector, const string &aln_format, vector< Translation > *out_translations, const string &gam_out_path, bool embed_paths, bool break_at_ends, bool remove_softclips, bool filter_out_of_graph_alignments, double min_baseq, double min_mapq, Packer *packer, size_t min_bp_coverage, double max_frac_n, bool edges_only) |
void | augment_impl (MutablePathMutableHandleGraph *graph, function< void(function< void(Alignment &)>, bool, bool)> iterate_gam, const string &aln_format, vector< Translation > *out_translation, const string &gam_out_path, bool embed_paths, bool break_at_ends, bool remove_soft_clips, bool filter_out_of_graph_alignments, double min_baseq, double min_mapq, Packer *packer, size_t min_bp_coverage, double max_frac_n, bool edges_only) |
Generic version used to implement the above three methods. More... | |
double | get_avg_baseq (const Edit &edit, const string &base_quals, size_t position_in_read) |
void | find_breakpoints (const Path &path, unordered_map< id_t, set< pos_t >> &breakpoints, bool break_ends, const string &base_quals, double min_baseq, double max_frac_n) |
unordered_map< id_t, set< pos_t > > | forwardize_breakpoints (const HandleGraph *graph, const unordered_map< id_t, set< pos_t >> &breakpoints) |
Flips the breakpoints onto the forward strand. More... | |
void | find_packed_breakpoints (const Path &path, Packer &packed_breakpoints, bool break_ends=true, const string &base_quals="", double min_baseq=0, double max_frac_n=1.) |
Like "find_breakpoints", but store in packed structure (better for large gams and enables coverage filter) More... | |
unordered_map< id_t, set< pos_t > > | filter_breakpoints_by_coverage (const Packer &packed_breakpoints, size_t min_bp_coverage) |
path_handle_t | add_path_to_graph (MutablePathHandleGraph *graph, const Path &path) |
map< pos_t, id_t > | ensure_breakpoints (MutableHandleGraph *graph, const unordered_map< id_t, set< pos_t >> &breakpoints) |
bool | simplify_filtered_edits (HandleGraph *graph, Alignment &aln, Path &path, const map< pos_t, id_t > &node_translation, const unordered_map< id_t, size_t > &orig_node_sizes, double min_baseq, double max_frac_n) |
Path | add_nodes_and_edges (MutableHandleGraph *graph, const Path &path, const map< pos_t, id_t > &node_translation, unordered_map< pair< pos_t, string >, vector< id_t >> &added_seqs, unordered_map< id_t, Path > &added_nodes, const unordered_map< id_t, size_t > &orig_node_sizes, size_t max_node_size=1024) |
This version doesn't require a set of dangling sides to populate More... | |
Path | add_nodes_and_edges (MutableHandleGraph *graph, const Path &path, const map< pos_t, id_t > &node_translation, unordered_map< pair< pos_t, string >, vector< id_t >> &added_seqs, unordered_map< id_t, Path > &added_nodes, const unordered_map< id_t, size_t > &orig_node_sizes, set< NodeSide > &dangling, size_t max_node_size) |
vector< Translation > | make_translation (const HandleGraph *graph, const map< pos_t, id_t > &node_translation, const unordered_map< id_t, Path > &added_nodes, const unordered_map< id_t, size_t > &orig_node_sizes) |
Produce a graph Translation object from information about the editing process. More... | |
void | add_edges_only (MutableHandleGraph *graph, function< void(function< void(Alignment &)>, bool, bool)> iterate_gam, double min_mapq, size_t min_bp_coverage) |
ostream & | operator<< (ostream &out, const BenchmarkResult &result) |
void | benchmark_control () |
BenchmarkResult | run_benchmark (const string &name, size_t iterations, const function< void(void)> &under_test) |
BenchmarkResult | run_benchmark (const string &name, size_t iterations, const function< void(void)> &setup, const function< void(void)> &under_test) |
void | build_gcsa_lcp (const HandleGraph &graph, gcsa::GCSA *&gcsa, gcsa::LCPArray *&lcp, int kmer_size, size_t doubling_steps, size_t size_limit, const string &base_file_name) |
void * | mergeNodeObjects (void *a, void *b) |
void | getReachableBridges2 (stCactusEdgeEnd *edgeEnd1, stHash *bridgeEndsToBridgeNodes, stList *bridgeEnds) |
void | getReachableBridges (stCactusEdgeEnd *edgeEnd1, stList *bridgeEnds) |
void | addArbitraryTelomerePair (vector< stCactusEdgeEnd * > ends, stList *telomeres) |
pair< stCactusGraph *, stList * > | handle_graph_to_cactus (const PathHandleGraph &graph, const unordered_set< string > &hint_paths, bool single_component) |
VG | cactus_to_vg (stCactusGraph *cactus_graph) |
VG | cactusify (VG &graph) |
void | visit_contained_snarls (const PathPositionHandleGraph *graph, const vector< Region > &regions, SnarlManager &snarl_manager, bool include_endpoints, function< void(const Snarl *, step_handle_t, step_handle_t, int64_t, int64_t, bool, const Region *)> visit_fn) |
void | delete_nodes_and_chop_paths (MutablePathMutableHandleGraph *graph, const unordered_set< nid_t > &nodes_to_delete, const unordered_set< edge_t > &edges_to_delete, int64_t min_fragment_len, unordered_map< string, size_t > *fragments_per_path) |
void | clip_contained_snarls (MutablePathMutableHandleGraph *graph, PathPositionHandleGraph *pp_graph, const vector< Region > &regions, SnarlManager &snarl_manager, bool include_endpoints, int64_t min_fragment_len, size_t max_nodes, size_t max_edges, size_t max_nodes_shallow, size_t max_edges_shallow, double max_avg_degree, double max_reflen_prop, size_t max_reflen, bool out_bed, bool verbose) |
void | clip_low_depth_nodes_and_edges_generic (MutablePathMutableHandleGraph *graph, function< void(function< void(handle_t, const Region *)>)> iterate_handles, function< void(function< void(edge_t, const Region *)>)> iterate_edges, int64_t min_depth, const vector< string > &ref_prefixes, int64_t min_fragment_len, bool verbose) |
void | clip_low_depth_nodes_and_edges (MutablePathMutableHandleGraph *graph, int64_t min_depth, const vector< string > &ref_prefixes, int64_t min_fragment_len, bool verbose) |
void | clip_contained_low_depth_nodes_and_edges (MutablePathMutableHandleGraph *graph, PathPositionHandleGraph *pp_graph, const vector< Region > &regions, SnarlManager &snarl_manager, bool include_endpoints, int64_t min_depth, int64_t min_fragment_len, bool verbose) |
void | clip_deletion_edges (MutablePathMutableHandleGraph *graph, int64_t max_deletion, int64_t context_steps, const vector< string > &ref_prefixes, int64_t min_fragment_len, bool verbose) |
void | clip_stubs_generic (MutablePathMutableHandleGraph *graph, function< void(function< void(handle_t, const Region *)>)> iterate_handles, function< bool(handle_t)> handle_in_range, const vector< string > &ref_prefixes, int64_t min_fragment_len, bool verbose) |
void | clip_stubs (MutablePathMutableHandleGraph *graph, const vector< string > &ref_prefixes, int64_t min_fragment_len, bool verbose) |
void | clip_contained_stubs (MutablePathMutableHandleGraph *graph, PathPositionHandleGraph *pp_graph, const vector< Region > &regions, SnarlManager &snarl_manager, bool include_endpoints, int64_t min_fragment_len, bool verbose) |
void | stubbify_ref_paths (MutablePathMutableHandleGraph *graph, const vector< string > &ref_prefixes, int64_t min_fragment_len, bool verbose) |
vector< pair< gcsa::node_type, size_t > > | mem_node_start_positions (const HandleGraph &graph, const vg::MaximalExactMatch &mem) |
get the handles that a mem covers More... | |
bdsg::HashGraph | cluster_subgraph_containing (const HandleGraph &base, const Alignment &aln, const vector< vg::MaximalExactMatch > &cluster, const GSSWAligner *aligner) |
return a containing subgraph connecting the mems More... | |
bdsg::HashGraph | cluster_subgraph_walk (const HandleGraph &base, const Alignment &aln, const vector< vg::MaximalExactMatch > &mems, double expansion) |
template<typename T > | |
bool | convert (const std::string &s, T &r) |
template<typename T > | |
std::string | convert (const T &r) |
void | stacktrace_manually (ostream &out, int signalNumber, void *ip, void **bp) |
void | emit_stacktrace (int signalNumber, siginfo_t *signalInfo, void *signalContext) |
Emit a stack trace when something bad happens. Add as a signal handler with sigaction. More... | |
void | enable_crash_handling () |
Main should call this to turn on our stack tracing support. More... | |
void | set_crash_context (const std::string &message) |
User code should call this when it has context for a failure in its thread. More... | |
void | clear_crash_context () |
User code should call this when it wants to clear context for a failure in its thread. More... | |
void | with_exception_handling (const std::function< void(void)> &body) |
User code should call this to get all its exceptions handled. More... | |
void | report_exception (const std::exception &ex) |
void | crash_unless_impl (bool condition, const std::string &condition_string, const std::string &file, int line, const std::string &function) |
crash_unless calls into this function for a real implementation. More... | |
double | entropy (const string &st) |
double | entropy (const char *st, size_t len) |
std::ostream & | operator<< (std::ostream &out, const Funnel::State &state) |
template<class Element > | |
void | in_place_subvector (std::vector< Element > &vec, size_t head, size_t tail) |
void | set_score (GaplessExtension &extension, const Aligner *aligner) |
void | match_initial (GaplessExtension &match, const std::string &seq, gbwtgraph::view_type target) |
size_t | match_forward (GaplessExtension &match, const std::string &seq, gbwtgraph::view_type target, uint32_t mismatch_limit) |
void | match_backward (GaplessExtension &match, const std::string &seq, gbwtgraph::view_type target, uint32_t mismatch_limit) |
void | handle_full_length (const HandleGraph &graph, std::vector< GaplessExtension > &result, double overlap_threshold) |
void | remove_duplicates (std::vector< GaplessExtension > &result) |
void | find_mismatches (const std::string &seq, const gbwtgraph::CachedGBWTGraph &graph, std::vector< GaplessExtension > &result) |
size_t | interval_length (std::pair< size_t, size_t > interval) |
std::vector< handle_t > | get_path (const std::vector< handle_t > &first, handle_t second) |
std::vector< handle_t > | get_path (handle_t first, const std::vector< handle_t > &second) |
std::vector< handle_t > | get_path (const std::vector< handle_t > &first, gbwt::node_type second) |
std::vector< handle_t > | get_path (gbwt::node_type reverse_first, const std::vector< handle_t > &second) |
bool | trim_mismatches (GaplessExtension &extension, const gbwtgraph::CachedGBWTGraph &graph, const Aligner &aligner) |
std::ostream & | operator<< (std::ostream &out, const WFAAlignment::Edit &edit) |
Allow printing an Edit. More... | |
std::vector< std::string > | parseGenotypes (const std::string &vcf_line, size_t num_samples) |
gbwt::vector_type | extract_as_gbwt_path (const PathHandleGraph &graph, const std::string &path_name) |
Extract a path as a GBWT path. If the path does not exist, it is treated as empty. More... | |
gbwt::vector_type | path_predecessors (const PathHandleGraph &graph, const std::string &path_name) |
gbwt::size_type | gbwt_node_width (const HandleGraph &graph) |
Determine the node width in bits for the GBWT nodes based on the given graph. More... | |
void | load_gbwt (gbwt::GBWT &index, const std::string &filename, bool show_progress=false) |
Load a compressed GBWT from the file. More... | |
void | load_gbwt (gbwt::DynamicGBWT &index, const std::string &filename, bool show_progress=false) |
Load a dynamic GBWT from the file. More... | |
void | load_r_index (gbwt::FastLocate &index, const std::string &filename, bool show_progress=false) |
Load an r-index from the file. More... | |
void | save_gbwt (const gbwt::GBWT &index, const std::string &filename, bool show_progress=false) |
Save a compressed GBWT to the file. More... | |
void | save_gbwt (const gbwt::DynamicGBWT &index, const std::string &filename, bool show_progress=false) |
Save a dynamic GBWT to the file. More... | |
void | save_r_index (const gbwt::FastLocate &index, const std::string &filename, bool show_progress=false) |
Save an r-index to the file. More... | |
std::vector< std::vector< gbwt::size_type > > | partition_gbwt_sequences (const gbwt::GBWT &gbwt_index, const std::unordered_map< nid_t, size_t > &node_to_job, size_t num_jobs) |
gbwt::GBWT | rebuild_gbwt_job (const gbwt::GBWT &gbwt_index, const RebuildJob &job, size_t job_id, const std::vector< gbwt::size_type > &sequences, const RebuildParameters &parameters) |
void | copy_metadata (const gbwt::GBWT &source, gbwt::GBWT &target, const std::vector< std::vector< gbwt::size_type >> &jobs, const std::vector< size_t > &job_order) |
gbwt::GBWT | rebuild_gbwt (const gbwt::GBWT &gbwt_index, const std::vector< RebuildJob > &jobs, const std::unordered_map< nid_t, size_t > &node_to_job, const RebuildParameters &parameters) |
gbwt::GBWT | rebuild_gbwt (const gbwt::GBWT &gbwt_index, const std::vector< RebuildJob::mapping_type > &mappings) |
As the general rebuild_gbwt , but always using a single job with default parameters. More... | |
std::vector< gbwt::size_type > | threads_for_sample (const gbwt::GBWT &gbwt_index, const std::string &sample_name) |
Return the list of thread ids / gbwt path ids for the given sample. More... | |
std::vector< gbwt::size_type > | threads_for_contig (const gbwt::GBWT &gbwt_index, const std::string &contig_name) |
Return the list of thread ids / gbwt path ids for the given contig. More... | |
std::string | insert_gbwt_path (MutablePathHandleGraph &graph, const gbwt::GBWT &gbwt_index, gbwt::size_type id, std::string path_name) |
Path | extract_gbwt_path (const HandleGraph &graph, const gbwt::GBWT &gbwt_index, gbwt::size_type id) |
std::string | compose_short_path_name (const gbwt::GBWT &gbwt_index, gbwt::size_type id) |
void | copy_reference_samples (const gbwt::GBWT &source, gbwt::GBWT &destination) |
Copies the reference sample tag from the source GBWT index to the destination GBWT index. More... | |
void | copy_reference_samples (const PathHandleGraph &source, gbwt::GBWT &destination) |
gbwt::GBWT | get_gbwt (const std::vector< gbwt::vector_type > &paths) |
Transform the paths into a GBWT index. Primarily for testing. More... | |
unordered_map< string, vector< nid_t > > | load_translation_map (ifstream &input_stream) |
unordered_map< nid_t, pair< string, size_t > > | load_translation_back_map (HandleGraph &graph, ifstream &input_stream) |
handle_t | gbwt_to_handle (const HandleGraph &graph, gbwt::node_type node) |
Convert gbwt::node_type to handle_t. More... | |
pos_t | gbwt_to_pos (gbwt::node_type node, size_t offset) |
Convert gbwt::node_type and an offset as size_t to pos_t. More... | |
gbwt::node_type | handle_to_gbwt (const HandleGraph &graph, handle_t handle) |
Convert handle_t to gbwt::node_type. More... | |
gbwt::node_type | pos_to_gbwt (pos_t pos) |
Extract gbwt::node_type from pos_t. More... | |
gbwt::node_type | mapping_to_gbwt (const Mapping &mapping) |
Convert Mapping to gbwt::node_type. More... | |
gbwt::vector_type | path_to_gbwt (const Path &path) |
Convert Path to a GBWT path. More... | |
gbwtgraph::GFAParsingParameters | get_best_gbwtgraph_gfa_parsing_parameters () |
void | load_gbwtgraph (gbwtgraph::GBWTGraph &graph, const std::string &filename, bool show_progress) |
void | load_gbz (gbwtgraph::GBZ &gbz, const std::string &filename, bool show_progress=false) |
Load GBZ from the file. More... | |
void | load_gbz (gbwt::GBWT &index, gbwtgraph::GBWTGraph &graph, const std::string &filename, bool show_progress=false) |
Load GBWT and GBWTGraph from the GBZ file. More... | |
void | load_gbz (gbwtgraph::GBZ &gbz, const std::string &gbwt_name, const std::string &graph_name, bool show_progress=false) |
Load GBZ from separate GBWT / GBWTGraph files. More... | |
void | load_minimizer (gbwtgraph::DefaultMinimizerIndex &index, const std::string &filename, bool show_progress=false) |
Load a minimizer index from the file. More... | |
void | save_gbwtgraph (const gbwtgraph::GBWTGraph &graph, const std::string &filename, bool show_progress=false) |
Save GBWTGraph to the file. More... | |
void | save_gbz (const gbwtgraph::GBZ &gbz, const std::string &filename, bool show_progress=false) |
Save GBZ to the file. More... | |
void | save_gbz (const gbwt::GBWT &index, gbwtgraph::GBWTGraph &graph, const std::string &filename, bool show_progress=false) |
Save GBWT and GBWTGraph to the GBZ file. More... | |
void | save_gbz (const gbwtgraph::GBZ &gbz, const std::string &gbwt_name, const std::string &graph_name, bool show_progress=false) |
Save GBZ to separate GBWT / GBWTGraph files. More... | |
void | save_minimizer (const gbwtgraph::DefaultMinimizerIndex &index, const std::string &filename, bool show_progress=false) |
Save a minimizer index to the file. More... | |
unordered_map< string, vector< nid_t > > | load_translation_map (const gbwtgraph::GBWTGraph &graph) |
Return a mapping of the original segment ids to a list of chopped node ids. More... | |
unordered_map< nid_t, pair< string, size_t > > | load_translation_back_map (const gbwtgraph::GBWTGraph &graph) |
Return a backwards mapping of chopped node to original segment position (id,offset pair) More... | |
std::string | to_string_gbwtgraph (handle_t handle) |
Returns a string representation of a GBWTGraph handle. More... | |
std::string | to_string_gbwtgraph (gbwt::node_type node) |
Returns a string representation of a GBWTGraph node. More... | |
handle_t | empty_gbwtgraph_handle () |
Returns an empty GBWTGraph handle corresponding to the GBWT endmarker. More... | |
void | load_gcsa (gcsa::GCSA &index, const std::string &filename, bool show_progress=false) |
Load GCSA from the file. More... | |
void | load_lcp (gcsa::LCPArray &lcp, const std::string &filename, bool show_progress=false) |
Load LCP array from the file. More... | |
void | save_gcsa (const gcsa::GCSA &index, const std::string &filename, bool show_progress=false) |
Save GCSA to the file. More... | |
void | save_lcp (const gcsa::LCPArray &lcp, const std::string &filename, bool show_progress=false) |
Save LCP array to the file. More... | |
SnarlTraversal | get_traversal_of_snarl (VG &graph, const Snarl *snarl, const SnarlManager &manager, const Path &path) |
string | traversal_to_string (VG &graph, const SnarlTraversal &path) |
Support | make_support (double forward, double reverse, double quality) |
double | total (const Support &support) |
Support | support_min (const Support &a, const Support &b) |
Support | support_max (const Support &a, const Support &b) |
Support | flip (const Support &to_flip) |
Support | operator+ (const Support &one, const Support &other) |
Support & | operator+= (Support &one, const Support &other) |
bool | operator< (const Support &a, const Support &b) |
bool | operator> (const Support &a, const Support &b) |
ostream & | operator<< (ostream &stream, const Support &support) |
string | to_vcf_genotype (const Genotype &gt) |
template<typename Scalar > | |
Support | operator* (const Support &support, const Scalar &scale) |
template<typename Scalar > | |
Support & | operator*= (Support &support, const Scalar &scale) |
template<typename Scalar > | |
Support | operator* (const Scalar &scale, const Support &support) |
template<typename Scalar > | |
Support | operator/ (const Support &support, const Scalar &scale) |
template<typename Scalar > | |
Support & | operator/= (Support &support, const Scalar &scale) |
string | allele_to_string (VG &graph, const Path &allele) |
template<typename T > | |
void | set_intersection (const unordered_set< T > &set_1, const unordered_set< T > &set_2, unordered_set< T > *out_intersection) |
void | graph_to_gfa (const PathHandleGraph *graph, ostream &out, const set< string > &rgfa_paths, bool rgfa_pline, bool use_w_lines) |
void | sort_by_id_dedup_and_clean (Graph &graph) |
remove duplicates and sort by id More... | |
void | remove_duplicates (Graph &graph) |
remove duplicate nodes and edges More... | |
void | remove_duplicate_edges (Graph &graph) |
remove duplicate edges More... | |
void | remove_duplicate_nodes (Graph &graph) |
remove duplicate nodes More... | |
void | remove_orphan_edges (Graph &graph) |
remove edges that link to a node that is not in the graph More... | |
void | sort_by_id (Graph &graph) |
order the nodes and edges in the graph by id More... | |
void | sort_nodes_by_id (Graph &graph) |
order the nodes in the graph by id More... | |
void | sort_edges_by_id (Graph &graph) |
order the edges in the graph by id pairs More... | |
bool | is_id_sortable (const Graph &graph) |
returns true if the graph is id-sortable (no reverse links) More... | |
bool | has_inversion (const Graph &graph) |
returns true if we find an edge that may specify an inversion More... | |
void | flip_doubly_reversed_edges (Graph &graph) |
clean up doubly-reversed edges More... | |
void | from_handle_graph (const HandleGraph &from, Graph &to) |
void | from_path_handle_graph (const PathHandleGraph &from, Graph &to) |
void | trace_haplotypes_and_paths (const PathHandleGraph &source, const gbwt::GBWT &haplotype_database, vg::id_t start_node, int extend_distance, Graph &out_graph, map< string, int > &out_thread_frequencies, bool expand_graph) |
void | output_haplotype_counts (ostream &annotation_ostream, vector< pair< thread_t, int >> &haplotype_list) |
Graph | output_graph_with_embedded_paths (vector< pair< thread_t, int >> &haplotype_list, const HandleGraph &source) |
void | output_graph_with_embedded_paths (ostream &subgraph_ostream, vector< pair< thread_t, int >> &haplotype_list, const HandleGraph &source, bool json) |
void | thread_to_graph_spanned (thread_t &t, Graph &g, const HandleGraph &source) |
void | add_thread_nodes_to_set (thread_t &t, set< int64_t > &nodes) |
void | add_thread_edges_to_set (thread_t &t, set< pair< int, int > > &edges) |
void | construct_graph_from_nodes_and_edges (Graph &g, const HandleGraph &source, set< int64_t > &nodes, set< pair< int, int > > &edges) |
Path | path_from_thread_t (thread_t &t, const HandleGraph &source) |
vector< pair< vector< gbwt::node_type >, gbwt::SearchState > > | list_haplotypes (const HandleGraph &graph, const gbwt::GBWT &gbwt, handle_t start, function< bool(const vector< gbwt::node_type > &)> stop_fn) |
unique_ptr< AlignmentEmitter > | get_alignment_emitter (const string &filename, const string &format, const vector< tuple< path_handle_t, size_t, size_t >> &paths, size_t max_threads, const HandleGraph *graph, int flags) |
pair< vector< pair< string, int64_t > >, unordered_map< string, int64_t > > | extract_path_metadata (const vector< tuple< path_handle_t, size_t, size_t >> &paths, const PathPositionHandleGraph &graph, bool subpath_support) |
vector< tuple< path_handle_t, size_t, size_t > > | get_sequence_dictionary (const string &filename, const vector< string > &path_names, const PathPositionHandleGraph &graph) |
void | copy_file (const string &from_fp, const string &to_fp) |
int64_t | get_file_size (const string &filename) |
bool | is_gzipped (const string &filename) |
int64_t | get_num_samples (const string &vcf_filename) |
double | approx_num_vars (const string &vcf_filename) |
double | format_multiplier () |
int64_t | approx_graph_memory (const vector< string > &fasta_filenames, const vector< string > &vcf_filenames) |
vector< int64_t > | each_approx_graph_memory (const vector< string > &fasta_filenames, const vector< string > &vcf_filenames) |
int64_t | approx_graph_memory (const string &fasta_filename, const string &vcf_filename) |
int64_t | approx_graph_memory (const string &gfa_filename) |
int64_t | approx_gbwt_memory (const string &vcf_filename) |
int64_t | approx_graph_load_memory (const string &graph_filename) |
bool | transcript_file_nonempty (const string &transcripts) |
vector< string > | vcf_contigs (const string &filename) |
size_t | guess_parallel_gbwt_jobs (size_t node_count, size_t haplotype_count, size_t available_memory, size_t batch_size) |
size_t | xg_index_size (const xg::XG &index) |
int | execute_in_fork (const function< void(void)> &exec) |
bool | kff_is_trivial (const uint8_t *encoding) |
Returns true if the encoding is trivial (0, 1, 2, 3). More... | |
std::string | kff_invert (const uint8_t *encoding) |
Inverts the KFF encoding into a packed -> char table. More... | |
kff_recoding_t | kff_recoding (const uint8_t *encoding) |
Returns a recoding for the given encoding. More... | |
uint64_t | kff_parse (const uint8_t *data, size_t bytes) |
Parses a big-endian integer from KFF data. More... | |
uint8_t | kff_encode (const std::string &kmer, size_t start, size_t limit, const uint8_t *encoding) |
std::vector< uint8_t > | kff_encode (const std::string &kmer, const uint8_t *encoding) |
void | kff_decode (uint8_t byte, size_t chars, const std::string &decoding, std::string &output) |
std::string | kff_decode (const uint8_t *kmer, size_t k, const std::string &decoding) |
Decodes a kmer in KFF format according to the given encoding. More... | |
uint8_t | kff_recode (gbwtgraph::Key64::value_type kmer, size_t k, size_t chars, const uint8_t *encoding) |
std::vector< uint8_t > | kff_recode (gbwtgraph::Key64::value_type kmer, size_t k, const uint8_t *encoding) |
Recodes a kmer from a minimizer index in KFF format according to the given encoding. More... | |
gbwtgraph::Key64::value_type | kff_recode (const uint8_t *kmer, size_t k, kff_recoding_t recoding) |
gbwtgraph::Key64::value_type | kff_recode_trivial (const uint8_t *kmer, size_t k, size_t bytes) |
std::vector< gbwtgraph::Key64::value_type > | kff_recode (const uint8_t *kmers, size_t n, size_t k, kff_recoding_t recoding) |
uint8_t | kff_get (const uint8_t *kmer, size_t i) |
void | kff_set (std::vector< uint8_t > &kmer, size_t i, uint8_t value) |
std::vector< uint8_t > | kff_reverse_complement (const uint8_t *kmer, size_t k, const uint8_t *encoding) |
Returns the reverse complement of a KFF kmer. More... | |
size_t | kff_bytes (size_t k) |
Returns the number of bytes required for a kmer in KFF format. More... | |
gbwtgraph::Key64::value_type | minimizer_reverse_complement (gbwtgraph::Key64::value_type kmer, size_t k) |
Returns the reverse complement of a minimizer index kmer. More... | |
void | for_each_kmer (const HandleGraph &graph, size_t k, const function< void(const kmer_t &)> &lambda, id_t head_id, id_t tail_id, atomic< int > *stop_flag) |
ostream & | operator<< (ostream &out, const kmer_t &kmer) |
Print a kmer_t to a stream. More... | |
void | kmer_to_gcsa_kmers (const kmer_t &kmer, const gcsa::Alphabet &alpha, const function< void(const gcsa::KMer &)> &lambda) |
Convert the kmer_t to a set of gcsa2 binary kmers which are exposed via a callback. More... | |
gcsa::byte_type | encode_chars (const vector< char > &chars, const gcsa::Alphabet &alpha) |
Encode the chars into the gcsa2 byte. More... | |
void | write_gcsa_kmers (const HandleGraph &graph, int kmer_size, ostream &out, size_t &size_limit, id_t head_id, id_t tail_id) |
string | write_gcsa_kmers_to_tmpfile (const HandleGraph &graph, int kmer_size, size_t &size_limit, id_t head_id, id_t tail_id, const string &base_file_name) |
vector< size_t > | make_prefix_suffix_table (const char *pattern, size_t len) |
size_t | kmp_search (const char *text, size_t text_len, const char *pattern, size_t pattern_len, const vector< size_t > &prefix_suffix_table) |
int | sub_overlaps_of_first_aln (const vector< Alignment > &alns, float overlap_fraction) |
set< pos_t > | gcsa_nodes_to_positions (const vector< gcsa::node_type > &nodes) |
const int | balanced_stride (int read_length, int kmer_size, int stride) |
const vector< string > | balanced_kmers (const string &seq, const int kmer_size, const int stride) |
pair< int64_t, int64_t > | mem_min_oriented_distances (const MaximalExactMatch &m1, const MaximalExactMatch &m2) |
bool | operator== (const MaximalExactMatch &m1, const MaximalExactMatch &m2) |
bool | operator< (const MaximalExactMatch &m1, const MaximalExactMatch &m2) |
ostream & | operator<< (ostream &out, const MaximalExactMatch &mem) |
const string | mems_to_json (const vector< MaximalExactMatch > &mems) |
vector< string::const_iterator > | cluster_cover (const vector< MaximalExactMatch > &cluster) |
int | cluster_coverage (const vector< MaximalExactMatch > &cluster) |
bool | mems_overlap (const MaximalExactMatch &mem1, const MaximalExactMatch &mem2) |
int | mems_overlap_length (const MaximalExactMatch &mem1, const MaximalExactMatch &mem2) |
bool | clusters_overlap_in_read (const vector< MaximalExactMatch > &cluster1, const vector< MaximalExactMatch > &cluster2) |
int | clusters_overlap_length (const vector< MaximalExactMatch > &cluster1, const vector< MaximalExactMatch > &cluster2) |
vector< pos_t > | cluster_nodes (const vector< MaximalExactMatch > &cluster) |
bool | clusters_overlap_in_graph (const vector< MaximalExactMatch > &cluster1, const vector< MaximalExactMatch > &cluster2) |
vector< MaximalExactMatch > | translate_mems (const vector< MaximalExactMatch > &mems, const unordered_map< id_t, pair< id_t, bool > > &trans) |
string | get_proc_status_value (const string &name) |
Get the string value for a field in /proc/self/status by name, or "" if unsupported or not found. More... | |
size_t | get_max_rss_kb () |
Get the max RSS usage ever, in kb, or 0 if unsupported. More... | |
size_t | get_max_vmem_kb () |
Get the max virtual memory size ever, in kb, or 0 if unsupported. More... | |
size_t | get_current_vmem_kb () |
Get the current virtual memory size, in kb, or 0 if unsupported. More... | |
vector< size_t > | subpath_topological_order (const multipath_alignment_t &multipath_aln, bool do_index) |
Return either the vector of topological order by index or the vector of indexes within the topological order. More... | |
void | topologically_order_subpaths (multipath_alignment_t &multipath_aln) |
Put subpaths in topological order (assumed to be true for other algorithms) More... | |
void | remove_empty_alignment_sections (multipath_alignment_t &multipath_aln) |
void | identify_start_subpaths (multipath_alignment_t &multipath_aln) |
void | clear_alignment (multipath_alignment_t &multipath_aln) |
Clear all of the fields associated with the alignment. More... | |
tuple< MultipathProblem, int64_t, int32_t > | run_multipath_dp (const multipath_alignment_t &multipath_aln, bool subpath_global=false, bool forward=true) |
template<typename TracebackIterator > | |
void | populate_path_from_traceback (const multipath_alignment_t &multipath_aln, const MultipathProblem &problem, TracebackIterator traceback_start, TracebackIterator traceback_end, Path *output) |
int32_t | optimal_alignment_internal (const multipath_alignment_t &multipath_aln, Alignment *aln_out, bool subpath_global) |
void | optimal_alignment (const multipath_alignment_t &multipath_aln, Alignment &aln_out, bool subpath_global) |
int32_t | optimal_alignment_score (const multipath_alignment_t &multipath_aln, bool subpath_global) |
int32_t | worst_alignment_score (const multipath_alignment_t &multipath_aln) |
void | remove_low_scoring_sections (multipath_alignment_t &multipath_aln, int32_t max_score_diff) |
vector< Alignment > | optimal_alignments (const multipath_alignment_t &multipath_aln, size_t count) |
vector< Alignment > | optimal_alignments_with_disjoint_subpaths (const multipath_alignment_t &multipath_aln, size_t count) |
vector< Alignment > | haplotype_consistent_alignments (const multipath_alignment_t &multipath_aln, const haplo::ScoreProvider &score_provider, size_t soft_count, size_t hard_count, bool optimal_first) |
pair< int64_t, int64_t > | aligned_interval (const multipath_alignment_t &multipath_aln) |
The indexes on the read sequence of the portion of the read that is aligned outside of soft clips. More... | |
void | rev_comp_subpath (const subpath_t &subpath, const function< int64_t(int64_t)> &node_length, subpath_t &rev_comp_out) |
void | rev_comp_multipath_alignment (const multipath_alignment_t &multipath_aln, const function< int64_t(int64_t)> &node_length, multipath_alignment_t &rev_comp_out) |
void | rev_comp_multipath_alignment_in_place (multipath_alignment_t *multipath_aln, const function< int64_t(int64_t)> &node_length) |
void | convert_multipath_alignment_char (multipath_alignment_t &multipath_aln, char from, char to) |
void | convert_Us_to_Ts (multipath_alignment_t &multipath_aln) |
Replaces all U's in the sequence and the aligned Paths with T's. More... | |
void | convert_Ts_to_Us (multipath_alignment_t &multipath_aln) |
Replaces all T's in the sequence and the aligned Paths with U's. More... | |
template<class ProtoAlignment > | |
void | transfer_from_proto_annotation (const ProtoAlignment &from, multipath_alignment_t &to) |
template<class ProtoAlignment > | |
void | transfer_to_proto_annotation (const multipath_alignment_t &from, ProtoAlignment &to) |
template<class ProtoAlignment1 , class ProtoAlignment2 > | |
void | transfer_between_proto_annotation (const ProtoAlignment1 &from, ProtoAlignment2 &to) |
template<class Alignment1 , class Alignment2 > | |
void | transfer_uniform_metadata (const Alignment1 &from, Alignment2 &to) |
void | to_proto_multipath_alignment (const multipath_alignment_t &multipath_aln, MultipathAlignment &proto_multipath_aln_out) |
Convert an STL-based multipath_alignment_t to a protobuf MultipathAlignment. More... | |
void | from_proto_multipath_alignment (const MultipathAlignment &proto_multipath_aln, multipath_alignment_t &multipath_aln_out) |
Convert a protobuf MultipathAlignment to an STL-based multipath_alignment_t. More... | |
void | to_multipath_alignment (const Alignment &aln, multipath_alignment_t &multipath_aln_out) |
void | transfer_read_metadata (const MultipathAlignment &from, multipath_alignment_t &to) |
void | transfer_read_metadata (const multipath_alignment_t &from, MultipathAlignment &to) |
void | transfer_read_metadata (const multipath_alignment_t &from, multipath_alignment_t &to) |
void | transfer_read_metadata (const Alignment &from, multipath_alignment_t &to) |
void | transfer_read_metadata (const multipath_alignment_t &from, Alignment &to) |
void | transfer_read_metadata (const Alignment &from, Alignment &to) |
void | transfer_proto_metadata (const Alignment &from, MultipathAlignment &to) |
void | transfer_proto_metadata (const MultipathAlignment &from, Alignment &to) |
void | merge_non_branching_subpaths (multipath_alignment_t &multipath_aln, const unordered_set< size_t > *prohibited_merges) |
void | connected_comps_do (const multipath_alignment_t &multipath_aln, function< void(void)> &on_new_component, function< void(size_t)> &on_new_node) |
size_t | num_connected_components (const multipath_alignment_t &multipath_aln) |
Returns the number of connected components in the multipath alignment. More... | |
vector< vector< int64_t > > | connected_components (const multipath_alignment_t &multipath_aln) |
void | extract_sub_multipath_alignment (const multipath_alignment_t &multipath_aln, const vector< int64_t > &subpath_indexes, multipath_alignment_t &sub_multipath_aln) |
void | append_multipath_alignment (multipath_alignment_t &multipath_aln, const multipath_alignment_t &to_append) |
Add the subpaths of one multipath alignment onto another. More... | |
bool | contains_connection (const multipath_alignment_t &multipath_aln) |
Returns true if any subpath has a connection adjacency. More... | |
vector< tuple< int64_t, int64_t, int64_t, int64_t > > | search_multipath_alignment (const multipath_alignment_t &multipath_aln, const pos_t &graph_pos, int64_t seq_pos) |
pair< tuple< int64_t, int64_t, int64_t >, vector< tuple< int64_t, int64_t, int64_t, int64_t > > > | trace_path (const multipath_alignment_t &multipath_aln, const Path &path, int64_t subpath_idx, int64_t mapping_idx, int64_t edit_idx, int64_t base_idx, bool search_left, int64_t search_limit) |
bool | contains_match (const multipath_alignment_t &multipath_aln, const pos_t &pos, int64_t read_pos, int64_t match_length) |
vector< pair< int, char > > | cigar_against_path (const multipath_alignment_t &multipath_aln, const string &path_name, bool rev, int64_t path_pos, const PathPositionHandleGraph &graph, int64_t min_splice_length) |
bool | validate_multipath_alignment (const multipath_alignment_t &multipath_aln, const HandleGraph &handle_graph) |
void | view_multipath_alignment (ostream &out, const multipath_alignment_t &multipath_aln, const HandleGraph &handle_graph) |
Send a formatted string representation of the multipath_alignment_t into the ostream. More... | |
void | view_multipath_alignment_as_dot (ostream &out, const multipath_alignment_t &multipath_aln, bool show_graph=false) |
Converts a multipath_alignment_t to a GraphViz Dot representation, output to the given ostream. More... | |
string | debug_string (const connection_t &connection) |
string | debug_string (const subpath_t &subpath) |
string | debug_string (const multipath_alignment_t &multipath_aln) |
string | make_shuffle_seed (const multipath_alignment_t &aln) |
Define seed generation for shuffling multipath alignments. More... | |
NodeSide | node_start (id_t id) |
Produce the start NodeSide of a Node. More... | |
NodeSide | node_end (id_t id) |
Produce the end NodeSide of a Node. More... | |
ostream & | operator<< (ostream &out, const NodeSide &nodeside) |
Print a NodeSide to a stream. More... | |
ostream & | operator<< (ostream &out, const NodeTraversal &nodetraversal) |
Print the given NodeTraversal. More... | |
ostream & | operator<< (ostream &out, mapping_t mapping) |
Allow a mapping_t to be printed, for debugging purposes. More... | |
Path & | append_path (Path &a, const Path &b) |
int | path_to_length (const Path &path) |
int | path_from_length (const Path &path) |
int | mapping_to_length (const Mapping &m) |
int | mapping_from_length (const Mapping &m) |
int | softclip_start (const Mapping &mapping) |
int | softclip_end (const Mapping &mapping) |
Position | first_path_position (const Path &path) |
Position | last_path_position (const Path &path) |
int | to_length (const Mapping &m) |
int | from_length (const Mapping &m) |
Path & | extend_path (Path &path1, const Path &path2) |
Path | concat_paths (const Path &path1, const Path &path2) |
Path | simplify (const Path &p, bool trim_internal_deletions) |
Mapping | concat_mappings (const Mapping &m, const Mapping &n, bool trim_internal_deletions) |
Mapping | simplify (const Mapping &m, bool trim_internal_deletions) |
bool | edits_are_compatible (const Edit &e, const Edit &f) |
Return true if two edits could be combined into one (assuming adjacency). More... | |
void | merge_edits_in_place (Edit &e, const Edit &f) |
Glom the second edit into the first, assuming adjacency. More... | |
Mapping | merge_adjacent_edits (const Mapping &m) |
Merge adjacent edits of the same type. More... | |
Path | trim_hanging_ends (const Path &p) |
bool | mappings_equivalent (const Mapping &m1, const Mapping &m2) |
bool | mapping_ends_in_deletion (const Mapping &m) |
bool | mapping_starts_in_deletion (const Mapping &m) |
bool | mapping_is_total_deletion (const Mapping &m) |
bool | mapping_is_total_insertion (const Mapping &m) |
bool | mapping_is_simple_match (const Mapping &m) |
bool | path_is_simple_match (const Path &p) |
const string | mapping_sequence (const Mapping &mp, const string &node_seq) |
const string | mapping_sequence (const Mapping &mp, const Node &n) |
string | path_sequence (const HandleGraph &graph, const Path &path) |
Mapping | reverse_complement_mapping (const Mapping &m, const function< int64_t(id_t)> &node_length) |
void | reverse_complement_mapping_in_place (Mapping *m, const function< int64_t(id_t)> &node_length) |
Path | reverse_complement_path (const Path &path, const function< int64_t(id_t)> &node_length) |
void | reverse_complement_path_in_place (Path *path, const function< int64_t(id_t)> &node_length) |
pair< Mapping, Mapping > | cut_mapping (const Mapping &m, const Position &pos) |
pair< mapping_t, mapping_t > | cut_mapping (const mapping_t &m, const Position &pos) |
pair< Mapping, Mapping > | cut_mapping_offset (const Mapping &m, size_t offset) |
pair< mapping_t, mapping_t > | cut_mapping_offset (const mapping_t &m, size_t offset) |
pair< Mapping, Mapping > | cut_mapping (const Mapping &m, size_t offset) |
pair< mapping_t, mapping_t > | cut_mapping (const mapping_t &m, size_t offset) |
pair< Path, Path > | cut_path (const Path &path, const Position &pos) |
pair< Path, Path > | cut_path (const Path &path, size_t offset) |
bool | maps_to_node (const Path &p, id_t id) |
Position | path_start_position (const Path &path) |
string | path_to_string (Path p) |
Position | path_end_position (const Path &path) |
bool | adjacent_mappings (const Mapping &m1, const Mapping &m2) |
bool | mapping_is_match (const Mapping &m) |
double | divergence (const Mapping &m) |
double | identity (const Path &path) |
void | decompose (const Path &path, map< pos_t, int > &ref_positions, map< pos_t, Edit > &edits) |
double | overlap (const Path &p1, const Path &p2) |
void | translate_node_ids (Path &path, const unordered_map< id_t, id_t > &translator) |
Switches the node ids in the path to the ones indicated by the translator. More... | |
void | translate_node_ids (Path &path, const unordered_map< id_t, id_t > &translator, id_t cut_node, size_t bases_removed, bool from_right) |
void | translate_oriented_node_ids (Path &path, const unordered_map< id_t, pair< id_t, bool >> &translator) |
Switches the node ids and orientations in the path to the ones indicated by the translator. More... | |
void | translate_oriented_node_ids (Path &path, const function< pair< id_t, bool >(id_t)> &translator) |
Switches node ids and orientations in the path to the ones indicated by the translator. More... | |
void | translate_node_ids (path_t &path, const unordered_map< id_t, id_t > &translator) |
void | translate_oriented_node_ids (path_t &path, const unordered_map< id_t, pair< id_t, bool >> &translator) |
void | translate_oriented_node_ids (path_t &path, const function< pair< id_t, bool >(id_t)> &translator) |
pos_t | initial_position (const Path &path) |
pos_t | final_position (const Path &path) |
Path | path_from_node_traversals (const list< NodeTraversal > &traversals) |
void | remove_paths (Graph &graph, const function< bool(const string &)> &paths_to_take, std::list< Path > *matching) |
Path | path_from_path_handle (const PathHandleGraph &graph, path_handle_t path_handle) |
Alignment | alignment_from_path (const HandleGraph &graph, const Path &path) |
void | from_proto_edit (const Edit &proto_edit, edit_t &edit) |
void | to_proto_edit (const edit_t &edit, Edit &proto_edit) |
void | from_proto_mapping (const Mapping &proto_mapping, path_mapping_t &mapping) |
void | to_proto_mapping (const path_mapping_t &mapping, Mapping &proto_mapping) |
void | from_proto_path (const Path &proto_path, path_t &path) |
void | to_proto_path (const path_t &path, Path &proto_path) |
int | mapping_from_length (const path_mapping_t &mapping) |
int | path_from_length (const path_t &path) |
int | mapping_to_length (const path_mapping_t &mapping) |
int | path_to_length (const path_t &path) |
void | reverse_complement_mapping_in_place (path_mapping_t *m, const function< int64_t(id_t)> &node_length) |
path_mapping_t | reverse_complement_mapping (const path_mapping_t &m, const function< int64_t(id_t)> &node_length) |
path_t | reverse_complement_path (const path_t &path, const function< int64_t(id_t)> &node_length) |
void | reverse_complement_path_in_place (path_t *path, const function< int64_t(id_t)> &node_length) |
pos_t | initial_position (const path_t &path) |
pos_t | final_position (const path_t &path) |
string | debug_string (const path_t &path) |
string | debug_string (const path_mapping_t &mapping) |
string | debug_string (const edit_t &edit) |
int | corresponding_length_internal (const path_t &path, int given_length, bool is_from_length, bool from_end) |
int | corresponding_to_length (const path_t &path, int from_length, bool from_end) |
int | corresponding_from_length (const path_t &path, int to_length, bool from_end) |
Path & | increment_node_mapping_ids (Path &p, id_t inc) |
const Paths | paths_from_graph (Graph &g) |
Path | merge_adjacent_edits (const Path &m) |
Merge adjacent edits of the same type. More... | |
vg::id_t | path_node (const vector< pair< vg::id_t, bool >> &path, size_t i) |
vg::id_t | path_node (const gbwt::vector_type &path, size_t i) |
size_t | path_size (const vector< pair< vg::id_t, bool >> &path) |
size_t | path_size (const gbwt::vector_type &path) |
bool | path_reverse (const vector< pair< vg::id_t, bool >> &path, size_t i) |
bool | path_reverse (const gbwt::vector_type &path, size_t i) |
std::ostream & | operator<< (std::ostream &out, PathBranch branch) |
template<class PathType > | |
bool | verify_path (const PathType &path, MutableHandleGraph &unfolded, const hash_map< vg::id_t, std::vector< vg::id_t >> &reverse_mapping) |
template<class Decoder > | |
void | printId (vg::id_t id) |
PhaseUnfolder::path_type | canonical_orientation (const PhaseUnfolder::path_type &path, bool &from_border, bool &to_border) |
pos_t | make_pos_t (const Position &pos) |
Convert a Position to a (much smaller) pos_t. More... | |
pos_t | make_pos_t (const position_t &pos) |
pos_t | make_pos_t (gcsa::node_type node) |
Create a pos_t from a gcsa node. More... | |
Position | make_position (const pos_t &pos) |
Convert a pos_t to a Position. More... | |
Position | make_position (id_t id, bool is_rev, offset_t off) |
Create a Position from a Node ID, an orientation flag, and an offset along that strand of the node. More... | |
Position | make_position (gcsa::node_type node) |
Make a Position from a gcsa node. More... | |
Position | reverse (const Position &pos, size_t node_length) |
pair< int64_t, int64_t > | min_oriented_distances (const unordered_map< path_handle_t, vector< pair< size_t, bool > > > &path_offsets1, const unordered_map< path_handle_t, vector< pair< size_t, bool > > > &path_offsets2) |
Find the min distance in the path offsets where the path orientation is the same and different. More... | |
string | debug_string (const position_t &pos) |
void | from_proto_position (const Position &from, position_t &to) |
void | preflight_check () |
ostream & | operator<< (ostream &os, const Counts &counts) |
std::string | to_string (handle_t handle) |
hash_map< Haplotypes::Subchain::kmer_type, size_t >::iterator | find_kmer (hash_map< Haplotypes::Subchain::kmer_type, size_t > &counts, Haplotypes::Subchain::kmer_type kmer, size_t k) |
void | sa_to_da (std::vector< HaplotypePartitioner::sequence_type > &sequences, const gbwt::FastLocate &r_index) |
std::string | generate_haplotype (gbwt::edge_type pos, handle_t end, size_t start_max, size_t end_max, const gbwtgraph::GBWTGraph &graph) |
std::vector< HaplotypePartitioner::kmer_type > | take_unique_minimizers (const std::string &sequence, const HaplotypePartitioner::minimizer_index_type &minimizer_index) |
void | present_kmers (const std::vector< std::vector< HaplotypePartitioner::kmer_type >> &sequences, std::vector< std::pair< HaplotypePartitioner::kmer_type, size_t >> &all_kmers, sdsl::bit_vector &kmers_present) |
void | add_path (const gbwt::GBWT &source, gbwt::size_type path_id, gbwt::GBWTBuilder &builder, gbwtgraph::MetadataBuilder &metadata) |
void | recombinator_sanity_checks (const Recombinator::Parameters &parameters) |
double | get_or_estimate_coverage (const hash_map< Haplotypes::Subchain::kmer_type, size_t > &counts, const Recombinator::Parameters &parameters, Haplotypes::Verbosity verbosity) |
std::vector< std::pair< Recombinator::kmer_presence, double > > | classify_kmers (const Haplotypes::Subchain &subchain, const hash_map< Haplotypes::Subchain::kmer_type, size_t > &kmer_counts, double coverage, Recombinator::Statistics *statistics, const Recombinator::Parameters &parameters) |
std::vector< std::pair< size_t, double > > | select_diploid (const Haplotypes::Subchain &subchain, const std::vector< std::pair< size_t, double >> &candidates, const std::vector< std::pair< Recombinator::kmer_presence, double >> &kmer_types) |
std::vector< std::pair< size_t, double > > | select_haplotypes (const Haplotypes::Subchain &subchain, const hash_map< Haplotypes::Subchain::kmer_type, size_t > &kmer_counts, double coverage, Recombinator::Statistics *statistics, std::vector< Recombinator::LocalHaplotype > *local_haplotypes, const Recombinator::Parameters &parameters) |
void | parse_region (const string &target, string &name, int64_t &start, int64_t &end) |
void | parse_bed_regions (const string &bed_path, vector< Region > &out_regions, vector< string > *out_names) |
void | parse_region (string &region, Region &out_region) |
pos_t | position_at (PathPositionHandleGraph *graph_ptr, const string &path_name, const size_t &path_offset, bool is_reverse) |
string | to_string (const HandleGraph &graph, handle_t handle) |
string | to_string (const HandleGraph &graph, edge_t edge) |
size_t | minimum_distance (const SnarlDistanceIndex &distance_index, pos_t pos1, pos_t pos2, bool unoriented_distance, const HandleGraph *graph) |
size_t | maximum_distance (const SnarlDistanceIndex &distance_index, pos_t pos1, pos_t pos2) |
void | fill_in_distance_index (SnarlDistanceIndex *distance_index, const HandleGraph *graph, const HandleGraphSnarlFinder *snarl_finder, size_t size_limit, bool silence_warnings) |
SnarlDistanceIndex::TemporaryDistanceIndex | make_temporary_distance_index (const HandleGraph *graph, const HandleGraphSnarlFinder *snarl_finder, size_t size_limit) |
void | populate_snarl_index (SnarlDistanceIndex::TemporaryDistanceIndex &temp_index, pair< SnarlDistanceIndex::temp_record_t, size_t > snarl_index, size_t size_limit, const HandleGraph *graph) |
void | subgraph_in_distance_range (const SnarlDistanceIndex &distance_index, const Path &path, const HandleGraph *super_graph, size_t min_distance, size_t max_distance, std::unordered_set< nid_t > &subgraph, bool look_forward) |
void | subgraph_in_distance_range_walk_graph (const HandleGraph *super_graph, size_t min_distance, size_t max_distance, std::unordered_set< nid_t > &subgraph, vector< pair< handle_t, size_t >> &start_nodes, hash_set< pair< nid_t, bool >> &seen_nodes, const pair< nid_t, bool > &traversal_start) |
void | subgraph_in_distance_range_walk_across_chain (const SnarlDistanceIndex &distance_index, const HandleGraph *super_graph, std::unordered_set< nid_t > &subgraph, net_handle_t current_node, size_t current_distance, vector< pair< handle_t, size_t >> &search_start_nodes, hash_set< pair< nid_t, bool >> &seen_nodes, const size_t &min_distance, const size_t &max_distance, bool checked_loop) |
void | subgraph_containing_path_snarls (const SnarlDistanceIndex &distance_index, const HandleGraph *graph, const Path &path, std::unordered_set< nid_t > &subgraph) |
void | add_descendants_to_subgraph (const SnarlDistanceIndex &distance_index, const net_handle_t &parent, std::unordered_set< nid_t > &subgraph) |
MIPayloadValues | get_minimizer_distances (const SnarlDistanceIndex &distance_index, pos_t pos) |
bool | start_backward (const Chain &chain) |
bool | end_backward (const Chain &chain) |
Visit | get_start_of (const Chain &chain) |
Visit | get_end_of (const Chain &chain) |
ChainIterator | chain_begin (const Chain &chain) |
ChainIterator | chain_end (const Chain &chain) |
ChainIterator | chain_rbegin (const Chain &chain) |
ChainIterator | chain_rend (const Chain &chain) |
ChainIterator | chain_rcbegin (const Chain &chain) |
ChainIterator | chain_rcend (const Chain &chain) |
ChainIterator | chain_begin_from (const Chain &chain, const Snarl *start_snarl, bool snarl_orientation) |
ChainIterator | chain_end_from (const Chain &chain, const Snarl *start_snarl, bool snarl_orientation) |
edge_t | to_edge (const handlegraph::HandleGraph &graph, const Visit &v1, const Visit &v2) |
Make an edge_t from a pair of visits. More... | |
bool | operator== (const Visit &a, const Visit &b) |
bool | operator!= (const Visit &a, const Visit &b) |
bool | operator< (const Visit &a, const Visit &b) |
ostream & | operator<< (ostream &out, const Visit &visit) |
bool | operator== (const SnarlTraversal &a, const SnarlTraversal &b) |
bool | operator!= (const SnarlTraversal &a, const SnarlTraversal &b) |
bool | operator< (const SnarlTraversal &a, const SnarlTraversal &b) |
bool | operator== (const Snarl &a, const Snarl &b) |
bool | operator!= (const Snarl &a, const Snarl &b) |
bool | operator< (const Snarl &a, const Snarl &b) |
ostream & | operator<< (ostream &out, const Snarl &snarl) |
NodeTraversal | to_node_traversal (const Visit &visit, const VG &graph) |
NodeTraversal | to_rev_node_traversal (const Visit &visit, const VG &graph) |
NodeSide | to_left_side (const Visit &visit) |
Converts a Visit to a node or snarl into a NodeSide for its left side. More... | |
NodeSide | to_right_side (const Visit &visit) |
Converts a Visit to a node or snarl into a NodeSide for its right side. More... | |
Visit | to_visit (const NodeTraversal &node_traversal) |
Converts a NodeTraversal to a Visit. More... | |
Visit | to_visit (const Mapping &mapping, bool make_full_node_match=false) |
Visit | to_visit (id_t node_id, bool is_reverse) |
Make a Visit from a node ID and an orientation. More... | |
Visit | to_visit (const Snarl &snarl) |
Make a Visit from a snarl to traverse. More... | |
Visit | to_visit (const handlegraph::HandleGraph &graph, const handle_t &handle) |
Make a Visit from a handle in a HandleGraph. More... | |
Visit | reverse (const Visit &visit) |
Get the reversed version of a visit. More... | |
Visit | to_rev_visit (const NodeTraversal &node_traversal) |
Converts a NodeTraversal to a Visit in the opposite orientation. More... | |
Mapping | to_mapping (const Visit &visit, std::function< size_t(id_t)> node_length) |
Mapping | to_mapping (const Visit &visit, const HandleGraph &vg) |
Alignment | to_alignment (const SnarlTraversal &trav, const HandleGraph &graph) |
Convert a snarl traversal into an alignment. More... | |
void | transfer_boundary_info (const Snarl &from, Snarl &to) |
Copies the boundary Visits from one Snarl into another. More... | |
NodeTraversal | to_node_traversal (const Visit &visit, VG &graph) |
NodeTraversal | to_rev_node_traversal (const Visit &visit, VG &graph) |
multipath_alignment_t | from_hit (const Alignment &alignment, const HandleGraph &graph, const pos_t &hit_pos, const MaximalExactMatch &mem, const GSSWAligner &scorer) |
tuple< pos_t, int64_t, int32_t > | trimmed_end (const Alignment &aln, int64_t len, bool from_end, const HandleGraph &graph, const GSSWAligner &aligner) |
bool | trim_path (path_t *path, bool from_left, int64_t mapping_idx, int64_t edit_idx, int64_t base_idx) |
pair< pair< path_t, int32_t >, pair< path_t, int32_t > > | split_splice_segment (const Alignment &splice_segment, const tuple< int64_t, int64_t, int64_t > &left_trace, const tuple< int64_t, int64_t, int64_t > &right_trace, int64_t splice_junction_idx, const GSSWAligner &scorer, const HandleGraph &graph) |
multipath_alignment_t && | fuse_spliced_alignments (const Alignment &alignment, multipath_alignment_t &&left_mp_aln, multipath_alignment_t &&right_mp_aln, int64_t left_bridge_point, const Alignment &splice_segment, int64_t splice_junction_idx, int32_t splice_score, const GSSWAligner &scorer, const HandleGraph &graph) |
double | median (std::vector< int > &v) |
void | wellford_update (size_t &count, double &mean, double &M2, double new_val) |
pair< double, double > | wellford_mean_var (size_t count, double mean, double M2, bool sample_variance) |
double | Phi (double x) |
The standard normal cumulative distribution function. More... | |
double | Phi_inv (double quantile) |
Inverse CDF of a standard normal distribution. Must have 0 < quantile < 1. More... | |
double | lognormal_pdf (double x, double mu, double sigma) |
Probability density function of a log-normal distribution. More... | |
double | slope (const std::vector< double > &x, const std::vector< double > &y) |
double | fit_zipf (const vector< double > &y) |
double | fit_fixed_shape_max_exponential (const vector< double > &x, double shape, double tolerance=1e-8) |
Returns the MLE rate parameter for the distribution of (shape) iid exponential RVs. More... | |
double | fit_fixed_rate_max_exponential (const vector< double > &x, double rate, double tolerance=1e-8) |
Returns the MLE estimate for the number of iid exponential RVs the data are maxima of. More... | |
pair< double, double > | fit_max_exponential (const vector< double > &x, double tolerance=1e-8) |
Returns the MLE rate and shape parameters of a max exponential. More... | |
double | max_exponential_log_likelihood (const vector< double > &x, double rate, double shape, double location=0.0) |
The log likelihood of a max exponential with the given parameters on the given data. More... | |
pair< double, double > | fit_weibull (const vector< double > &x) |
Returns an estimate of the rate and shape parameters of a Weibull distribution. More... | |
tuple< double, double, double > | fit_offset_weibull (const vector< double > &x, double tolerance=1e-8) |
Returns an estimate of the rate, shape, and location (minimum value) of a 3-parameter Weibull distribution. More... | |
double | weibull_log_likelihood (const vector< double > &x, double scale, double shape, double location=0.0) |
Returns the log likelihood of some data generated by a Weibull distribution. More... | |
double | golden_section_search (const function< double(double)> &f, double x_min, double x_max, double tolerance=1e-8) |
Returns a local maximum of a function within an interval. More... | |
double | phred_to_prob (uint8_t phred) |
Convert 8-bit Phred quality score to probability of wrongness, using a lookup table. More... | |
double | phred_for_at_least_one (size_t p, size_t n) |
double | prob_for_at_least_one (size_t p, size_t n) |
vector< vector< double > > | transpose (const vector< vector< double >> &A) |
A shitty set of linear algebra functions. More... | |
vector< vector< double > > | matrix_multiply (const vector< vector< double >> &A, const vector< vector< double >> &B) |
vector< double > | matrix_multiply (const vector< vector< double >> &A, const vector< double > &b) |
vector< vector< double > > | matrix_invert (const vector< vector< double >> &A) |
vector< double > | regress (const vector< vector< double >> &X, vector< double > &y) |
Returns the coefficients of a regression (does not automatically compute constant) More... | |
template<typename T > | |
double | stdev (const T &v) |
template<typename Number > | |
SummaryStatistics | summary_statistics (const std::map< Number, size_t > &values) |
Returns summary statistics for a multiset of numbers. More... | |
double | add_log (double log_x, double log_y) |
double | subtract_log (double log_x, double log_y) |
double | ln_to_log10 (double ln) |
double | log10_to_ln (double l10) |
double | log10_add_one (double x) |
double | add_log10 (double i, double j) |
template<typename T > | |
T | normal_pdf (T x, T m=0.0, T s=1.0) |
double | prob_to_logprob (double prob) |
Convert a probability to a natural log probability. More... | |
double | logprob_to_prob (double logprob) |
Convert natural log probability to a probability. More... | |
double | logprob_add (double logprob1, double logprob2) |
double | logprob_invert (double logprob) |
Invert a logprob, and get the probability of its opposite. More... | |
double | phred_to_prob (double phred) |
Convert floating point Phred quality score to probability of wrongness. More... | |
double | prob_to_phred (double prob) |
Convert probability of wrongness to integer Phred quality score. More... | |
double | phred_to_logprob (int phred) |
Convert a Phred quality score directly to a natural log probability of wrongness. More... | |
double | logprob_to_phred (double logprob) |
Convert a natural log probability of wrongness directly to a Phred quality score. More... | |
double | logprob_geometric_mean (double lnprob1, double lnprob2) |
Take the geometric mean of two logprobs. More... | |
double | phred_geometric_mean (double phred1, double phred2) |
Take the geometric mean of two phred-encoded probabilities. More... | |
double | phred_add (double phred1, double phred2) |
template<typename Collection > | |
Collection::value_type | logprob_sum (const Collection &collection) |
template<typename Iterator > | |
std::iterator_traits< Iterator >::value_type | phred_sum (const Iterator &begin_it, const Iterator &end_it) |
template<typename Collection > | |
Collection::value_type | phred_sum (const Collection &collection) |
double | max_exponential_cdf (double x, double rate, double shape, double location=0.0) |
Return the CDF of a max exponential with the given parameters. More... | |
double | weibull_cdf (double x, double scale, double shape, double location=0.0) |
Return the CDF of a Weibull distribution with the given parameters. More... | |
real_t | gamma_ln (real_t x) |
real_t | factorial_ln (int n) |
real_t | pow_ln (real_t m, int n) |
real_t | choose_ln (int n, int k) |
real_t | multinomial_choose_ln (int n, vector< int > k) |
real_t | poisson_prob_ln (int observed, real_t expected) |
template<typename ProbIn > | |
real_t | multinomial_sampling_prob_ln (const vector< ProbIn > &probs, const vector< int > &obs) |
template<typename ProbIn > | |
real_t | binomial_cmf_ln (ProbIn success_logprob, size_t trials, size_t successes) |
template<typename ProbIn > | |
real_t | geometric_sampling_prob_ln (ProbIn success_logprob, size_t trials) |
template<typename Iter > | |
bool | advance_split (Iter start, Iter end) |
template<typename ProbIn > | |
real_t | multinomial_censored_sampling_prob_ln (const vector< ProbIn > &probs, const unordered_map< vector< bool >, int > &obs) |
real_t | ewens_af_prob_ln (const vector< int > &a, real_t theta) |
auto | operator<< (ostream &out, const BitString &bs) |
Allow BitStrings to be printed for debugging. More... | |
bool | operator== (const Exon &lhs, const Exon &rhs) |
bool | operator!= (const Exon &lhs, const Exon &rhs) |
bool | operator< (const Exon &lhs, const Exon &rhs) |
bool | operator== (const Transcript &lhs, const Transcript &rhs) |
bool | operator!= (const Transcript &lhs, const Transcript &rhs) |
bool | operator< (const Transcript &lhs, const Transcript &rhs) |
bool | operator== (const Mapping &lhs, const Mapping &rhs) |
bool | operator!= (const Mapping &lhs, const Mapping &rhs) |
bool | operator== (const Path &lhs, const Path &rhs) |
bool | operator!= (const Path &lhs, const Path &rhs) |
bool | sort_pair_by_second (const pair< uint32_t, uint32_t > &lhs, const pair< uint32_t, uint32_t > &rhs) |
bool | sort_transcript_paths_by_name (const CompletedTranscriptPath &lhs, const CompletedTranscriptPath &rhs) |
handle_t | mapping_to_handle (const Mapping &mapping, const HandleGraph &graph) |
bool | is_match (const Translation &translation) |
double | weighted_jaccard_coefficient (const PathHandleGraph *graph, const multiset< handle_t > &target, const multiset< handle_t > &query) |
vector< int > | get_traversal_order (const PathHandleGraph *graph, const vector< Traversal > &traversals, const vector< string > &trav_path_names, const vector< int > &ref_travs, int64_t ref_trav_idx, const vector< bool > &use_traversal) |
vector< vector< int > > | cluster_traversals (const PathHandleGraph *graph, const vector< Traversal > &traversals, const vector< int > &traversal_order, const vector< pair< handle_t, handle_t >> &child_snarls, double min_jaccard, vector< pair< double, int64_t >> &out_info, vector< int > &out_child_snarl_to_trav) |
vector< vector< int > > | assign_child_snarls_to_traversals (const PathHandleGraph *graph, const vector< Traversal > &traversals, const vector< pair< handle_t, handle_t >> &child_snarls) |
void | merge_equivalent_traversals_in_graph (MutablePathHandleGraph *graph, const unordered_set< path_handle_t > &selected_paths, bool use_snarl_manager) |
template<typename T , typename U > | |
double | jaccard_coefficient (const T &target, const U &query) |
string | traversal_to_string (const PathHandleGraph *graph, const Traversal &traversal, int64_t max_steps) |
string | graph_interval_to_string (const HandleGraph *graph, const handle_t &start_handle, const handle_t &end_handle) |
pair< vector< SnarlTraversal >, vector< pair< step_handle_t, step_handle_t > > > | find_path_traversals (const Snarl &site) |
pos_t | make_pos_t (id_t id, bool is_rev, offset_t off) |
Create a pos_t from a Node ID, an orientation flag, and an offset along that strand of the node. More... | |
id_t | id (const pos_t &pos) |
Extract the id of the node a pos_t is on. More... | |
bool | is_rev (const pos_t &pos) |
Return true if a pos_t is on the reverse strand of its node. More... | |
offset_t | offset (const pos_t &pos) |
Get the offset along the selected strand of the node from a pos_t. More... | |
id_t & | get_id (pos_t &pos) |
Get a reference to the Node ID of a pos_t. More... | |
bool & | get_is_rev (pos_t &pos) |
Get a reference to the reverse flag of a pos_t. More... | |
offset_t & | get_offset (pos_t &pos) |
Get a reference to the offset field of a pos_t, which counts along the selected strand of the node. More... | |
bool | is_empty (const pos_t &pos) |
Return true if a pos_t is unset. More... | |
pos_t | empty_pos_t () |
Get an unset pos_t. More... | |
pos_t | reverse (const pos_t &pos, size_t node_length) |
pos_t | reverse_base_pos (const pos_t &pos, size_t node_length) |
Reverse a pos_t and get a pos_t at the same base, going the other direction. More... | |
std::ostream & | operator<< (std::ostream &out, const pos_t &pos) |
Print a pos_t to a stream. More... | |
char | reverse_complement (const char &c) |
string | reverse_complement (const string &seq) |
void | reverse_complement_in_place (string &seq) |
bool | is_all_n (const string &seq) |
double | get_fraction_of_ns (const string &seq) |
int | get_thread_count (void) |
void | choose_good_thread_count () |
std::vector< std::string > & | split_delims (const std::string &s, const std::string &delims, std::vector< std::string > &elems, size_t max_cuts) |
std::vector< std::string > | split_delims (const std::string &s, const std::string &delims, size_t max_cuts) |
bool | starts_with (const std::string &value, const std::string &prefix) |
Check if a string starts with another string. More... | |
const std::string | sha1sum (const std::string &data) |
const std::string | sha1head (const std::string &data, size_t head) |
string | wrap_text (const string &str, size_t width) |
bool | is_number (const std::string &s) |
bool | isATGC (const char &b) |
Return true if a character is an uppercase A, C, G, or T, and false otherwise. More... | |
bool | allATGC (const string &s) |
bool | allATGCN (const string &s) |
string | nonATGCNtoN (const string &s) |
string | allAmbiguousToN (const string &s) |
string | toUppercase (const string &s) |
void | toUppercaseInPlace (string &s) |
void | write_fasta_sequence (const std::string &name, const std::string &sequence, ostream &os, size_t width) |
string | get_or_make_variant_id (const vcflib::Variant &variant) |
string | make_variant_id (const vcflib::Variant &variant) |
vector< size_t > | range_vector (size_t begin, size_t end) |
Vector containing positive integer values in [begin, end) More... | |
std::vector< size_t > | stack_permutations (const std::vector< size_t > &bottom, const std::vector< size_t > &top) |
Apply one permutation on top of another. Return the combined permutation. More... | |
bool | have_input_file (int &optind, int argc, char **argv) |
Return true if there's a command line argument (i.e. input file name) waiting to be processed. More... | |
void | get_input_file (int &optind, int argc, char **argv, function< void(istream &)> callback) |
string | get_input_file_name (int &optind, int argc, char **argv, bool test_open) |
string | get_output_file_name (int &optind, int argc, char **argv) |
void | get_input_file (const string &file_name, function< void(istream &)> callback) |
pair< string, string > | split_ext (const string &filename) |
Split off the extension from a filename and return both parts. More... | |
string | file_base_name (const string &filename) |
Get the base name of a filename (without the directory and the extension). More... | |
bool | file_exists (const string &filename) |
void | create_ref_allele (vcflib::Variant &variant, const std::string &allele) |
int | add_alt_allele (vcflib::Variant &variant, const std::string &allele) |
size_t | integer_power (uint64_t base, uint64_t exponent) |
Computes base^exponent in log(exponent) time. More... | |
size_t | modular_exponent (uint64_t base, uint64_t exponent, uint64_t modulus) |
default_random_engine | random_sequence_gen (102) |
string | random_sequence (size_t length) |
Returns a uniformly random DNA sequence of the given length. More... | |
string | pseudo_random_sequence (size_t length, uint64_t seed) |
Returns a uniformly random DNA sequence deterministically from a seed. More... | |
string | replace_in_string (string subject, const string &search, const string &replace) |
string | percent_url_encode (const string &seq) |
Escape "%" to "%25". More... | |
bool | deterministic_flip (LazyRNG &rng) |
Flip a coin with 50% probability against the given RNG. More... | |
unordered_map< id_t, pair< id_t, bool > > | overlay_node_translations (const unordered_map< id_t, pair< id_t, bool >> &over, const unordered_map< id_t, pair< id_t, bool >> &under) |
Compose the translations from two graph operations, both of which involved oriented transformations. More... | |
unordered_map< id_t, pair< id_t, bool > > | overlay_node_translations (const unordered_map< id_t, id_t > &over, const unordered_map< id_t, pair< id_t, bool >> &under) |
Compose the translations from two graph operations, the first of which involved oriented transformations. More... | |
unordered_map< id_t, pair< id_t, bool > > | overlay_node_translations (const unordered_map< id_t, pair< id_t, bool >> &over, const unordered_map< id_t, id_t > &under) |
Compose the translations from two graph operations, the second of which involved oriented transformations. More... | |
unordered_map< id_t, id_t > | overlay_node_translations (const unordered_map< id_t, id_t > &over, const unordered_map< id_t, id_t > &under) |
Compose the translations from two graph operations, neither of which involved oriented transformations. More... | |
template<> | |
bool | parse (const string &arg, double &dest) |
template<> | |
bool | parse (const string &arg, std::regex &dest) |
template<> | |
bool | parse (const string &arg, pos_t &dest) |
bool | is_number (const string &s) |
template<typename T , typename V > | |
set< T > | map_keys_to_set (const map< T, V > &m) |
template<typename T > | |
vector< T > | pmax (const std::vector< T > &a, const std::vector< T > &b) |
template<typename T > | |
vector< T > | vpmax (const std::vector< std::vector< T >> &vv) |
template<typename Collection > | |
Collection::value_type | sum (const Collection &collection) |
template<template< class T, class A=std::allocator< T >> class Container, typename Input , typename Output > | |
Container< Output > | map_over (const Container< Input > &in, const std::function< Output(const Input &)> &lambda) |
template<template< class T, class A=std::allocator< T >> class Container, typename Item > | |
Container< const Item * > | pointerfy (const Container< Item > &in) |
template<typename Item > | |
VectorView< Item >::const_iterator | operator+ (typename VectorView< Item >::const_iterator::difference_type a, const typename VectorView< Item >::const_iterator &b) |
Allow VectorView iterators to be added to numbers. More... | |
vector< size_t > | range_vector (size_t end) |
Vector containing positive integer values in [0, end) More... | |
template<typename Iterator > | |
std::vector< size_t > | sort_permutation (const Iterator &begin, const Iterator &end, const std::function< bool(const typename Iterator::value_type &, const typename Iterator::value_type &)> &comparator) |
Get the index permutation that sorts the given items with the given comparator instead of <. More... | |
template<typename Iterator > | |
std::vector< size_t > | sort_permutation (const Iterator &begin, const Iterator &end) |
Get the index permutation that sorts the given items ascending using <. More... | |
size_t | integer_power (size_t x, size_t power) |
template<class RandomIt > | |
void | deterministic_shuffle (RandomIt begin, RandomIt end, LazyRNG &rng) |
template<typename Number > | |
bool | deterministic_beats (const Number &a, const Number &b, LazyRNG &rng) |
Return true if a is larger than b, or else equal to b and wins a coin flip. More... | |
string | make_shuffle_seed (const Alignment &aln) |
Make seeds for Alignments based on their sequences. More... | |
template<typename T > | |
string | make_shuffle_seed (const T *ptr) |
Make seeds for pointers to things we can make seeds for. More... | |
template<typename T1 , typename T2 > | |
string | make_shuffle_seed (const pair< T1, T2 > &p) |
Make seeds for pairs of things we can make seeds for. More... | |
template<class RandomIt > | |
void | deterministic_shuffle (RandomIt begin, RandomIt end) |
Do a deterministic shuffle with automatic seed determination. More... | |
template<class RandomIt , class Compare > | |
void | sort_shuffling_ties (RandomIt begin, RandomIt end, Compare comp, LazyRNG &rng) |
template<class RandomIt , class Compare > | |
void | sort_shuffling_ties (RandomIt begin, RandomIt end, Compare comp) |
template<typename Result > | |
Result | parse (const string &arg) |
template<typename Result > | |
Result | parse (const char *arg) |
template<typename Result > | |
bool | parse (const string &arg, Result &dest) |
template<typename Result > | |
bool | parse (const string &arg, typename enable_if< sizeof(Result)<=sizeof(long long) &&is_integral< Result >::value &&is_signed< Result >::value, Result >::type &dest) |
void | genotype_svs (VG *graph, string gamfile, string refpath) |
void | variant_recall (VG *graph, vcflib::VariantCallFile *vars, FastaReference *ref_genome, vector< FastaReference * > insertions, string gamfile) |
tuple< double, double, double > | hash_to_rgb (const string &str, double min_sum) |
size_t | wang_hash_64 (size_t key) |
template<typename Result > | |
bool | parse (const string &arg, typename enable_if< is_instantiation_of< Result, Range >::value, Result >::type &dest) |
Parse a range as start[:end[:step]]. More... | |
Variables | |
const char *const | BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN" |
const char * | var = "VG_FULL_TRACEBACK" |
bool | fullTrace = false |
const char * | ISSUE_URL = "https://github.com/vgteam/vg/issues/new/choose" |
thread_local std::string | stored_crash_context |
const read_alignment_index_t | NO_READ_INDEX = {std::numeric_limits<size_t>::infinity(), std::numeric_limits<size_t>::infinity()} |
Represents an unset index. More... | |
const alignment_index_t | NO_INDEX {std::numeric_limits<size_t>::max(), std::numeric_limits<size_t>::max(), std::numeric_limits<bool>::max()} |
Represents an unset index. More... | |
using vg::benchtime = typedef chrono::nanoseconds |
We define a duration type for expressing benchmark times in.
Snarls are defined at the Protobuf level, but here is how we define chains as real objects.
A chain is a sequence of Snarls, in either normal (false) or reverse (true) orientation.
The SnarlManager is going to have one official copy of each chain stored, and it will give you a pointer to it on demand.
using vg::DeletableHandleGraph = typedef handlegraph::DeletableHandleGraph |
using vg::edge_t = typedef handlegraph::edge_t |
typedef std::map<id_t, std::vector<Edge*> > vg::EdgeMapping |
typedef vector<gbwt::node_type> vg::exon_nodes_t |
using vg::GAMIndex = typedef StreamIndex<Alignment> |
Define a GAM index as a stream index over a stream of Alignments.
using vg::GAMSorter = typedef StreamSorter<Alignment> |
typedef unordered_map<handle_t, int> vg::Handle2Component |
using vg::handle_t = typedef handlegraph::handle_t |
using vg::HandleGraph = typedef handlegraph::HandleGraph |
typedef unordered_set<handle_t> vg::HandleSet |
typedef handlegraph::nid_t vg::id_t |
Represents a Node ID. ID type is a 64-bit signed int.
using vg::IndexGroup = typedef set<IndexName> |
A group of indexes that can be made simultaneously
using vg::IndexName = typedef string |
A unique identifier for an Index
using vg::MutableHandleGraph = typedef handlegraph::MutableHandleGraph |
using vg::MutablePathHandleGraph = typedef handlegraph::MutablePathHandleGraph |
using vg::NamedNodeBackTranslation = typedef handlegraph::NamedNodeBackTranslation |
using vg::nid_t = typedef handlegraph::nid_t |
typedef size_t vg::offset_t |
Represents an offset along the sequence of a Node. Offsets are size_t.
using vg::oriented_node_range_t = typedef handlegraph::oriented_node_range_t |
typedef std::pair<uint32_t, int32_t> vg::pareto_point |
using vg::path_handle_t = typedef handlegraph::path_handle_t |
using vg::PathHandleGraph = typedef handlegraph::PathHandleGraph |
using vg::PathInterval = typedef pair<step_handle_t, step_handle_t> |
using vg::PathMetadata = typedef handlegraph::PathMetadata |
using vg::PathPositionHandleGraph = typedef handlegraph::PathPositionHandleGraph |
using vg::PathSense = typedef handlegraph::PathSense |
using vg::RankedHandleGraph = typedef handlegraph::RankedHandleGraph |
using vg::real_t = typedef long double |
using vg::RecipeFunc = typedef function<vector<vector<string> >(const vector<const IndexFile*>&, const IndexingPlan*, AliasGraph&, const IndexGroup&)> |
Is a recipe to create the files (returned by name) associated with some index, from a series of input indexes, given the plan it is being generated for and the index being generated.
using vg::RecipeName = typedef pair<IndexGroup, size_t> |
Names a recipe in the collection of registered recipes.
using vg::SerializableHandleGraph = typedef handlegraph::SerializableHandleGraph |
using vg::step_handle_t = typedef handlegraph::step_handle_t |
using vg::subrange_t = typedef handlegraph::subrange_t |
typedef vector<gbwt::size_type> vg::thread_ids_t |
using vg::thread_t = typedef vector<gbwt::node_type> |
using vg::Traversal = typedef vector<handle_t> |
using vg::VectorizableHandleGraph = typedef handlegraph::VectorizableHandleGraph |
Flag enum for controlling the behavior of alignment emitters behind get_alignment_emitter().
enum vg::SnarlType |
int vg::add_alt_allele | ( | vcflib::Variant & | variant, |
const std::string & | allele | ||
) |
Add a new alt allele to a vcflib Variant, since apparently there's no method for that already.
If that allele already exists in the variant, does not add it again.
Returns the allele number (0, 1, 2, etc.) corresponding to the given allele string in the given variant.
void vg::add_descendants_to_subgraph | ( | const SnarlDistanceIndex & | distance_index, |
const net_handle_t & | parent, | ||
std::unordered_set< nid_t > & | subgraph | ||
) |
void vg::add_edges_only | ( | MutableHandleGraph * | graph, |
function< void(function< void(Alignment &)>, bool, bool)> | iterate_gam, | ||
double | min_mapq, | ||
size_t | min_bp_coverage | ||
) |
Add edges between consecutive mappings that aren't already in the graph. Note: offsets are completely ignored (a simplifying assumption designed to help with SV genotyping with pack/call, as edge packing works similarly).
No existing nodes or edges are modified, and no nodes are added, just edges. So the output graph will be id-space compatible, and any GAM/GAF will continue to be valid for it.
|
inline |
|
inline |
Return the log of the sum of two log10-transformed values without taking them out of log space.
Path vg::add_nodes_and_edges | ( | MutableHandleGraph * | graph, |
const Path & | path, | ||
const map< pos_t, id_t > & | node_translation, | ||
unordered_map< pair< pos_t, string >, vector< id_t >> & | added_seqs, | ||
unordered_map< id_t, Path > & | added_nodes, | ||
const unordered_map< id_t, size_t > & | orig_node_sizes, | ||
set< NodeSide > & | dangling, | ||
size_t | max_node_size = 1024 |
||
) |
Given a path on nodes that may or may not exist, and a map from start position in the old graph to a node in the current graph, add all the new sequence and edges required by the path. The given path must not contain adjacent perfect match edits in the same mapping, or any deletions on the start or end of mappings (the removal of which can be accomplished with the Path::simplify() function).
Outputs (and caches for subsequent calls) novel node runs in added_seqs, and Paths describing where novel nodes translate back to in the original graph in added_nodes. Also needs a map of the original sizes of nodes deleted from the original graph, for reverse complementing. If dangling is nonempty, left edges of nodes created for initial inserts will connect to the specified sides. At the end, dangling is populated with the side corresponding to the last edit in the path.
Returns a fully embedded version of the path, after all node insertions, divisions, and translations.
Path vg::add_nodes_and_edges | ( | MutableHandleGraph * | graph, |
const Path & | path, | ||
const map< pos_t, id_t > & | node_translation, | ||
unordered_map< pair< pos_t, string >, vector< id_t >> & | added_seqs, | ||
unordered_map< id_t, Path > & | added_nodes, | ||
const unordered_map< id_t, size_t > & | orig_node_sizes, | ||
size_t | max_node_size | ||
) |
This version doesn't require a set of dangling sides to populate
void vg::add_path | ( | const gbwt::GBWT & | source, |
gbwt::size_type | path_id, | ||
gbwt::GBWTBuilder & | builder, | ||
gbwtgraph::MetadataBuilder & | metadata | ||
) |
path_handle_t vg::add_path_to_graph | ( | MutablePathHandleGraph * | graph, |
const Path & | path | ||
) |
Add a path to the graph. This is like VG::extend, and expects a path with no edits, and for all the nodes and edges in the path to exist exactly in the graph
void vg::add_thread_edges_to_set | ( | thread_t & | t, |
set< pair< int, int > > & | edges | ||
) |
void vg::add_thread_nodes_to_set | ( | thread_t & | t, |
set< int64_t > & | nodes | ||
) |
void vg::addArbitraryTelomerePair | ( | vector< stCactusEdgeEnd * > | ends, |
stList * | telomeres | ||
) |
Finds an arbitrary pair of telomeres in a Cactus graph, which are either a pair of bridge edge ends or a pair of chain edge ends, oriented such that they form a pair of boundaries.
Mostly copied from the pinchesAndCacti unit tests.
bool vg::advance_split | ( | Iter | start, |
Iter | end | ||
) |
Given a split of items across a certain number of categories, as ints between the two given bidirectional iterators, advance to the next split and return true. If there is no next split, leave the collection unchanged and return false.
pair< int64_t, int64_t > vg::aligned_interval | ( | const multipath_alignment_t & | multipath_aln | ) |
The indexes on the read sequence of the portion of the read that is aligned outside of soft clips.
int vg::alignment_from_length | ( | const Alignment & | a | ) |
Alignment vg::alignment_from_path | ( | const HandleGraph & | graph, |
const Path & | path | ||
) |
AlignmentValidity vg::alignment_is_valid | ( | const Alignment & | aln, |
const HandleGraph * | hgraph, | ||
bool | check_sequence = false |
||
) |
Check to make sure edits on the alignment's path don't assume incorrect node lengths or ids. Result can be used like a bool or inspected for further details. Does not log anything itself about bad alignments.
map< string,vector< pair< size_t, bool > > > vg::alignment_refpos_to_path_offsets | ( | const Alignment & | aln | ) |
return the path offsets as cached in the alignment
void vg::alignment_set_distance_to_correct | ( | Alignment & | aln, |
const Alignment & | base, | ||
const unordered_map< string, string > * | translation = nullptr |
||
) |
Annotate the first alignment with its minimum distance to the second in their annotated paths. If translation is set, replace path names in aln using that mapping, if they are found in it.
void vg::alignment_set_distance_to_correct | ( | Alignment & | aln, |
const map< string, vector< pair< size_t, bool > > > & | base_offsets, | ||
const unordered_map< string, string > * | translation | ||
) |
void vg::alignment_set_distance_to_correct | ( | Alignment & | aln, |
const map< string, vector< pair< size_t, bool >>> & | base_offsets, | ||
const unordered_map< string, string > * | translation = nullptr |
||
) |
bam1_t * vg::alignment_to_bam | ( | bam_hdr_t * | bam_header, |
const Alignment & | alignment, | ||
const string & | refseq, | ||
const int32_t | refpos, | ||
const bool | refrev, | ||
const vector< pair< int, char >> & | cigar | ||
) |
Convert an unpaired Alignment to a BAM record. If the alignment is unmapped, refpos must be -1. Otherwise, refpos must be the position on the reference sequence to which the alignment is aligned. Similarly, refseq must be the sequence aligned to, or "" if unaligned.
Remember to clean up with bam_destroy1(b);
bam1_t * vg::alignment_to_bam | ( | bam_hdr_t * | bam_header, |
const Alignment & | alignment, | ||
const string & | refseq, | ||
const int32_t | refpos, | ||
const bool | refrev, | ||
const vector< pair< int, char >> & | cigar, | ||
const string & | mateseq, | ||
const int32_t | matepos, | ||
bool | materev, | ||
const int32_t | tlen, | ||
const int32_t | tlen_max = 0 |
||
) |
Convert a paired Alignment to a BAM record. If the alignment is unmapped, refpos must be -1. Otherwise, refpos must be the position on the reference sequence to which the alignment is aligned. Similarly, refseq must be the sequence aligned to, or "" if unaligned. The mateseq and matepos fields must be set similarly for the mate. Note that mateseq must not be "=". If tlen_max is given, it is a limit on the magnitude of tlen to consider the read properly paired.
Remember to clean up with bam_destroy1(b);
bam1_t* vg::alignment_to_bam_internal | ( | bam_hdr_t * | header, |
const Alignment & | alignment, | ||
const string & | refseq, | ||
const int32_t | refpos, | ||
const bool | refrev, | ||
const vector< pair< int, char >> & | cigar, | ||
const string & | mateseq, | ||
const int32_t | matepos, | ||
bool | materev, | ||
const int32_t | tlen, | ||
bool | paired, | ||
const int32_t | tlen_max | ||
) |
int vg::alignment_to_length | ( | const Alignment & | a | ) |
string vg::alignment_to_sam | ( | const Alignment & | alignment, |
const string & | refseq, | ||
const int32_t | refpos, | ||
const bool | refrev, | ||
const vector< pair< int, char >> & | cigar | ||
) |
Convert an unpaired Alignment to a SAM record. If the alignment is unmapped, refpos must be -1. Otherwise, refpos must be the position on the reference sequence to which the alignment is aligned. Similarly, refseq must be the sequence aligned to, or "" if unaligned.
string vg::alignment_to_sam | ( | const Alignment & | alignment, |
const string & | refseq, | ||
const int32_t | refpos, | ||
const bool | refrev, | ||
const vector< pair< int, char >> & | cigar, | ||
const string & | mateseq, | ||
const int32_t | matepos, | ||
bool | materev, | ||
const int32_t | tlen, | ||
const int32_t | tlen_max = 0 |
||
) |
Convert a paired Alignment to a SAM record. If the alignment is unmapped, refpos must be -1. Otherwise, refpos must be the position on the reference sequence to which the alignment is aligned. Similarly, refseq must be the sequence aligned to, or "" if unaligned. The mateseq and matepos fields must be set similarly for the mate. Note that mateseq must not be "=". If tlen_max is given, it is a limit on the magnitude of tlen to consider the read properly paired.
string vg::alignment_to_sam_internal | ( | const Alignment & | alignment, |
const string & | refseq, | ||
const int32_t | refpos, | ||
const bool | refrev, | ||
const vector< pair< int, char >> & | cigar, | ||
const string & | mateseq, | ||
const int32_t | matepos, | ||
bool | materev, | ||
const int32_t | tlen, | ||
bool | paired, | ||
const int32_t | tlen_max | ||
) |
string vg::allAmbiguousToN | ( | const string & | s | ) |
Convert known IUPAC ambiguity codes (which we don't support) to N (which we do), while leaving any other garbage to trigger validation checks later.
bool vg::allATGC | ( | const string & | s | ) |
bool vg::allATGCN | ( | const string & | s | ) |
Turn the given path into an allele. Drops the first and last mappings and looks up the sequences for the nodes of the others.
|
inline |
Add a CIGAR operation to a vector representing the parsed CIGAR string.
Coalesces adjacent operations of the same type. Coalesces runs of inserts and deletes into a single delete followed by a single insert.
void vg::append_multipath_alignment | ( | multipath_alignment_t & | multipath_aln, |
const multipath_alignment_t & | to_append | ||
) |
Add the subpaths of one multipath alignment onto another.
Append the second path onto the end of the first, without combining mappings or simplifying. Modifies and returns a reference to the first path.
int64_t vg::approx_gbwt_memory | ( | const string & | vcf_filename | ) |
int64_t vg::approx_graph_load_memory | ( | const string & | graph_filename | ) |
int64_t vg::approx_graph_memory | ( | const string & | fasta_filename, |
const string & | vcf_filename | ||
) |
int64_t vg::approx_graph_memory | ( | const string & | gfa_filename | ) |
int64_t vg::approx_graph_memory | ( | const vector< string > & | fasta_filenames, |
const vector< string > & | vcf_filenames | ||
) |
double vg::approx_num_vars | ( | const string & | vcf_filename | ) |
vector< vector< int > > vg::assign_child_snarls_to_traversals | ( | const PathHandleGraph * | graph, |
const vector< Traversal > & | traversals, | ||
const vector< pair< handle_t, handle_t >> & | child_snarls | ||
) |
Assign a list of child snarls to the traversals that fully contain them. The output is a list (for each traversal) of each child snarl that's contained in it, so output[i] = {x,y,z} means that child_snarls[x],[y],[z] are in traversals[i].
void vg::augment | ( | MutablePathMutableHandleGraph * | graph, |
const string & | gam_path, | ||
const string & | aln_format = "GAM" , |
||
vector< Translation > * | out_translation = nullptr , |
||
const string & | gam_out_path = "" , |
||
bool | embed_paths = false , |
||
bool | break_at_ends = false , |
||
bool | remove_soft_clips = false , |
||
bool | filter_out_of_graph_alignments = false , |
||
double | min_baseq = 0 , |
||
double | min_mapq = 0 , |
||
Packer * | packer = nullptr , |
||
size_t | min_bp_coverage = 0 , |
||
double | max_frac_n = 1. , |
||
bool | edges_only = false |
||
) |
Edit the graph to include all the sequence and edges added by the given paths. Can handle paths that visit nodes in any orientation. Note that this method sorts the graph and rebuilds the path index, so it should not be called in a loop.
If gam_path is "-", then stdin is used. If gam_out_path is "-", then stdout is used. If gam_out_path is not empty, the paths will be modified to reflect their embedding in the modified graph and written to the path. aln_format is used to toggle between GAM and GAF. If out_translation is not null, it is populated with a list of translations, one per node existing after the edit, describing how each new or conserved node is embedded in the old graph. If embed_paths is true, then the augmented alignments will be saved as embedded paths in the graph, in order to add them back to the graph. If break_at_ends is true, nodes will be broken at the ends of paths that start/end with perfect matches, so the paths can be added to the vg graph's paths object. If remove_soft_clips is true, soft clips will be removed from the input paths before processing, and the dangling ends won't end up in the graph. If filter_out_of_graph_alignments is true, some extra time will be taken to check if all nodes in the alignment are in the graph. If they aren't, then the alignment will be ignored. If an edit sequence's average base quality is less than min_baseq, it will be ignored (considered a match). If an alignment's mapping quality is less than min_mapq, it is ignored. A packer is required for all non-mapq filters. If a breakpoint has less than min_bp_coverage, it is not included in the graph. Edits with more than max_frac_n N content will be ignored.
void vg::augment | ( | MutablePathMutableHandleGraph * | graph, |
vector< Path > & | path_vector, | ||
const string & | aln_format = "GAM" , |
||
vector< Translation > * | out_translation = nullptr , |
||
const string & | gam_out_path = "" , |
||
bool | embed_paths = false , |
||
bool | break_at_ends = false , |
||
bool | remove_soft_clips = false , |
||
bool | filter_out_of_graph_alignments = false , |
||
double | min_baseq = 0 , |
||
double | min_mapq = 0 , |
||
Packer * | packer = nullptr , |
||
size_t | min_bp_coverage = 0 , |
||
double | max_frac_n = 1. , |
||
bool | edges_only = false |
||
) |
Like above, but operates on a vector of Paths (path_vector) instead of a file. (Note: It is best to use the file interface to stream large numbers of alignments to save memory.)
void vg::augment_impl | ( | MutablePathMutableHandleGraph * | graph, |
function< void(function< void(Alignment &)>, bool, bool)> | iterate_gam, | ||
const string & | aln_format, | ||
vector< Translation > * | out_translations, | ||
const string & | gam_out_path, | ||
bool | embed_paths, | ||
bool | break_at_ends, | ||
bool | remove_softclips, | ||
bool | filter_out_of_graph_alignments, | ||
double | min_baseq, | ||
double | min_mapq, | ||
Packer * | packer, | ||
size_t | min_bp_coverage, | ||
double | max_frac_n, | ||
bool | edges_only | ||
) |
Generic version used to implement the above three methods.
const vector< string > vg::balanced_kmers | ( | const string & | seq, |
const int | kmer_size, | ||
const int | stride | ||
) |
const int vg::balanced_stride | ( | int | read_length, |
int | kmer_size, | ||
int | stride | ||
) |
Alignment vg::bam_to_alignment | ( | const bam1_t * | b, |
const map< string, string > & | rg_sample, | ||
const map< int, path_handle_t > & | tid_path_handle | ||
) |
Alignment vg::bam_to_alignment | ( | const bam1_t * | b, |
const map< string, string > & | rg_sample, | ||
const map< int, path_handle_t > & | tid_path_handle, | ||
const bam_hdr_t * | bh, | ||
const PathPositionHandleGraph * | graph | ||
) |
void vg::benchmark_control | ( | ) |
The benchmark control function, designed to take some amount of time that might vary with CPU load.
real_t vg::binomial_cmf_ln | ( | ProbIn | success_logprob, |
size_t | trials, | ||
size_t | successes | ||
) |
Compute the probability of having the given number of successes or fewer in the given number of trials, with the given success probability. Returns the resulting log probability.
void vg::build_gcsa_lcp | ( | const HandleGraph & | graph, |
gcsa::GCSA *& | gcsa, | ||
gcsa::LCPArray *& | lcp, | ||
int | kmer_size, | ||
size_t | doubling_steps, | ||
size_t | size_limit, | ||
const string & | base_file_name | ||
) |
VG vg::cactus_to_vg | ( | stCactusGraph * | cactus_graph | ) |
PhaseUnfolder::path_type vg::canonical_orientation | ( | const PhaseUnfolder::path_type & | path, |
bool & | from_border, | ||
bool & | to_border | ||
) |
ChainIterator vg::chain_begin | ( | const Chain & | chain | ) |
We define free functions for getting iterators forward and backward through chains.
ChainIterator vg::chain_begin_from | ( | const Chain & | chain, |
const Snarl * | start_snarl, | ||
bool | snarl_orientation | ||
) |
We also define a function for getting the ChainIterator (forward or reverse complement) for a chain starting with a given snarl in the given inward orientation. Only works for bounding snarls of the chain.
ChainIterator vg::chain_end | ( | const Chain & | chain | ) |
ChainIterator vg::chain_end_from | ( | const Chain & | chain, |
const Snarl * | start_snarl, | ||
bool | snarl_orientation | ||
) |
And the end iterator for the chain (forward or reverse complement) viewed from a given snarl in the given inward orientation. Only works for bounding snarls of the chain, and should be the same bounding snarl as was used for chain_begin_from.
ChainIterator vg::chain_rbegin | ( | const Chain & | chain | ) |
ChainIterator vg::chain_rcbegin | ( | const Chain & | chain | ) |
We also define some reverse complement iterators, which go from right to left through the chains, but give us the reverse view. For example, if all the snarls are oriented forward in the chain, we will iterate through the snarls in reverse order, with each individual snarl also reversed.
ChainIterator vg::chain_rcend | ( | const Chain & | chain | ) |
ChainIterator vg::chain_rend | ( | const Chain & | chain | ) |
void vg::choose_good_thread_count | ( | ) |
Decide on and apply a sensible OMP thread count. Pay attention to OMP_NUM_THREADS if set, the "hardware concurrency", and container limit information that may be available in /proc.
|
inline |
Compute the number of ways to select k items from a collection of n distinguishable items, ignoring order. Returns the natural log of the (integer) result.
vector< pair< int, char > > vg::cigar_against_path | ( | const Alignment & | alignment, |
bool | on_reverse_strand, | ||
int64_t & | pos, | ||
size_t | path_len, | ||
size_t | softclip_suppress | ||
) |
Create a CIGAR from the given Alignment. If softclip_suppress is nonzero, suppress softclips up to that length. This will necessitate adjusting pos, which is why it is passed by reference.
vector< pair< int, char > > vg::cigar_against_path | ( | const multipath_alignment_t & | multipath_aln, |
const string & | path_name, | ||
bool | rev, | ||
int64_t | path_pos, | ||
const PathPositionHandleGraph & | graph, | ||
int64_t | min_splice_length = numeric_limits< int64_t >::max() |
||
) |
Convert a surjected multipath alignment into a CIGAR sequence against a path. Splicing will be allowed at connections and at any silent deletions of path sequence. Surjected multipath alignment graph must consist of a single non-branching path
void vg::cigar_mapping | ( | const bam1_t * | b, |
Mapping & | mapping | ||
) |
int64_t vg::cigar_mapping | ( | const bam1_t * | b, |
Mapping * | mapping | ||
) |
string vg::cigar_string | ( | const vector< pair< int, char > > & | cigar | ) |
std::vector<std::pair<Recombinator::kmer_presence, double> > vg::classify_kmers | ( | const Haplotypes::Subchain & | subchain, |
const hash_map< Haplotypes::Subchain::kmer_type, size_t > & | kmer_counts, | ||
double | coverage, | ||
Recombinator::Statistics * | statistics, | ||
const Recombinator::Parameters & | parameters | ||
) |
void vg::clear_alignment | ( | multipath_alignment_t & | multipath_aln | ) |
Clear all of the fields associated with the alignment.
|
inline |
Clear the annotation with the given name.
|
inline |
Clear the annotation with the given name.
void vg::clear_crash_context | ( | ) |
User code should call this when it wants to clear context for a failure in its thread.
void vg::clip_contained_low_depth_nodes_and_edges | ( | MutablePathMutableHandleGraph * | graph, |
PathPositionHandleGraph * | pp_graph, | ||
const vector< Region > & | regions, | ||
SnarlManager & | snarl_manager, | ||
bool | include_endpoints, | ||
int64_t | min_depth, | ||
int64_t | min_fragment_len, | ||
bool | verbose | ||
) |
Or on contained snarls
void vg::clip_contained_snarls | ( | MutablePathMutableHandleGraph * | graph, |
PathPositionHandleGraph * | pp_graph, | ||
const vector< Region > & | regions, | ||
SnarlManager & | snarl_manager, | ||
bool | include_endpoints, | ||
int64_t | min_fragment_len, | ||
size_t | max_nodes, | ||
size_t | max_edges, | ||
size_t | max_nodes_shallow, | ||
size_t | max_edges_shallow, | ||
double | max_avg_degree, | ||
double | max_reflen_prop, | ||
size_t | max_reflen, | ||
bool | out_bed, | ||
bool | verbose | ||
) |
If a given bed region spans a snarl (overlaps its end nodes, and forms a traversal) then clip out all other nodes (ie nodes that don't lie on the traversal)
IMPORTANT: for any given snarl, the first region that contains it is used. (but other reference paths now whitelisted via ref_prefixes)
Update: now accepts some snarl complexity thresholds to ignore simple enough snarls
void vg::clip_contained_stubs | ( | MutablePathMutableHandleGraph * | graph, |
PathPositionHandleGraph * | pp_graph, | ||
const vector< Region > & | regions, | ||
SnarlManager & | snarl_manager, | ||
bool | include_endpoints, | ||
int64_t | min_fragment_len, | ||
bool | verbose | ||
) |
void vg::clip_deletion_edges | ( | MutablePathMutableHandleGraph * | graph, |
int64_t | max_deletion, | ||
int64_t | context_steps, | ||
const vector< string > & | ref_prefixes, | ||
int64_t | min_fragment_len, | ||
bool | verbose | ||
) |
clip out deletion edges
void vg::clip_low_depth_nodes_and_edges | ( | MutablePathMutableHandleGraph * | graph, |
int64_t | min_depth, | ||
const vector< string > & | ref_prefixes, | ||
int64_t | min_fragment_len, | ||
bool | verbose | ||
) |
Run above function on graph
void vg::clip_low_depth_nodes_and_edges_generic | ( | MutablePathMutableHandleGraph * | graph, |
function< void(function< void(handle_t, const Region *)>)> | iterate_handles, | ||
function< void(function< void(edge_t, const Region *)>)> | iterate_edges, | ||
int64_t | min_depth, | ||
const vector< string > & | ref_prefixes, | ||
int64_t | min_fragment_len, | ||
bool | verbose | ||
) |
Clip out nodes that don't pass depth threshold (depth < min_depth).
"depth" is the number of paths that step on the node. Nodes on path with given prefix ignored (todo: should really switch to regex or something) iterate_handles is a hack to generalize this function to whole graphs or snarls
void vg::clip_stubs | ( | MutablePathMutableHandleGraph * | graph, |
const vector< string > & | ref_prefixes, | ||
int64_t | min_fragment_len, | ||
bool | verbose | ||
) |
clip out stubs
void vg::clip_stubs_generic | ( | MutablePathMutableHandleGraph * | graph, |
function< void(function< void(handle_t, const Region *)>)> | iterate_handles, | ||
function< bool(handle_t)> | handle_in_range, | ||
const vector< string > & | ref_prefixes, | ||
int64_t | min_fragment_len, | ||
bool | verbose | ||
) |
vector< string::const_iterator > vg::cluster_cover | ( | const vector< MaximalExactMatch > & | cluster | ) |
int vg::cluster_coverage | ( | const vector< MaximalExactMatch > & | cluster | ) |
vector< pos_t > vg::cluster_nodes | ( | const vector< MaximalExactMatch > & | cluster | ) |
bdsg::HashGraph vg::cluster_subgraph_containing | ( | const HandleGraph & | base, |
const Alignment & | aln, | ||
const vector< vg::MaximalExactMatch > & | cluster, | ||
const GSSWAligner * | aligner | ||
) |
return a containing subgraph connecting the mems
bdsg::HashGraph vg::cluster_subgraph_walk | ( | const HandleGraph & | base, |
const Alignment & | aln, | ||
const vector< vg::MaximalExactMatch > & | mems, | ||
double | expansion | ||
) |
Return a subgraph for a cluster of MEMs from the given alignment. Uses walking to get the hits.
vector< vector< int > > vg::cluster_traversals | ( | const PathHandleGraph * | graph, |
const vector< Traversal > & | traversals, | ||
const vector< int > & | traversal_order, | ||
const vector< pair< handle_t, handle_t >> & | child_snarls, | ||
double | min_jaccard, | ||
vector< pair< double, int64_t >> & | out_info, | ||
vector< int > & | out_child_snarl_to_trav | ||
) |
cluster the traversals. The algorithm is:
bool vg::clusters_overlap_in_graph | ( | const vector< MaximalExactMatch > & | cluster1, |
const vector< MaximalExactMatch > & | cluster2 | ||
) |
bool vg::clusters_overlap_in_read | ( | const vector< MaximalExactMatch > & | cluster1, |
const vector< MaximalExactMatch > & | cluster2 | ||
) |
int vg::clusters_overlap_length | ( | const vector< MaximalExactMatch > & | cluster1, |
const vector< MaximalExactMatch > & | cluster2 | ||
) |
std::string vg::compose_short_path_name | ( | const gbwt::GBWT & | gbwt_index, |
gbwt::size_type | id | ||
) |
Get a short version of a string representation of a thread name stored in GBWT metadata, made of just the sample and contig and haplotype. NOTE: id is a gbwt path id, not a gbwt sequence id.
pair< int32_t, int32_t > vg::compute_template_lengths | ( | const int64_t & | pos1, |
const vector< pair< int, char >> & | cigar1, | ||
const int64_t & | pos2, | ||
const vector< pair< int, char >> & | cigar2 | ||
) |
Work out the TLEN values for two reads. The magnitude is the distance between the outermost aligned bases, and the sign is positive for the leftmost read and negative for the rightmost.
vector< vector< int64_t > > vg::connected_components | ( | const multipath_alignment_t & | multipath_aln | ) |
Returns a vector whose elements are vectors with the indexes of the subpath_t's in each connected component. An unmapped multipath_alignment_t with no subpaths produces an empty vector.
void vg::connected_comps_do | ( | const multipath_alignment_t & | multipath_aln, |
function< void(void)> & | on_new_component, | ||
function< void(size_t)> & | on_new_node | ||
) |
void vg::construct_graph_from_nodes_and_edges | ( | Graph & | g, |
const HandleGraph & | source, | ||
set< int64_t > & | nodes, | ||
set< pair< int, int > > & | edges | ||
) |
bool vg::contains_connection | ( | const multipath_alignment_t & | multipath_aln | ) |
Returns true if any subpath has a connection adjacency.
bool vg::contains_match | ( | const multipath_alignment_t & | multipath_aln, |
const pos_t & | pos, | ||
int64_t | read_pos, | ||
int64_t | match_length | ||
) |
Returns true if the multipath alignment contains a match of a given length starting at the graph and read position
bool vg::convert | ( | const std::string & | s, |
T & | r | ||
) |
std::string vg::convert | ( | const T & | r | ) |
void vg::convert_alignment_char | ( | Alignment & | alignment, |
char | from, | ||
char | to | ||
) |
void vg::convert_multipath_alignment_char | ( | multipath_alignment_t & | multipath_aln, |
char | from, | ||
char | to | ||
) |
void vg::convert_Ts_to_Us | ( | Alignment & | alignment | ) |
Replaces any T's in the sequence or the Path with U's.
void vg::convert_Ts_to_Us | ( | multipath_alignment_t & | multipath_aln | ) |
Replaces all T's in the sequence and the aligned Paths with U's.
void vg::convert_Us_to_Ts | ( | Alignment & | alignment | ) |
Replaces any U's in the sequence or the Path with T's.
void vg::convert_Us_to_Ts | ( | multipath_alignment_t & | multipath_aln | ) |
Replaces all U's in the sequence and the aligned Paths with T's.
void vg::copy_file | ( | const string & | from_fp, |
const string & | to_fp | ||
) |
void vg::copy_metadata | ( | const gbwt::GBWT & | source, |
gbwt::GBWT & | target, | ||
const std::vector< std::vector< gbwt::size_type >> & | jobs, | ||
const std::vector< size_t > & | job_order | ||
) |
void vg::copy_reference_samples | ( | const gbwt::GBWT & | source, |
gbwt::GBWT & | destination | ||
) |
Copies the reference sample tag from the source GBWT index to the destination GBWT index.
void vg::copy_reference_samples | ( | const PathHandleGraph & | source, |
gbwt::GBWT & | destination | ||
) |
Copies reference samples from the source graph to the destination GBWT index. Every sample with at least one reference path in the source graph is considered a reference sample.
int vg::corresponding_from_length | ( | const path_t & | path, |
int | to_length, | ||
bool | from_end | ||
) |
int vg::corresponding_length_internal | ( | const path_t & | path, |
int | given_length, | ||
bool | is_from_length, | ||
bool | from_end | ||
) |
int vg::corresponding_to_length | ( | const path_t & | path, |
int | from_length, | ||
bool | from_end | ||
) |
void vg::crash_unless_impl | ( | bool | condition, |
const std::string & | condition_string, | ||
const std::string & | file, | ||
int | line, | ||
const std::string & | function | ||
) |
crash_unless calls into this function for a real implementation.
void vg::create_ref_allele | ( | vcflib::Variant & | variant, |
const std::string & | allele | ||
) |
Create the reference allele for an empty vcflib Variant, since apparently there's no method for that already. Must be called before any alt alleles are added.
string vg::debug_string | ( | const connection_t & | connection | ) |
string vg::debug_string | ( | const edit_t & | edit | ) |
string vg::debug_string | ( | const multipath_alignment_t & | multipath_aln | ) |
string vg::debug_string | ( | const path_mapping_t & | mapping | ) |
string vg::debug_string | ( | const path_t & | path | ) |
string vg::debug_string | ( | const position_t & | pos | ) |
string vg::debug_string | ( | const subpath_t & | subpath | ) |
void vg::decompose | ( | const Path & | path, |
map< pos_t, int > & | ref_positions, | ||
map< pos_t, Edit > & | edits | ||
) |
void vg::delete_nodes_and_chop_paths | ( | MutablePathMutableHandleGraph * | graph, |
const unordered_set< nid_t > & | nodes_to_delete, | ||
const unordered_set< edge_t > & | edges_to_delete, | ||
int64_t | min_fragment_len, | ||
unordered_map< string, size_t > * | fragments_per_path | ||
) |
int32_t vg::determine_flag | ( | const Alignment & | alignment, |
const string & | refseq, | ||
const int32_t | refpos, | ||
const bool | refrev, | ||
const string & | mateseq, | ||
const int32_t | matepos, | ||
bool | materev, | ||
const int32_t | tlen, | ||
bool | paired, | ||
const int32_t | tlen_max | ||
) |
Returns the SAM bit-coded flag for the given alignment.
bool vg::deterministic_beats | ( | const Number & | a, |
const Number & | b, | ||
LazyRNG & | rng | ||
) |
Return true if a is larger than b, or else equal to b and wins a coin flip.
bool vg::deterministic_flip | ( | LazyRNG & | rng | ) |
Flip a coin with 50% probability against the given RNG.
void vg::deterministic_shuffle | ( | RandomIt | begin, |
RandomIt | end | ||
) |
Do a deterministic shuffle with automatic seed determination.
void vg::deterministic_shuffle | ( | RandomIt | begin, |
RandomIt | end, | ||
LazyRNG & | rng | ||
) |
Given a pair of random access iterators defining a range, deterministically shuffle the contents of the range based on the given RNG. Allows one RNG from deterministic_start() to be used for multiple shuffles.
double vg::divergence | ( | const Mapping & | m | ) |
vector<int64_t> vg::each_approx_graph_memory | ( | const vector< string > & | fasta_filenames, |
const vector< string > & | vcf_filenames | ||
) |
int vg::edit_count | ( | const Alignment & | alignment | ) |
Return true if two edits could be combined into one (assuming adjacency).
void vg::emit_stacktrace | ( | int | signalNumber, |
siginfo_t * | signalInfo, | ||
void * | signalContext | ||
) |
Emit a stack trace when something bad happens. Add as a signal handler with sigaction.
|
inline |
Returns an empty GBWTGraph handle corresponding to the GBWT endmarker.
|
inline |
Get an unset pos_t.
void vg::enable_crash_handling | ( | ) |
Main should call this to turn on our stack tracing support.
gcsa::byte_type vg::encode_chars | ( | const vector< char > & | chars, |
const gcsa::Alphabet & | alpha | ||
) |
Encode the chars into the gcsa2 byte.
bool vg::end_backward | ( | const Chain & | chain | ) |
Return true if the last snarl in the given chain is backward relative to the chain.
map< pos_t, id_t > vg::ensure_breakpoints | ( | MutableHandleGraph * | graph, |
const unordered_map< id_t, set< pos_t >> & | breakpoints | ||
) |
Take a map from node ID to a set of offsets at which new nodes should start (which may include 0 and 1-past-the-end, which should be ignored), and break the specified nodes at those positions. Returns a map from old node start position to new node pointer in the graph. Note that the caller will have to clear and rebuild path rank data.
Returns a map from old node start position to new node. This map contains some entries pointing to null, for positions past the ends of original nodes. It also maps from positions on either strand of the old node to the same new node pointer; the new node's forward strand is always the same as the old node's forward strand.
double vg::entropy | ( | const char * | st, |
size_t | len | ||
) |
double vg::entropy | ( | const string & | st | ) |
int vg::execute_in_fork | ( | const function< void(void)> & | exec | ) |
gbwt::vector_type vg::extract_as_gbwt_path | ( | const PathHandleGraph & | graph, |
const std::string & | path_name | ||
) |
Extract a path as a GBWT path. If the path does not exist, it is treated as empty.
Path vg::extract_gbwt_path | ( | const HandleGraph & | graph, |
const gbwt::GBWT & | gbwt_index, | ||
gbwt::size_type | id | ||
) |
Extract a GBWT thread as a path in the given graph. NOTE: id is a gbwt path id, not a gbwt sequence id.
pair< vector< pair< string, int64_t > >, unordered_map< string, int64_t > > vg::extract_path_metadata | ( | const vector< tuple< path_handle_t, size_t, size_t >> & | paths, |
const PathPositionHandleGraph & | graph, | ||
bool | subpath_support = false |
||
) |
Given a list of path handles and size info (from get_sequence_dictionary), return two things: 1) names and lengths of all of base paths in order. 2) a mapping of path names to length (reflects paths in the graph including subpaths)
If subpath_support is set to false, there won't be a distinction.
void vg::extract_sub_multipath_alignment | ( | const multipath_alignment_t & | multipath_aln, |
const vector< int64_t > & | subpath_indexes, | ||
multipath_alignment_t & | sub_multipath_aln | ||
) |
Extract the multipath_alignment_t consisting of the Subpaths with the given indexes into a new multipath_alignment_t object
|
inline |
Calculate the natural log of the factorial of the given integer. TODO: replace with a cache or giant lookup table from Freebayes.
int vg::fastq_for_each | ( | string & | filename, |
function< void(Alignment &)> | lambda | ||
) |
size_t vg::fastq_paired_interleaved_for_each | ( | const string & | filename, |
function< void(Alignment &, Alignment &)> | lambda | ||
) |
size_t vg::fastq_paired_interleaved_for_each_parallel | ( | const string & | filename, |
function< void(Alignment &, Alignment &)> | lambda, | ||
uint64_t | batch_size | ||
) |
size_t vg::fastq_paired_interleaved_for_each_parallel_after_wait | ( | const string & | filename, |
function< void(Alignment &, Alignment &)> | lambda, | ||
function< bool(void)> | single_threaded_until_true, | ||
uint64_t | batch_size | ||
) |
size_t vg::fastq_paired_two_files_for_each | ( | const string & | file1, |
const string & | file2, | ||
function< void(Alignment &, Alignment &)> | lambda | ||
) |
size_t vg::fastq_paired_two_files_for_each_parallel | ( | const string & | file1, |
const string & | file2, | ||
function< void(Alignment &, Alignment &)> | lambda, | ||
uint64_t | batch_size | ||
) |
size_t vg::fastq_paired_two_files_for_each_parallel_after_wait | ( | const string & | file1, |
const string & | file2, | ||
function< void(Alignment &, Alignment &)> | lambda, | ||
function< bool(void)> | single_threaded_until_true, | ||
uint64_t | batch_size | ||
) |
size_t vg::fastq_unpaired_for_each | ( | const string & | filename, |
function< void(Alignment &)> | lambda | ||
) |
size_t vg::fastq_unpaired_for_each_parallel | ( | const string & | filename, |
function< void(Alignment &)> | lambda, | ||
uint64_t | batch_size | ||
) |
string vg::file_base_name | ( | const string & | filename | ) |
Get the base name of a filename (without the directory and the extension).
bool vg::file_exists | ( | const string & | filename | ) |
Determine if a file exists. Only works for files readable by the current user.
void vg::fill_in_distance_index | ( | SnarlDistanceIndex * | distance_index, |
const HandleGraph * | graph, | ||
const HandleGraphSnarlFinder * | snarl_finder, | ||
size_t | size_limit, | ||
bool | silence_warnings | ||
) |
unordered_map< id_t, set< pos_t > > vg::filter_breakpoints_by_coverage | ( | const Packer & | packed_breakpoints, |
size_t | min_bp_coverage | ||
) |
Filters the breakpoints by coverage, and converts them back from the Packer to the STL map expected by following methods
void vg::find_breakpoints | ( | const Path & | path, |
unordered_map< id_t, set< pos_t >> & | breakpoints, | ||
bool | break_ends = true , |
||
const string & | base_quals = "" , |
||
double | min_baseq = 0 , |
||
double | max_frac_n = 1. |
||
) |
Find all the points at which a Path enters or leaves nodes in the graph. Adds them to the given map by node ID of sets of bases in the node that will need to become the starts of new nodes.
If break_ends is true, emits breakpoints at the ends of the path, even if it starts/ends with perfect matches. Find all the points at which a Path enters or leaves nodes in the graph. Adds them to the given map by node ID of sets of bases in the node that will need to become the starts of new nodes.
If break_ends is true, emits breakpoints at the ends of the path, even if it starts/ends with perfect matches.
hash_map<Haplotypes::Subchain::kmer_type, size_t>::iterator vg::find_kmer | ( | hash_map< Haplotypes::Subchain::kmer_type, size_t > & | counts, |
Haplotypes::Subchain::kmer_type | kmer, | ||
size_t | k | ||
) |
void vg::find_mismatches | ( | const std::string & | seq, |
const gbwtgraph::CachedGBWTGraph & | graph, | ||
std::vector< GaplessExtension > & | result | ||
) |
void vg::find_packed_breakpoints | ( | const Path & | path, |
Packer & | packed_breakpoints, | ||
bool | break_ends, | ||
const string & | base_quals, | ||
double | min_baseq, | ||
double | max_frac_n | ||
) |
Like "find_breakpoints", but store in packed structure (better for large gams and enables coverage filter)
pair<vector<SnarlTraversal>, vector<pair<step_handle_t, step_handle_t> > > vg::find_path_traversals | ( | const Snarl & | site | ) |
double vg::fit_fixed_rate_max_exponential | ( | const vector< double > & | x, |
double | rate, | ||
double | tolerance | ||
) |
Returns the MLE estimate for the number of iid exponential RVs the data are maxima of.
double vg::fit_fixed_shape_max_exponential | ( | const vector< double > & | x, |
double | shape, | ||
double | tolerance | ||
) |
Returns the MLE rate parameter for the distribution of (shape) iid exponential RVs.
pair< double, double > vg::fit_max_exponential | ( | const vector< double > & | x, |
double | tolerance | ||
) |
Returns the MLE rate and shape parameters of a max exponential.
tuple< double, double, double > vg::fit_offset_weibull | ( | const vector< double > & | x, |
double | tolerance | ||
) |
Returns an estimate of the rate, shape, and location (minimum value) of a 3-parameter Weibull distribution.
pair< double, double > vg::fit_weibull | ( | const vector< double > & | x | ) |
Returns an estimate of the rate and shape parameters of a Weibull distribution.
double vg::fit_zipf | ( | const vector< double > & | y | ) |
void vg::flip_doubly_reversed_edges | ( | Graph & | graph | ) |
clean up doubly-reversed edges
void vg::flip_nodes | ( | Alignment & | a, |
const set< int64_t > & | ids, | ||
const std::function< size_t(int64_t)> & | node_length | ||
) |
void vg::for_each_basic_annotation | ( | const Annotated & | annotated, |
const function< void(const string &)> | null_lambda, | ||
const function< void(const string &, double)> | double_lambda, | ||
const function< void(const string &, bool)> | bool_lambda, | ||
const function< void(const string &, const string &)> | string_lambda | ||
) |
Apply a lambda to all annotations, except for Struct and ListValue annotations (which cannot be easily typed without exposing ugly Protobuf internals).
void vg::for_each_kmer | ( | const HandleGraph & | graph, |
size_t | k, | ||
const function< void(const kmer_t &)> & | lambda, | ||
id_t | head_id = 0 , |
||
id_t | tail_id = 0 , |
||
atomic< int > * | stop_flag = nullptr |
||
) |
Iterate over all the kmers in the graph, running lambda on each. If the stop flag is included, stop execution if it ever evaluates to true.
double vg::format_multiplier | ( | ) |
unordered_map< id_t, set< pos_t > > vg::forwardize_breakpoints | ( | const HandleGraph * | graph, |
const unordered_map< id_t, set< pos_t >> & | breakpoints | ||
) |
Flips the breakpoints onto the forward strand.
void vg::from_handle_graph | ( | const HandleGraph & | from, |
Graph & | to | ||
) |
multipath_alignment_t vg::from_hit | ( | const Alignment & | alignment, |
const HandleGraph & | graph, | ||
const pos_t & | hit_pos, | ||
const MaximalExactMatch & | mem, | ||
const GSSWAligner & | scorer | ||
) |
int vg::from_length | ( | const Mapping & | m | ) |
void vg::from_path_handle_graph | ( | const PathHandleGraph & | from, |
Graph & | to | ||
) |
void vg::from_proto_mapping | ( | const Mapping & | proto_mapping, |
path_mapping_t & | mapping | ||
) |
void vg::from_proto_multipath_alignment | ( | const MultipathAlignment & | proto_multipath_aln, |
multipath_alignment_t & | multipath_aln_out | ||
) |
Convert a protobuf MultipathAlignment to an STL-based multipath_alignment_t.
void vg::from_proto_position | ( | const Position & | from, |
position_t & | to | ||
) |
multipath_alignment_t && vg::fuse_spliced_alignments | ( | const Alignment & | alignment, |
multipath_alignment_t && | left_mp_aln, | ||
multipath_alignment_t && | right_mp_aln, | ||
int64_t | left_bridge_point, | ||
const Alignment & | splice_segment, | ||
int64_t | splice_junction_idx, | ||
int32_t | splice_score, | ||
const GSSWAligner & | scorer, | ||
const HandleGraph & | graph | ||
) |
Calculate the natural log of the gamma function of the given argument.
gbwt::size_type vg::gbwt_node_width | ( | const HandleGraph & | graph | ) |
Determine the node width in bits for the GBWT nodes based on the given graph.
|
inline |
Convert gbwt::node_type to handle_t.
|
inline |
Convert gbwt::node_type and an offset as size_t to pos_t.
set< pos_t > vg::gcsa_nodes_to_positions | ( | const vector< gcsa::node_type > & | nodes | ) |
std::string vg::generate_haplotype | ( | gbwt::edge_type | pos, |
handle_t | end, | ||
size_t | start_max, | ||
size_t | end_max, | ||
const gbwtgraph::GBWTGraph & | graph | ||
) |
void vg::genotype_svs | ( | VG * | graph, |
string | gamfile, | ||
string | refpath | ||
) |
Takes a graph and two GAMs, one tumor and one normal Locates existing variation supported by the tumor and annotate it with a path Then overlay the normal sample Use a depthmap of snarltraversal transforms, one for tumor, one for normal which we can use to count the normal and tumor alleles void somatic_genotyper(VG* graph, string tumorgam, string normalgam);
Do smart augment, maintaining a depth map for tumor/normal perfect matches and then editing in all of the SV reads (after normalization) with a T/N_ prefix Then, get our Snarls count reads supporting each and genotype void somatic_caller_genotyper(VG* graph, string tumorgam, string normalgam);
real_t vg::geometric_sampling_prob_ln | ( | ProbIn | success_logprob, |
size_t | trials | ||
) |
Get the log probability for sampling the given value from a geometric distribution with the given success log probability. The geometric distribution is the distribution of the number of trials, with a given success probability, required to observe a single success.
unique_ptr< AlignmentEmitter > vg::get_alignment_emitter | ( | const string & | filename, |
const string & | format, | ||
const vector< tuple< path_handle_t, size_t, size_t >> & | paths, | ||
size_t | max_threads, | ||
const HandleGraph * | graph = nullptr , |
||
int | flags = ALIGNMENT_EMITTER_FLAG_NONE |
||
) |
Get an AlignmentEmitter that can emit to the given file (or "-") in the given format. When writing HTSlib formats (SAM, BAM, CRAM), paths should contain the paths in the linear reference in sequence dictionary order (see get_sequence_dictionary), and a PathPositionHandleGraph must be provided. When writing GAF, a HandleGraph must be provided for obtaining node lengths and sequences. Other formats do not need a graph.
flags is an ORed together set of flags from alignment_emitter_flags_t.
Automatically applies per-thread buffering, but needs to know how many OMP threads will be in use.
|
inline |
Get the annotation with the given name and return it. If not present, returns the Protobuf default value for the annotation type. The value may be a primitive type or an entire Protobuf object. It is undefined behavior to read a value out into a different type than it was stored with.
|
inline |
Get the annotation with the given name and return it. If not present, returns the Protobuf default value for the annotation type. The value may be a primitive type or an entire Protobuf object. It is undefined behavior to read a value out into a different type than it was stored with.
double vg::get_avg_baseq | ( | const Edit & | edit, |
const string & | base_quals, | ||
size_t | position_in_read | ||
) |
Compute the average base quality of an edit. If the edit has no sequence or there are no base_quals given, then double_max is returned.
gbwtgraph::GFAParsingParameters vg::get_best_gbwtgraph_gfa_parsing_parameters | ( | ) |
Get the best configuration to use for the GBWTGraph library GFA parser, to best match the behavior of vg's GFA parser.
size_t vg::get_current_vmem_kb | ( | ) |
Get the current virtual memory size, in kb, or 0 if unsupported.
int64_t vg::get_file_size | ( | const string & | filename | ) |
double vg::get_fraction_of_ns | ( | const string & | seq | ) |
Return the number of Ns as a fraction of the total sequence length (or 0 if the sequence is empty)
gbwt::GBWT vg::get_gbwt | ( | const std::vector< gbwt::vector_type > & | paths | ) |
Transform the paths into a GBWT index. Primarily for testing.
void vg::get_input_file | ( | const string & | file_name, |
function< void(istream &)> | callback | ||
) |
Get a callback with an istream& to an open file. Handles "-" as a filename as indicating standard input. The reference passed is guaranteed to be valid only until the callback returns.
void vg::get_input_file | ( | int & | optind, |
int | argc, | ||
char ** | argv, | ||
function< void(istream &)> | callback | ||
) |
Get a callback with an istream& to an open file if a file name argument is present after the parsed options, or print an error message and exit if one is not. Handles "-" as a filename as indicating standard input. The reference passed is guaranteed to be valid only until the callback returns. Bumps up optind to the next argument if a filename is found.
Warning: If you're reading a HandleGraph via VPKG::load_one (as is the pattern in vg) it is best to use get_input_file_name() below instead, and run load_one on that. This allows better GFA support because it allows memmapping the file directly
string vg::get_input_file_name | ( | int & | optind, |
int | argc, | ||
char ** | argv, | ||
bool | test_open = true |
||
) |
Parse out the name of an input file (i.e. the next positional argument), or throw an error. File name must be nonempty, but may be "-" or may not exist.
|
inline |
Get a reference to the reverse flag of a pos_t.
size_t vg::get_max_rss_kb | ( | ) |
Get the max RSS usage ever, in kb, or 0 if unsupported.
size_t vg::get_max_vmem_kb | ( | ) |
Get the max virtual memory size ever, in kb, or 0 if unsupported.
MIPayloadValues vg::get_minimizer_distances | ( | const SnarlDistanceIndex & | distance_index, |
pos_t | pos | ||
) |
bool vg::get_next_alignment_from_fastq | ( | gzFile | fp, |
char * | buffer, | ||
size_t | len, | ||
Alignment & | alignment | ||
) |
bool vg::get_next_alignment_pair_from_fastqs | ( | gzFile | fp1, |
gzFile | fp2, | ||
char * | buffer, | ||
size_t | len, | ||
Alignment & | mate1, | ||
Alignment & | mate2 | ||
) |
bool vg::get_next_interleaved_alignment_pair_from_fastq | ( | gzFile | fp, |
char * | buffer, | ||
size_t | len, | ||
Alignment & | mate1, | ||
Alignment & | mate2 | ||
) |
int64_t vg::get_num_samples | ( | const string & | vcf_filename | ) |
Get a reference to the offset field of a pos_t, which counts along the selected strand of the node.
double vg::get_or_estimate_coverage | ( | const hash_map< Haplotypes::Subchain::kmer_type, size_t > & | counts, |
const Recombinator::Parameters & | parameters, | ||
Haplotypes::Verbosity | verbosity | ||
) |
string vg::get_or_make_variant_id | ( | const vcflib::Variant & | variant | ) |
string vg::get_output_file_name | ( | int & | optind, |
int | argc, | ||
char ** | argv | ||
) |
Parse out the name of an output file (i.e. the next positional argument), or throw an error. File name must be nonempty.
std::vector<handle_t> vg::get_path | ( | const std::vector< handle_t > & | first, |
gbwt::node_type | second | ||
) |
std::vector<handle_t> vg::get_path | ( | gbwt::node_type | reverse_first, |
const std::vector< handle_t > & | second | ||
) |
string vg::get_proc_status_value | ( | const string & | name | ) |
Get the string value for a field in /proc/self/status by name, or "" if unsupported or not found.
vector< tuple< path_handle_t, size_t, size_t > > vg::get_sequence_dictionary | ( | const string & | filename, |
const vector< string > & | path_names, | ||
const PathPositionHandleGraph & | graph | ||
) |
Produce a list of path handles in a fixed order, suitable for use with get_alignment_emitter_with_surjection(), by parsing a file. The file may be an HTSlib-style "sequence dictionary" (consisting of SAM @SQ header lines), or a plain list of sequence names (which do not start with "@SQ"). If the file is not openable or contains no entries, reports an error and quits.
If path_names has entries, they are treated as path names that supplement those in the file, if any.
If the filename is itself an empty string, and no path names are passed, then all reference-sense paths from the graph will be collected in arbitrary order. If there are none, all non-alt-allele generic sense paths from the graph will be collected in arbitrary order.
TODO: Be able to generate the autosomes human-sort, X, Y, MT order typical of references.
The tuple is <path, path length in graph, base path length>. For a subpath (i.e. chr1[1000-10000]) the base path length would be that of chr1. This information needs to come from the user in order to be correct, but if it's not specified, it'll be guessed from the graph.
int vg::get_thread_count | ( | void | ) |
Return the number of threads that OMP will produce for a parallel section. TODO: Assumes that this is the same for every parallel section.
SnarlTraversal vg::get_traversal_of_snarl | ( | VG & | graph, |
const Snarl * | snarl, | ||
const SnarlManager & | manager, | ||
const Path & | path | ||
) |
Given a path (which may run either direction through a snarl, or not touch the ends at all), collect a list of NodeTraversals in order for the part of the path that is inside the snarl, in the same orientation as the path.
vector< int > vg::get_traversal_order | ( | const PathHandleGraph * | graph, |
const vector< Traversal > & | traversals, | ||
const vector< string > & | trav_path_names, | ||
const vector< int > & | ref_travs, | ||
int64_t | ref_trav_idx, | ||
const vector< bool > & | use_traversal | ||
) |
Sort the traversals, putting the reference first, then using names. Traversals masked out by use_traversal will be filtered out entirely (so the output vector may be smaller than the input...)
void vg::getReachableBridges | ( | stCactusEdgeEnd * | edgeEnd1, |
stList * | bridgeEnds | ||
) |
Get the bridge ends that form boundary pairs with edgeEnd1. Duplicated from the pinchesAndCacti tests.
void vg::getReachableBridges2 | ( | stCactusEdgeEnd * | edgeEnd1, |
stHash * | bridgeEndsToBridgeNodes, | ||
stList * | bridgeEnds | ||
) |
Get the bridge ends that form boundary pairs with edgeEnd1, using the given getBridgeEdgeEndsToBridgeNodes hash map. Duplicated from the pinchesAndCacti tests.
double vg::golden_section_search | ( | const function< double(double)> & | f, |
double | x_min, | ||
double | x_max, | ||
double | tolerance | ||
) |
Returns a local maximum of a function within an interval.
string vg::graph_interval_to_string | ( | const HandleGraph * | graph, |
const handle_t & | start_handle, | ||
const handle_t & | end_handle | ||
) |
void vg::graph_to_gfa | ( | const PathHandleGraph * | graph, |
ostream & | out, | ||
const set< string > & | rgfa_paths = {} , |
||
bool | rgfa_pline = false , |
||
bool | use_w_lines = true |
||
) |
Export the given VG graph to the given GFA file. Express paths mentioned in rgfa_paths as rGFA. If rgfa_pline is set, also express them as dedicated lines. If use_w_lines is set, reference and haplotype paths will use W lines instead of P lines.
size_t vg::guess_parallel_gbwt_jobs | ( | size_t | node_count, |
size_t | haplotype_count, | ||
size_t | available_memory, | ||
size_t | batch_size | ||
) |
void vg::handle_full_length | ( | const HandleGraph & | graph, |
std::vector< GaplessExtension > & | result, | ||
double | overlap_threshold | ||
) |
pair< stCactusGraph *, stList * > vg::handle_graph_to_cactus | ( | const PathHandleGraph & | graph, |
const unordered_set< string > & | hint_paths, | ||
bool | single_component | ||
) |
|
inline |
Convert handle_t to gbwt::node_type.
vector< Alignment > vg::haplotype_consistent_alignments | ( | const multipath_alignment_t & | multipath_aln, |
const haplo::ScoreProvider & | score_provider, | ||
size_t | soft_count, | ||
size_t | hard_count, | ||
bool | optimal_first = false |
||
) |
Finds all alignments consistent with haplotypes available by incremental search with the given haplotype score provider. Pads to a certain count with haplotype-inconsistent alignments that are population-scorable (i.e. use only edges used by some haplotype in the index), and then with unscorable alignments if scorable ones are unavailable. This may result in an empty vector.
Output Alignments may not be unique. The input multipath_alignment_t may have exponentially many ways to spell the same Alignment, and we will look at all of them. We also may have duplicates of the optimal alignment if we are asked to produce it unconditionally.
Note: Assumes that each subpath's Path object uses one Mapping per node and that start subpaths have been identified
Args: multipath_aln multipath alignment to find optimal paths through score_provider a haplo::ScoreProvider that supports incremental search over its haplotype database (such as a GBWTScoreProvider) soft_count maximum number of haplotype-inconsistent alignments to pad to hard_count maximum number of alignments, including haplotype-consistent (0 if no limit) optimal_first always compute and return first the optimal alignment, even if not haplotype-consistent
|
inline |
Returns true if the Protobuf object has an annotation with this name.
bool vg::has_inversion | ( | const Graph & | graph | ) |
returns true if we find an edge that may specify an inversion
const string vg::hash_alignment | ( | const Alignment & | aln | ) |
tuple< double, double, double > vg::hash_to_rgb | ( | const string & | str, |
double | min_sum | ||
) |
bool vg::have_input_file | ( | int & | optind, |
int | argc, | ||
char ** | argv | ||
) |
Return true if there's a command line argument (i.e. input file name) waiting to be processed.
bam_hdr_t * vg::hts_file_header | ( | string & | filename, |
string & | header | ||
) |
int vg::hts_for_each | ( | string & | filename, |
function< void(Alignment &)> | lambda | ||
) |
int vg::hts_for_each | ( | string & | filename, |
function< void(Alignment &)> | lambda, | ||
const PathPositionHandleGraph * | graph | ||
) |
int vg::hts_for_each_parallel | ( | string & | filename, |
function< void(Alignment &)> | lambda | ||
) |
int vg::hts_for_each_parallel | ( | string & | filename, |
function< void(Alignment &)> | lambda, | ||
const PathPositionHandleGraph * | graph | ||
) |
bam_hdr_t * vg::hts_string_header | ( | string & | header, |
const map< string, int64_t > & | path_length, | ||
const map< string, string > & | rg_sample | ||
) |
bam_hdr_t * vg::hts_string_header | ( | string & | header, |
const vector< pair< string, int64_t >> & | path_order_and_length, | ||
const map< string, string > & | rg_sample | ||
) |
void vg::identify_start_subpaths | ( | multipath_alignment_t & | multipath_aln | ) |
Finds the start subpaths (i.e. the source nodes of the multipath DAG) and stores them in the 'start' field of the multipath_alignment_t
double vg::identity | ( | const Path & | path | ) |
void vg::in_place_subvector | ( | std::vector< Element > & | vec, |
size_t | head, | ||
size_t | tail | ||
) |
std::string vg::insert_gbwt_path | ( | MutablePathHandleGraph & | graph, |
const gbwt::GBWT & | gbwt_index, | ||
gbwt::size_type | id, | ||
std::string | path_name = "" |
||
) |
Insert a GBWT thread into the graph and return its name. Returns an empty string on failure. If a path name is specified and not empty, that name will be used for the inserted path. NOTE: id is a gbwt path id, not a gbwt sequence id.
size_t vg::integer_power | ( | size_t | x, |
size_t | power | ||
) |
size_t vg::integer_power | ( | uint64_t | base, |
uint64_t | exponent | ||
) |
Computes base^exponent in log(exponent) time.
size_t vg::interval_length | ( | std::pair< size_t, size_t > | interval | ) |
bool vg::is_all_n | ( | const string & | seq | ) |
Return True if the given string is entirely Ns of either case, and false otherwise.
|
inline |
Return true if a pos_t is unset.
bool vg::is_gzipped | ( | const string & | filename | ) |
bool vg::is_id_sortable | ( | const Graph & | graph | ) |
returns true if the graph is id-sortable (no reverse links)
bool vg::is_match | ( | const Translation & | translation | ) |
bool vg::is_number | ( | const std::string & | s | ) |
bool vg::is_number | ( | const string & | s | ) |
|
inline |
Return true if a pos_t is on the reverse strand of its node.
bool vg::isATGC | ( | const char & | b | ) |
Return true if a character is an uppercase A, C, G, or T, and false otherwise.
|
inline |
|
inline |
Returns the number of bytes required for a kmer in KFF format.
std::string vg::kff_decode | ( | const uint8_t * | kmer, |
size_t | k, | ||
const std::string & | decoding | ||
) |
Decodes a kmer in KFF format according to the given encoding.
void vg::kff_decode | ( | uint8_t | byte, |
size_t | chars, | ||
const std::string & | decoding, | ||
std::string & | output | ||
) |
std::vector< uint8_t > vg::kff_encode | ( | const std::string & | kmer, |
const uint8_t * | encoding | ||
) |
Encodes a kmer in KFF format according to the given encoding. Non-ACGT characters are encoded as 0s.
uint8_t vg::kff_encode | ( | const std::string & | kmer, |
size_t | start, | ||
size_t | limit, | ||
const uint8_t * | encoding | ||
) |
uint8_t vg::kff_get | ( | const uint8_t * | kmer, |
size_t | i | ||
) |
std::string vg::kff_invert | ( | const uint8_t * | encoding | ) |
Inverts the KFF encoding into a packed -> char table.
bool vg::kff_is_trivial | ( | const uint8_t * | encoding | ) |
Returns true if the encoding is trivial (0, 1, 2, 3).
uint64_t vg::kff_parse | ( | const uint8_t * | data, |
size_t | bytes | ||
) |
Parses a big-endian integer from KFF data.
gbwtgraph::Key64::value_type vg::kff_recode | ( | const uint8_t * | kmer, |
size_t | k, | ||
kff_recoding_t | recoding | ||
) |
Recodes a KFF kmer in the minimizer index format according to the given encoding. Will fail silently if k is too large or recoding is not from kff_recoding().
std::vector< gbwtgraph::Key64::value_type > vg::kff_recode | ( | const uint8_t * | kmers, |
size_t | n, | ||
size_t | k, | ||
kff_recoding_t | recoding | ||
) |
Recodes n KFF kmers in the minimizer index format according to the given encoding. Will fail silently if k is too large or recoding is not from kff_recoding().
std::vector< uint8_t > vg::kff_recode | ( | gbwtgraph::Key64::value_type | kmer, |
size_t | k, | ||
const uint8_t * | encoding | ||
) |
Recodes a kmer from a minimizer index in KFF format according to the given encoding.
uint8_t vg::kff_recode | ( | gbwtgraph::Key64::value_type | kmer, |
size_t | k, | ||
size_t | chars, | ||
const uint8_t * | encoding | ||
) |
gbwtgraph::Key64::value_type vg::kff_recode_trivial | ( | const uint8_t * | kmer, |
size_t | k, | ||
size_t | bytes | ||
) |
Recodes a KFF kmer in the minimizer index format, assuming that the encoding is the same. Will fail silently if k or bytes is too large.
kff_recoding_t vg::kff_recoding | ( | const uint8_t * | encoding | ) |
Returns a recoding for the given encoding.
std::vector< uint8_t > vg::kff_reverse_complement | ( | const uint8_t * | kmer, |
size_t | k, | ||
const uint8_t * | encoding | ||
) |
Returns the reverse complement of a KFF kmer.
void vg::kff_set | ( | std::vector< uint8_t > & | kmer, |
size_t | i, | ||
uint8_t | value | ||
) |
void vg::kmer_to_gcsa_kmers | ( | const kmer_t & | kmer, |
const gcsa::Alphabet & | alpha, | ||
const function< void(const gcsa::KMer &)> & | lambda | ||
) |
Convert the kmer_t to a set of gcsa2 binary kmers which are exposed via a callback.
size_t vg::kmp_search | ( | const char * | text, |
size_t | text_len, | ||
const char * | pattern, | ||
size_t | pattern_len, | ||
const vector< size_t > & | prefix_suffix_table | ||
) |
vector< pair< vector< gbwt::node_type >, gbwt::SearchState > > vg::list_haplotypes | ( | const HandleGraph & | graph, |
const gbwt::GBWT & | gbwt, | ||
handle_t | start, | ||
function< bool(const vector< gbwt::node_type > &)> | stop_fn | ||
) |
|
inline |
Convert a number ln to the same number log 10.
void vg::load_gbwt | ( | gbwt::DynamicGBWT & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Load a dynamic GBWT from the file.
void vg::load_gbwt | ( | gbwt::GBWT & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Load a compressed GBWT from the file.
void vg::load_gbwtgraph | ( | gbwtgraph::GBWTGraph & | graph, |
const std::string & | filename, | ||
bool | show_progress = false |
||
) |
Load GBWTGraph from the file. NOTE: Call graph.set_gbwt() afterwards with the appropriate GBWT index.
void vg::load_gbz | ( | gbwt::GBWT & | index, |
gbwtgraph::GBWTGraph & | graph, | ||
const std::string & | filename, | ||
bool | show_progress | ||
) |
Load GBWT and GBWTGraph from the GBZ file.
void vg::load_gbz | ( | gbwtgraph::GBZ & | gbz, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Load GBZ from the file.
void vg::load_gbz | ( | gbwtgraph::GBZ & | gbz, |
const std::string & | gbwt_name, | ||
const std::string & | graph_name, | ||
bool | show_progress | ||
) |
Load GBZ from separate GBWT / GBWTGraph files.
void vg::load_gcsa | ( | gcsa::GCSA & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Load GCSA from the file.
void vg::load_lcp | ( | gcsa::LCPArray & | lcp, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Load LCP array from the file.
void vg::load_minimizer | ( | gbwtgraph::DefaultMinimizerIndex & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Load a minimizer index from the file.
void vg::load_r_index | ( | gbwt::FastLocate & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Load an r-index from the file.
std::unordered_map< nid_t, std::pair< std::string, size_t > > vg::load_translation_back_map | ( | const gbwtgraph::GBWTGraph & | graph | ) |
Return a backwards mapping of chopped node to original segment position (id,offset pair)
Return a backwards mapping of chopped node to original segment position (id,offset pair) (mimicking logic and interface from function of same name in gbwt_helper.cpp)
unordered_map< nid_t, pair< string, size_t > > vg::load_translation_back_map | ( | HandleGraph & | graph, |
ifstream & | input_stream | ||
) |
Load a translation file (created with vg gbwt --translation) and return a backwards mapping of chopped node to original segment position (id,offset pair). NOTE: hopefully this is just a short-term hack, and we get a general interface baked into the handlegraphs themselves.
std::unordered_map< std::string, std::vector< nid_t > > vg::load_translation_map | ( | const gbwtgraph::GBWTGraph & | graph | ) |
Return a mapping of the original segment ids to a list of chopped node ids.
Return a mapping of the original segment ids to a list of chopped node ids (mimicking logic and interface from function of same name in gbwt_helper.cpp)
unordered_map< string, vector< nid_t > > vg::load_translation_map | ( | ifstream & | input_stream | ) |
Load a translation file (created with vg gbwt --translation) and return a mapping of original segment ids to a list of chopped node ids.
|
inline |
Given the log10 of a value, return the log10 of (that value plus one).
|
inline |
Convert a number log 10 to the same number ln.
double vg::lognormal_pdf | ( | double | x, |
double | mu, | ||
double | sigma | ||
) |
Probability density function of a log-normal distribution.
|
inline |
Add two probabilities (expressed as logprobs) together and return the result as a logprob.
|
inline |
Take the geometric mean of two logprobs.
|
inline |
Invert a logprob, and get the probability of its opposite.
Collection::value_type vg::logprob_sum | ( | const Collection & | collection | ) |
Compute the sum of the values in a collection, where the values are log probabilities and the result is the log of the total probability. Items must be convertible to/from doubles for math.
|
inline |
Convert a natural log probability of wrongness directly to a Phred quality score.
|
inline |
Convert natural log probability to a probability.
pos_t vg::make_pos_t | ( | const position_t & | pos | ) |
pos_t vg::make_pos_t | ( | gcsa::node_type | node | ) |
Create a pos_t from a gcsa node.
Create a pos_t from a Node ID, an orientation flag, and an offset along that strand of the node.
vector< size_t > vg::make_prefix_suffix_table | ( | const char * | pattern, |
size_t | len | ||
) |
|
inline |
Make seeds for Alignments based on their sequences.
|
inline |
Define seed generation for shuffling multipath alignments.
|
inline |
Make seeds for pairs of things we can make seeds for.
|
inline |
Make seeds for pointers to things we can make seeds for.
Support vg::make_support | ( | double | forward, |
double | reverse, | ||
double | quality | ||
) |
TBD Create a Support for the given forward and reverse coverage and quality.
SnarlDistanceIndex::TemporaryDistanceIndex vg::make_temporary_distance_index | ( | const HandleGraph * | graph, |
const HandleGraphSnarlFinder * | snarl_finder, | ||
size_t | size_limit | ||
) |
vector< Translation > vg::make_translation | ( | const HandleGraph * | graph, |
const map< pos_t, id_t > & | node_translation, | ||
const unordered_map< id_t, Path > & | added_nodes, | ||
const unordered_map< id_t, size_t > & | orig_node_sizes | ||
) |
Produce a graph Translation object from information about the editing process.
string vg::make_variant_id | ( | const vcflib::Variant & | variant | ) |
set<T> vg::map_keys_to_set | ( | const map< T, V > & | m | ) |
Container<Output> vg::map_over | ( | const Container< Input > & | in, |
const std::function< Output(const Input &)> & | lambda | ||
) |
We have a transforming map function that we can chain.
void vg::mapping_against_path | ( | Alignment & | alignment, |
const bam1_t * | b, | ||
const path_handle_t & | path, | ||
const PathPositionHandleGraph * | graph, | ||
bool | on_reverse_strand | ||
) |
Translate the CIGAR in the given BAM record into mappings in the given Alignment against the given path in the given graph.
void vg::mapping_cigar | ( | const Mapping & | mapping, |
vector< pair< int, char > > & | cigar | ||
) |
void vg::mapping_cigar | ( | const Mapping & | mapping, |
vector< pair< int, char >> & | cigar | ||
) |
bool vg::mapping_ends_in_deletion | ( | const Mapping & | m | ) |
int vg::mapping_from_length | ( | const Mapping & | m | ) |
int vg::mapping_from_length | ( | const path_mapping_t & | mapping | ) |
bool vg::mapping_is_match | ( | const Mapping & | m | ) |
bool vg::mapping_is_simple_match | ( | const Mapping & | m | ) |
bool vg::mapping_is_total_deletion | ( | const Mapping & | m | ) |
bool vg::mapping_is_total_insertion | ( | const Mapping & | m | ) |
const string vg::mapping_sequence | ( | const Mapping & | mp, |
const string & | node_seq | ||
) |
bool vg::mapping_starts_in_deletion | ( | const Mapping & | m | ) |
string vg::mapping_string | ( | const string & | source, |
const Mapping & | mapping | ||
) |
|
inline |
Convert Mapping to gbwt::node_type.
handle_t vg::mapping_to_handle | ( | const Mapping & | mapping, |
const HandleGraph & | graph | ||
) |
int vg::mapping_to_length | ( | const Mapping & | m | ) |
int vg::mapping_to_length | ( | const path_mapping_t & | mapping | ) |
void vg::match_backward | ( | GaplessExtension & | match, |
const std::string & | seq, | ||
gbwtgraph::view_type | target, | ||
uint32_t | mismatch_limit | ||
) |
size_t vg::match_forward | ( | GaplessExtension & | match, |
const std::string & | seq, | ||
gbwtgraph::view_type | target, | ||
uint32_t | mismatch_limit | ||
) |
void vg::match_initial | ( | GaplessExtension & | match, |
const std::string & | seq, | ||
gbwtgraph::view_type | target | ||
) |
vector< vector< double > > vg::matrix_invert | ( | const vector< vector< double >> & | A | ) |
vector< double > vg::matrix_multiply | ( | const vector< vector< double >> & | A, |
const vector< double > & | b | ||
) |
vector< vector< double > > vg::matrix_multiply | ( | const vector< vector< double >> & | A, |
const vector< vector< double >> & | B | ||
) |
|
inline |
Return the CDF of a max exponential with the given parameters.
double vg::max_exponential_log_likelihood | ( | const vector< double > & | x, |
double | rate, | ||
double | shape, | ||
double | location | ||
) |
The log likelihood of a max exponential with the given parameters on the given data.
double vg::median | ( | std::vector< int > & | v | ) |
pair< int64_t, int64_t > vg::mem_min_oriented_distances | ( | const MaximalExactMatch & | m1, |
const MaximalExactMatch & | m2 | ||
) |
vector< pair< gcsa::node_type, size_t > > vg::mem_node_start_positions | ( | const HandleGraph & | graph, |
const vg::MaximalExactMatch & | mem | ||
) |
get the handles that a mem covers
bool vg::mems_overlap | ( | const MaximalExactMatch & | mem1, |
const MaximalExactMatch & | mem2 | ||
) |
int vg::mems_overlap_length | ( | const MaximalExactMatch & | mem1, |
const MaximalExactMatch & | mem2 | ||
) |
const string vg::mems_to_json | ( | const vector< MaximalExactMatch > & | mems | ) |
Glom the second edit into the first, assuming adjacency.
void vg::merge_equivalent_traversals_in_graph | ( | MutablePathHandleGraph * | graph, |
const unordered_set< path_handle_t > & | selected_paths, | ||
bool | use_snarl_manager = false |
||
) |
For every top-level snarl in the graph, compute the traversal strings of every embedded path that spans it. If two or more traversals share an allele string, then a "canonical" path is chosen and all remaining paths are edited so that they share the exact same interval through the snarl as the canonical path's traversal. A path is considered "canonical" if it's in the "selected_paths" and the other paths are not (otherwise the lowest name is used as a fallback).
Note: this doesn't modify the graph topology, so nodes and edges left uncovered as a result of path editing would usually need to be removed with vg clip afterwards.
The use_snarl_manager flag toggles between the distance index and the snarl manager for computing snarls (this option was added to (hopefully) temporarily revert to the snarl manager for performance reasons).
void vg::merge_non_branching_subpaths | ( | multipath_alignment_t & | multipath_aln, |
const unordered_set< size_t > * | prohibited_merges = nullptr |
||
) |
Merges non-branching paths in a multipath alignment in place. Does not assume topological order among subpaths.
void* vg::mergeNodeObjects | ( | void * | a, |
void * | b | ||
) |
string vg::middle_signature | ( | const Alignment & | aln, |
int | len | ||
) |
pair< string, string > vg::middle_signature | ( | const Alignment & | aln1, |
const Alignment & | aln2, | ||
int | len | ||
) |
pair< int64_t, int64_t > vg::min_oriented_distances | ( | const unordered_map< path_handle_t, vector< pair< size_t, bool > > > & | path_offsets1, |
const unordered_map< path_handle_t, vector< pair< size_t, bool > > > & | path_offsets2 | ||
) |
Find the min distance in the path offsets where the path orientation is the same and different.
|
inline |
Returns the reverse complement of a minimizer index kmer.
size_t vg::minimum_distance | ( | const SnarlDistanceIndex & | distance_index, |
pos_t | pos1, | ||
pos_t | pos2, | ||
bool | unoriented_distance, | ||
const HandleGraph * | graph | ||
) |
size_t vg::modular_exponent | ( | uint64_t | base, |
uint64_t | exponent, | ||
uint64_t | modulus | ||
) |
Computes base^exponent mod modulus in log(exponent) time without requiring more than 64 bits to represent exponentiated number
real_t vg::multinomial_censored_sampling_prob_ln | ( | const vector< ProbIn > & | probs, |
const unordered_map< vector< bool >, int > & | obs | ||
) |
Get the log probability for sampling any actual set of category counts that is consistent with the constraints specified by obs, using the per-category probabilities defined in probs.
Obs maps from a vector of per-category flags (called a "class") to a number of items that might be in any of the flagged categories.
For example, if there are two equally likely categories, and one item flagged as potentially from either category, the probability of sampling a set of category counts consistent with that constraint is 1. If instead there are three equally likely categories, and one item flagged as potentially from two of the three but not the third, the probability of sampling a set of category counts consistent with that constraint is 2/3.
|
inline |
Compute the number of ways to select k_1, k_2, ... k_i items into i buckets from a collection of n distinguishable items, ignoring order. All of the items have to go into the buckets, so all k_i must sum to n. To compute choose you have to call this function with a 2-element vector, to represent the chosen and not-chosen buckets. Returns the natural log of the (integer) result.
TODO: Turns out we don't actually need this for the ambiguous multinomial after all.
real_t vg::multinomial_sampling_prob_ln | ( | const vector< ProbIn > & | probs, |
const vector< int > & | obs | ||
) |
Get the probability for sampling the counts in obs from a set of categories weighted by the probabilities in probs. Works for both double and real_t probabilities. Also works for binomials.
int vg::non_match_end | ( | const Alignment & | alignment | ) |
int vg::non_match_start | ( | const Alignment & | alignment | ) |
string vg::nonATGCNtoN | ( | const string & | s | ) |
T vg::normal_pdf | ( | T | x, |
T | m = 0.0 , |
||
T | s = 1.0 |
||
) |
void vg::normalize_alignment | ( | Alignment & | alignment | ) |
Merge adjacent edits of the same type and convert all N matches to mismatches.
size_t vg::num_connected_components | ( | const multipath_alignment_t & | multipath_aln | ) |
Returns the number of connected components in the multipath alignment.
Get the offset along the selected strand of the node from a pos_t.
Two Snarls are unequal if they are not equal.
bool vg::operator!= | ( | const SnarlTraversal & | a, |
const SnarlTraversal & | b | ||
) |
Two SnarlTraversals are unequal if they are not equal.
bool vg::operator!= | ( | const Transcript & | lhs, |
const Transcript & | rhs | ||
) |
Two Visits are unequal if they are not equal.
Scale a Support by a factor, the other way
Scale a Support by a factor.
Scale a Support by a factor, in place.
Add two Support values together, accounting for strand.
VectorView<Item>::const_iterator vg::operator+ | ( | typename VectorView< Item >::const_iterator::difference_type | a, |
const typename VectorView< Item >::const_iterator & | b | ||
) |
Allow VectorView iterators to be added to numbers.
Divide a Support by a factor.
Divide a Support by a factor, in place.
bool vg::operator< | ( | const MaximalExactMatch & | m1, |
const MaximalExactMatch & | m2 | ||
) |
bool vg::operator< | ( | const SnarlTraversal & | a, |
const SnarlTraversal & | b | ||
) |
Support less-than, based on total coverage.
bool vg::operator< | ( | const Transcript & | lhs, |
const Transcript & | rhs | ||
) |
ostream & vg::operator<< | ( | ostream & | os, |
const Counts & | counts | ||
) |
ostream & vg::operator<< | ( | ostream & | out, |
const BenchmarkResult & | result | ||
) |
Benchmark results can be output to streams
ostream & vg::operator<< | ( | ostream & | out, |
const BitString & | bs | ||
) | -> ostream & |
Allow BitStrings to be printed for debugging.
ostream& vg::operator<< | ( | ostream & | out, |
const MaximalExactMatch & | mem | ||
) |
|
inline |
Print a NodeSide to a stream.
|
inline |
Print the given NodeTraversal.
ostream & vg::operator<< | ( | ostream & | out, |
mapping_t | mapping | ||
) |
Allow a mapping_t to be printed, for debugging purposes.
|
inline |
|
inline |
Print a pos_t to a stream.
std::ostream & vg::operator<< | ( | std::ostream & | out, |
const WFAAlignment::Edit & | edit | ||
) |
Allow printing an Edit.
std::ostream& vg::operator<< | ( | std::ostream & | out, |
PathBranch | branch | ||
) |
bool vg::operator== | ( | const MaximalExactMatch & | m1, |
const MaximalExactMatch & | m2 | ||
) |
Two Snarls are equal if their types are equal and their bounding Visits are equal and their parents are equal.
bool vg::operator== | ( | const SnarlTraversal & | a, |
const SnarlTraversal & | b | ||
) |
Two SnarlTraversals are equal if their snarls are equal and they have the same number of visits and all their visits are equal.
bool vg::operator== | ( | const Transcript & | lhs, |
const Transcript & | rhs | ||
) |
Support greater-than, based on total coverage.
void vg::optimal_alignment | ( | const multipath_alignment_t & | multipath_aln, |
Alignment & | aln_out, | ||
bool | subpath_global = false |
||
) |
Stores the highest scoring alignment contained in the multipath_alignment_t in an Alignment
Note: Assumes that each subpath's Path object uses one Mapping per node and that start subpaths have been identified
Args: multipath_aln multipath alignment to find optimal path through aln_out empty alignment to store optimal alignment in (data will be overwritten if not empty) subpath_global if true, only allows alignments that source subpath to sink subpath in the multipath DAG, else allows any start and end subpath
int32_t vg::optimal_alignment_internal | ( | const multipath_alignment_t & | multipath_aln, |
Alignment * | aln_out, | ||
bool | subpath_global | ||
) |
int32_t vg::optimal_alignment_score | ( | const multipath_alignment_t & | multipath_aln, |
bool | subpath_global = false |
||
) |
Returns the score of the highest scoring alignment contained in the multipath_alignment_t
Note: Assumes that each subpath's Path object uses one Mapping per node and that start subpaths have been identified
Args: multipath_aln multipath alignment to find optimal score in subpath_global if true, only allows alignments that source subpath to sink subpath in the multipath DAG, else allows any start and end subpath
vector< Alignment > vg::optimal_alignments | ( | const multipath_alignment_t & | multipath_aln, |
size_t | count | ||
) |
Returns the top k highest-scoring alignments contained in the multipath_alignment_t. Note that some or all of these may be duplicate Alignments, which were spelled out by tracebacks through different sequences of subpaths that shared alignment material.
If the best alignment is no alignment (i.e. the read is unmapped), returns an empty vector.
Note: Assumes that each subpath's Path object uses one Mapping per node and that start subpaths have been identified
Args: multipath_aln multipath alignment to find optimal paths through count maximum number of top alignments to return
vector< Alignment > vg::optimal_alignments_with_disjoint_subpaths | ( | const multipath_alignment_t & | multipath_aln, |
size_t | count | ||
) |
Finds k or fewer top-scoring alignments using only distinct subpaths. Asymmetrical: the optimal alignment for each end subpath is found, greedily, subject to the constraint, but the other subpaths are first-come first-serve. Also, distinct subpaths may not guarantee distinct actual alignments, so alignments may need deduplication.
If the best alignment is no alignment (i.e. the read is unmapped), returns an empty vector.
Note: Assumes that each subpath's Path object uses one Mapping per node and that start subpaths have been identified
Args: multipath_aln multipath alignment to find optimal paths through count maximum number of top alignments to return
void vg::output_graph_with_embedded_paths | ( | ostream & | subgraph_ostream, |
vector< pair< thread_t, int >> & | haplotype_list, | ||
const HandleGraph & | source, | ||
bool | json | ||
) |
Graph vg::output_graph_with_embedded_paths | ( | vector< pair< thread_t, int >> & | haplotype_list, |
const HandleGraph & | source | ||
) |
void vg::output_haplotype_counts | ( | ostream & | annotation_ostream, |
vector< pair< thread_t, int >> & | haplotype_list | ||
) |
unordered_map< id_t, id_t > vg::overlay_node_translations | ( | const unordered_map< id_t, id_t > & | over, |
const unordered_map< id_t, id_t > & | under | ||
) |
Compose the translations from two graph operations, neither of which involved oriented transformations.
unordered_map< id_t, pair< id_t, bool > > vg::overlay_node_translations | ( | const unordered_map< id_t, id_t > & | over, |
const unordered_map< id_t, pair< id_t, bool >> & | under | ||
) |
Compose the translations from two graph operations, the first of which involved oriented transformations.
unordered_map< id_t, pair< id_t, bool > > vg::overlay_node_translations | ( | const unordered_map< id_t, pair< id_t, bool >> & | over, |
const unordered_map< id_t, id_t > & | under | ||
) |
Compose the translations from two graph operations, the second of which involved oriented transformations.
unordered_map< id_t, pair< id_t, bool > > vg::overlay_node_translations | ( | const unordered_map< id_t, pair< id_t, bool >> & | over, |
const unordered_map< id_t, pair< id_t, bool >> & | under | ||
) |
Compose the translations from two graph operations, both of which involved oriented transformations.
Result vg::parse | ( | const char * | arg | ) |
Parse a command-line argument C string. Exits with an error if the string does not contain exactly an item of the appropriate type.
Result vg::parse | ( | const string & | arg | ) |
Parse a command-line argument string. Exits with an error if the string does not contain exactly an item of the appropriate type.
bool vg::parse | ( | const string & | arg, |
double & | dest | ||
) |
bool vg::parse | ( | const string & | arg, |
pos_t & | dest | ||
) |
bool vg::parse | ( | const string & | arg, |
Result & | dest | ||
) |
Parse the appropriate type from the string to the destination value. Return true if parsing is successful and false (or throw something) otherwise.
bool vg::parse | ( | const string & | arg, |
std::regex & | dest | ||
) |
|
inline |
Parse a range as start[:end[:step]].
bool vg::parse | ( | const string & | arg | ) |
void vg::parse_bed_regions | ( | const string & | bed_path, |
vector< Region > & | out_regions, | ||
vector< string > * | out_names | ||
) |
void vg::parse_bed_regions | ( | istream & | bedstream, |
const PathPositionHandleGraph * | graph, | ||
const std::function< void(Alignment &)> & | callback | ||
) |
Parse regions from the given BED file and call the given callback with each. Does not write them to standard output. Reads the optional name, is_reverse, and score fields if present, and populates the relevant Alignment fields. Skips and warns about malformed or illegal BED records.
void vg::parse_bed_regions | ( | istream & | bedstream, |
const PathPositionHandleGraph * | graph, | ||
vector< Alignment > * | out_alignments | ||
) |
Parse regions from the given BED file into the given vector. Does not write them to standard output.
void vg::parse_gff_regions | ( | istream & | gtfstream, |
const PathPositionHandleGraph * | graph, | ||
const std::function< void(Alignment &)> & | callback | ||
) |
Parse regions from the given GFF file and call the given callback with each. Does not write them to standard output.
void vg::parse_gff_regions | ( | istream & | gtfstream, |
const PathPositionHandleGraph * | graph, | ||
vector< Alignment > * | out_alignments | ||
) |
Parse regions from the given GFF file into the given vector. Does not write them to standard output.
void vg::parse_region | ( | const string & | target, |
string & | name, | ||
int64_t & | start, | ||
int64_t & | end | ||
) |
|
inline |
void vg::parse_rg_sample_map | ( | char * | hts_header, |
map< string, string > & | rg_sample | ||
) |
Populate a mapping from read group to sample name, given the text BAM header.
void vg::parse_tid_path_handle_map | ( | const bam_hdr_t * | hts_header, |
const PathHandleGraph * | graph, | ||
map< int, path_handle_t > & | tid_path_handle | ||
) |
Populate a mapping from target ID number to path handle in the given graph, given a parsed BAM header. The graph may be null. Missing target paths in the graph produce no warning or error and no map entry.
std::vector< std::string > vg::parseGenotypes | ( | const std::string & | vcf_line, |
size_t | num_samples | ||
) |
std::vector<std::vector<gbwt::size_type> > vg::partition_gbwt_sequences | ( | const gbwt::GBWT & | gbwt_index, |
const std::unordered_map< nid_t, size_t > & | node_to_job, | ||
size_t | num_jobs | ||
) |
int vg::path_from_length | ( | const Path & | path | ) |
int vg::path_from_length | ( | const path_t & | path | ) |
Path vg::path_from_node_traversals | ( | const list< NodeTraversal > & | traversals | ) |
Path vg::path_from_path_handle | ( | const PathHandleGraph & | graph, |
path_handle_t | path_handle | ||
) |
Path vg::path_from_thread_t | ( | thread_t & | t, |
const HandleGraph & | source | ||
) |
bool vg::path_is_simple_match | ( | const Path & | p | ) |
vg::id_t vg::path_node | ( | const gbwt::vector_type & | path, |
size_t | i | ||
) |
gbwt::vector_type vg::path_predecessors | ( | const PathHandleGraph & | graph, |
const std::string & | path_name | ||
) |
Find all predecessor nodes of the path, ignoring self-loops. If the path does not exist, it is treated as empty.
bool vg::path_reverse | ( | const gbwt::vector_type & | path, |
size_t | i | ||
) |
bool vg::path_reverse | ( | const vector< pair< vg::id_t, bool >> & | path, |
size_t | i | ||
) |
string vg::path_sequence | ( | const HandleGraph & | graph, |
const Path & | path | ||
) |
size_t vg::path_size | ( | const gbwt::vector_type & | path | ) |
size_t vg::path_size | ( | const vector< pair< vg::id_t, bool >> & | path | ) |
int vg::path_to_length | ( | const Path & | path | ) |
int vg::path_to_length | ( | const path_t & | path | ) |
string vg::path_to_string | ( | Path | p | ) |
string vg::percent_url_encode | ( | const string & | seq | ) |
Escape "%" to "%25".
double vg::Phi | ( | double | x | ) |
The standard normal cumulative distribution function.
double vg::Phi_inv | ( | double | p | ) |
Inverse CDF of a standard normal distribution. Must have 0 < quantile < 1.
|
inline |
Add two probabilities (expressed as phred scores) together and return the result as a phred score.
double vg::phred_for_at_least_one | ( | size_t | p, |
size_t | n | ||
) |
Assume that we have n independent random events that occur with probability p each (p is interpreted as a real number between 0 at 0 and 1 at its maximum value). Return an approximate probability for at least one event occurring as a phred score.
n must be <= MAX_AT_LEAST_ONE_EVENTS.
Assume that we have n <= MAX_AT_LEAST_ONE_EVENTS independent events with probability p each. Let x be the AT_LEAST_ONE_PRECISION most significant bits of p. Then
phred_at_least_one[(n << AT_LEAST_ONE_PRECISION) + x]
is an approximate phred score of at least one event occurring.
We exploit the magical thread-safety of static local initialization to fill this in exactly once when needed.
|
inline |
Take the geometric mean of two phred-encoded probabilities.
Collection::value_type vg::phred_sum | ( | const Collection & | collection | ) |
Compute the sum of the values in a collection, where the values are Phred scores and the result is the Phred score of the total probability. Items must be convertible to/from doubles for math.
std::iterator_traits<Iterator>::value_type vg::phred_sum | ( | const Iterator & | begin_it, |
const Iterator & | end_it | ||
) |
Compute the sum of the values in a collection, represented by an iterator range, where the values are Phred scores and the result is the Phred score of the total probability. Items must be convertible to/from doubles for math.
|
inline |
Convert a Phred quality score directly to a natural log probability of wrongness.
|
inline |
Convert floating point Phred quality score to probability of wrongness.
double vg::phred_to_prob | ( | uint8_t | phred | ) |
Convert 8-bit Phred quality score to probability of wrongness, using a lookup table.
vector<T> vg::pmax | ( | const std::vector< T > & | a, |
const std::vector< T > & | b | ||
) |
Container<const Item*> vg::pointerfy | ( | const Container< Item > & | in | ) |
We have a wrapper of that to turn a container reference into a container of pointers.
Compute the log probability of a Poisson-distributed process: observed events in an interval where expected events happen on average.
void vg::populate_path_from_traceback | ( | const multipath_alignment_t & | multipath_aln, |
const MultipathProblem & | problem, | ||
TracebackIterator | traceback_start, | ||
TracebackIterator | traceback_end, | ||
Path * | output | ||
) |
void vg::populate_snarl_index | ( | SnarlDistanceIndex::TemporaryDistanceIndex & | temp_index, |
pair< SnarlDistanceIndex::temp_record_t, size_t > | snarl_index, | ||
size_t | size_limit, | ||
const HandleGraph * | graph | ||
) |
Record the distance
Add the next node to the priority queue
Check the minimum length of the snarl passing through this node
|
inline |
Extract gbwt::node_type from pos_t.
pos_t vg::position_at | ( | PathPositionHandleGraph * | graph_ptr, |
const string & | path_name, | ||
const size_t & | path_offset, | ||
bool | is_reverse | ||
) |
We have a helper function to convert path positions and orientations to pos_t values.
We have a utility function for turning positions along paths, with orientations, into pos_ts. Remember that pos_t counts offset from the start of the reoriented node, while here we count offset from the beginning of the forward version of the path.
void vg::preflight_check | ( | ) |
Run a preflight check to make sure that the system is usable for this build of vg. Aborts with a helpful message if this is not the case. We make sure to build it for a lowest-common-denominator architecture.
void vg::present_kmers | ( | const std::vector< std::vector< HaplotypePartitioner::kmer_type >> & | sequences, |
std::vector< std::pair< HaplotypePartitioner::kmer_type, size_t >> & | all_kmers, | ||
sdsl::bit_vector & | kmers_present | ||
) |
void vg::printId | ( | vg::id_t | id | ) |
double vg::prob_for_at_least_one | ( | size_t | p, |
size_t | n | ||
) |
Assume that we have n independent random events that occur with probability p each (p is interpreted as a real number between 0 at 0 and 1 at its maximum value). Return an approximate probability for at least one event occurring as a raw probability.
n must be <= MAX_AT_LEAST_ONE_EVENTS.
Assume that we have n <= MAX_AT_LEAST_ONE_EVENTS independent events with probability p each. Let x be the AT_LEAST_ONE_PRECISION most significant bits of p. Then
prob_at_least_one[(n << AT_LEAST_ONE_PRECISION) + x]
is an approximate probability of at least one event occurring.
We exploit the magical thread-safety of static local initialization to fill this in exactly once when needed.
|
inline |
Convert a probability to a natural log probability.
|
inline |
Convert probability of wrongness to integer Phred quality score.
string vg::pseudo_random_sequence | ( | size_t | length, |
uint64_t | seed | ||
) |
Returns a uniformly random DNA sequence deterministically from a seed.
string vg::random_sequence | ( | size_t | length | ) |
Returns a uniformly random DNA sequence of the given length.
default_random_engine vg::random_sequence_gen | ( | 102 | ) |
vector< size_t > vg::range_vector | ( | size_t | begin, |
size_t | end | ||
) |
Vector containing positive integer values in [begin, end)
|
inline |
Vector containing positive integer values in [0, end)
gbwt::GBWT vg::rebuild_gbwt | ( | const gbwt::GBWT & | gbwt_index, |
const std::vector< RebuildJob > & | jobs, | ||
const std::unordered_map< nid_t, size_t > & | node_to_job, | ||
const RebuildParameters & | parameters | ||
) |
Rebuild the GBWT by applying all provided mappings. Each mapping is a pair (original subpath, new subpath). If the original subpath is empty, the mapping is ignored. If there are multiple applicable mappings, the first one will be used.
The mappings will be applied in both orientations. The reverse mapping replaces the reverse of the original subpath with the reverse of the new subpath.
The first and the last node can be used as context. For example (aXb, aYb) can be interpreted as "replace X with Y in context a b". If both subpaths end with the same node, the cursor will point at that node after the mapping. Otherwise the cursor will be set past the original subpath.
NOTE: To avoid infinite loops, the cursor will proceed after a mapping of the type (a, Xa).
The process can be partitioned into multiple non-overlapping jobs, each of them corresponding to one or more weakly connected components in the graph. Multiple jobs can be run in parallel using 2 threads each, and the jobs will be started from the largest to the smallest.
node_to_job maps each node identifier to the corresponding job identifier. Empty paths go to the first job, but this can be overridden by including gbwt::ENDMARKER in node_to_job.
NOTE: Paths may be reordered if there are multiple jobs. Old path ids are no longer valid after rebuilding the GBWT.
NOTE: This could use the ConstructionJob / MetadataBuilder scheme for parallelization, but it would change the interface.
gbwt::GBWT vg::rebuild_gbwt | ( | const gbwt::GBWT & | gbwt_index, |
const std::vector< RebuildJob::mapping_type > & | mappings | ||
) |
As the general rebuild_gbwt, but always using a single job with default parameters.
gbwt::GBWT vg::rebuild_gbwt_job | ( | const gbwt::GBWT & | gbwt_index, |
const RebuildJob & | job, | ||
size_t | job_id, | ||
const std::vector< gbwt::size_type > & | sequences, | ||
const RebuildParameters & | parameters | ||
) |
void vg::recombinator_sanity_checks | ( | const Recombinator::Parameters & | parameters | ) |
vector< double > vg::regress | ( | const vector< vector< double >> & | X, |
vector< double > & | y | ||
) |
Returns the coefficients of a regression (does not automatically compute constant)
void vg::remove_duplicate_edges | ( | Graph & | graph | ) |
remove duplicate edges
void vg::remove_duplicate_nodes | ( | Graph & | graph | ) |
remove duplicate nodes
void vg::remove_duplicates | ( | Graph & | graph | ) |
remove duplicate nodes and edges
void vg::remove_duplicates | ( | std::vector< GaplessExtension > & | result | ) |
void vg::remove_empty_alignment_sections | ( | multipath_alignment_t & | multipath_aln | ) |
Removes all edits, mappings, and subpaths that have no aligned bases, and introduces transitive edges to preserve connectivity through any completely removed subpaths
void vg::remove_low_scoring_sections | ( | multipath_alignment_t & | multipath_aln, |
int32_t | max_score_diff | ||
) |
Removes all subpaths and edges whose optimal full length alignment is less than the given difference from the highest-scoring full length alignment
void vg::remove_orphan_edges | ( | Graph & | graph | ) |
remove edges that link to a node that is not in the graph
void vg::remove_paths | ( | Graph & | graph, |
const function< bool(const string &)> & | paths_to_take, | ||
std::list< Path > * | matching | ||
) |
string vg::replace_in_string | ( | string | subject, |
const string & | search, | ||
const string & | replace | ||
) |
void vg::report_exception | ( | const std::exception & | ex | ) |
User code should call this if it catches an exception it doesn't know what to do with.
void vg::rev_comp_multipath_alignment | ( | const multipath_alignment_t & | multipath_aln, |
const function< int64_t(int64_t)> & | node_length, | ||
multipath_alignment_t & | rev_comp_out | ||
) |
Stores the reverse complement of a multipath_alignment_t in another multipath_alignment_t
Args: multipath_aln multipath alignment to reverse complement node_length a function that returns the length of a node sequence from its node ID rev_comp_out empty multipath alignment to store reverse complement in (some data may be overwritten if not empty)
void vg::rev_comp_multipath_alignment_in_place | ( | multipath_alignment_t * | multipath_aln, |
const function< int64_t(int64_t)> & | node_length | ||
) |
Reverse complements a multipath_alignment_t in place
Args: multipath_aln multipath alignment to reverse complement in place node_length a function that returns the length of a node sequence from its node ID
|
inline |
Stores the reverse complement of a Subpath in another Subpath
note: this is not included in the header because reversing a subpath without going through the multipath alignment can break invariants related to the edge lists
Args: subpath subpath to reverse complement node_length a function that returns the length of a node sequence from its node ID rev_comp_out empty subpath to store reverse complement in (data will be overwritten if not empty)
Reverse a pos_t and get a pos_t at the same point between bases, going the other direction. To get a pos_t to the same base, subtract 1 from the resulting offset or call reverse_base_pos().
Reverse a pos_t and get a pos_t at the same base, going the other direction.
char vg::reverse_complement | ( | const char & | c | ) |
string vg::reverse_complement | ( | const string & | seq | ) |
Alignment vg::reverse_complement_alignment | ( | const Alignment & | aln, |
const function< int64_t(id_t)> & | node_length | ||
) |
void vg::reverse_complement_alignment_in_place | ( | Alignment * | aln, |
const function< int64_t(id_t)> & | node_length | ||
) |
vector< Alignment > vg::reverse_complement_alignments | ( | const vector< Alignment > & | alns, |
const function< int64_t(int64_t)> & | node_length | ||
) |
void vg::reverse_complement_in_place | ( | string & | seq | ) |
Mapping vg::reverse_complement_mapping | ( | const Mapping & | m, |
const function< int64_t(id_t)> & | node_length | ||
) |
path_mapping_t vg::reverse_complement_mapping | ( | const path_mapping_t & | m, |
const function< int64_t(id_t)> & | node_length | ||
) |
void vg::reverse_complement_mapping_in_place | ( | Mapping * | m, |
const function< int64_t(id_t)> & | node_length | ||
) |
void vg::reverse_complement_mapping_in_place | ( | path_mapping_t * | m, |
const function< int64_t(id_t)> & | node_length | ||
) |
Path vg::reverse_complement_path | ( | const Path & | path, |
const function< int64_t(id_t)> & | node_length | ||
) |
path_t vg::reverse_complement_path | ( | const path_t & | path, |
const function< int64_t(id_t)> & | node_length | ||
) |
void vg::reverse_complement_path_in_place | ( | Path * | path, |
const function< int64_t(id_t)> & | node_length | ||
) |
void vg::reverse_complement_path_in_place | ( | path_t * | path, |
const function< int64_t(id_t)> & | node_length | ||
) |
BenchmarkResult vg::run_benchmark | ( | const string & | name, |
size_t | iterations, | ||
const function< void(void)> & | setup, | ||
const function< void(void)> & | under_test | ||
) |
Run a benchmark with a setup function.
BenchmarkResult vg::run_benchmark | ( | const string & | name, |
size_t | iterations, | ||
const function< void(void)> & | under_test | ||
) |
Run the given function the given number of times, interleaved with runs of the control function, and return a BenchmarkResult describing its performance.
tuple<MultipathProblem, int64_t, int32_t> vg::run_multipath_dp | ( | const multipath_alignment_t & | multipath_aln, |
bool | subpath_global = false , |
||
bool | forward = true |
||
) |
Internal helper function for running the dynamic programming problem represented by a multipath alignment. Returns the filled DP problem, the optimal ending subpath, or -1 if no subpath is optimal, and the optimal score, or 0 if no score is optimal. An option toggles whether the traceback should be global (a source to a sink in the multipath DAG) or local (starting and ending at any subpath)
void vg::sa_to_da | ( | std::vector< HaplotypePartitioner::sequence_type > & | sequences, |
const gbwt::FastLocate & | r_index | ||
) |
int32_t vg::sam_flag | ( | const Alignment & | alignment, |
bool | on_reverse_strand, | ||
bool | paired | ||
) |
void vg::save_gbwt | ( | const gbwt::DynamicGBWT & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Save a dynamic GBWT to the file.
void vg::save_gbwt | ( | const gbwt::GBWT & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Save a compressed GBWT to the file.
void vg::save_gbwtgraph | ( | const gbwtgraph::GBWTGraph & | graph, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Save GBWTGraph to the file.
void vg::save_gbz | ( | const gbwt::GBWT & | index, |
gbwtgraph::GBWTGraph & | graph, | ||
const std::string & | filename, | ||
bool | show_progress | ||
) |
Save GBWT and GBWTGraph to the GBZ file.
void vg::save_gbz | ( | const gbwtgraph::GBZ & | gbz, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Save GBZ to the file.
void vg::save_gbz | ( | const gbwtgraph::GBZ & | gbz, |
const std::string & | gbwt_name, | ||
const std::string & | graph_name, | ||
bool | show_progress | ||
) |
Save GBZ to separate GBWT / GBWTGraph files.
void vg::save_gcsa | ( | const gcsa::GCSA & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Save GCSA to the file.
void vg::save_lcp | ( | const gcsa::LCPArray & | lcp, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Save LCP array to the file.
void vg::save_minimizer | ( | const gbwtgraph::DefaultMinimizerIndex & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Save a minimizer index to the file.
void vg::save_r_index | ( | const gbwt::FastLocate & | index, |
const std::string & | filename, | ||
bool | show_progress | ||
) |
Save an r-index to the file.
int32_t vg::score_gap | ( | size_t | gap_length, |
int32_t | gap_open, | ||
int32_t | gap_extension | ||
) |
Score a gap with the given open and extension scores.
vector< tuple< int64_t, int64_t, int64_t, int64_t > > vg::search_multipath_alignment | ( | const multipath_alignment_t & | multipath_aln, |
const pos_t & | graph_pos, | ||
int64_t | seq_pos | ||
) |
Returns all of the positions where a given sequence index occurs at a given graph position (if any), where positions are represented as tuples of (subpath index, mapping index, edit index, index within edit)
std::vector<std::pair<size_t, double> > vg::select_diploid | ( | const Haplotypes::Subchain & | subchain, |
const std::vector< std::pair< size_t, double >> & | candidates, | ||
const std::vector< std::pair< Recombinator::kmer_presence, double >> & | kmer_types | ||
) |
std::vector<std::pair<size_t, double> > vg::select_haplotypes | ( | const Haplotypes::Subchain & | subchain, |
const hash_map< Haplotypes::Subchain::kmer_type, size_t > & | kmer_counts, | ||
double | coverage, | ||
Recombinator::Statistics * | statistics, | ||
std::vector< Recombinator::LocalHaplotype > * | local_haplotypes, | ||
const Recombinator::Parameters & | parameters | ||
) |
|
inline |
Set the annotation with the given name to the given value. The value may be a primitive type or a vector of a primitive type.
|
inline |
Set the annotation with the given name to the given value. The value may be a primitive type or a vector of a primitive type.
void vg::set_crash_context | ( | const std::string & | message | ) |
User code should call this when it has context for a failure in its thread.
|
inline |
void vg::set_score | ( | GaplessExtension & | extension, |
const Aligner * | aligner | ||
) |
const std::string vg::sha1head | ( | const std::string & | data, |
size_t | head | ||
) |
const std::string vg::sha1sum | ( | const std::string & | data | ) |
string vg::signature | ( | const Alignment & | aln | ) |
Merge adjacent edits of the same type, strip leading and trailing deletion edits (while updating positions if necessary), and makes sure position is actually set.
Simplify the path for addition as new material in the graph. Remove any mappings that are merely single deletions, merge adjacent edits of the same type, strip leading and trailing deletion edits on mappings, and make sure no mappings have missing positions.
void vg::simplify_cigar | ( | vector< pair< int, char >> & | cigar | ) |
Merge runs of successive I/D operations into a single I and D, remove 0-length operations, and merge adjacent operations of the same type
bool vg::simplify_filtered_edits | ( | HandleGraph * | graph, |
Alignment & | aln, | ||
Path & | path, | ||
const map< pos_t, id_t > & | node_translation, | ||
const unordered_map< id_t, size_t > & | orig_node_sizes, | ||
double | min_baseq = 0 , |
||
double | max_frac_n = 1. |
||
) |
Remove edits in our graph that don't correspond to breakpoints (i.e. were effectively filtered out due to insufficient coverage). This way, subsequent logic in add_nodes_and_edges can be run correctly. Returns true if at least one edit survived the filter.
double vg::slope | ( | const std::vector< double > & | x, |
const std::vector< double > & | y | ||
) |
int vg::softclip_end | ( | const Alignment & | alignment | ) |
int vg::softclip_end | ( | const Mapping & | mapping | ) |
int vg::softclip_start | ( | const Alignment & | alignment | ) |
int vg::softclip_start | ( | const Mapping & | mapping | ) |
int vg::softclip_trim | ( | Alignment & | alignment | ) |
void vg::sort_by_id | ( | Graph & | graph | ) |
order the nodes and edges in the graph by id
void vg::sort_by_id_dedup_and_clean | ( | Graph & | graph | ) |
remove duplicates and sort by id
void vg::sort_edges_by_id | ( | Graph & | graph | ) |
order the edges in the graph by id pairs
void vg::sort_nodes_by_id | ( | Graph & | graph | ) |
order the nodes in the graph by id
bool vg::sort_pair_by_second | ( | const pair< uint32_t, uint32_t > & | lhs, |
const pair< uint32_t, uint32_t > & | rhs | ||
) |
std::vector<size_t> vg::sort_permutation | ( | const Iterator & | begin, |
const Iterator & | end | ||
) |
Get the index permutation that sorts the given items ascending using <.
std::vector<size_t> vg::sort_permutation | ( | const Iterator & | begin, |
const Iterator & | end, | ||
const std::function< bool(const typename Iterator::value_type &, const typename Iterator::value_type &)> & | comparator | ||
) |
Get the index permutation that sorts the given items with the given comparator instead of <.
void vg::sort_shuffling_ties | ( | RandomIt | begin, |
RandomIt | end, | ||
Compare | comp | ||
) |
Sort the items between the two given random-access iterators, as with std::sort. Deterministically shuffle the ties, if any, at the top end, using automatic seed determination as defined by a make_shuffle_seed() overload for the collection's item type.
void vg::sort_shuffling_ties | ( | RandomIt | begin, |
RandomIt | end, | ||
Compare | comp, | ||
LazyRNG & | rng | ||
) |
Sort the items between the two given random-access iterators, as with std::sort. Deterministically shuffle the ties, if any, at the top end.
bool vg::sort_transcript_paths_by_name | ( | const CompletedTranscriptPath & | lhs, |
const CompletedTranscriptPath & | rhs | ||
) |
std::vector< std::string > vg::split_delims | ( | const std::string & | s, |
const std::string & | delims, | ||
size_t | max_cuts | ||
) |
std::vector< std::string > & vg::split_delims | ( | const std::string & | s, |
const std::string & | delims, | ||
std::vector< std::string > & | elems, | ||
size_t | max_cuts | ||
) |
pair< string, string > vg::split_ext | ( | const string & | filename | ) |
Split off the extension from a filename and return both parts.
pair<pair<path_t, int32_t>, pair<path_t, int32_t> > vg::split_splice_segment | ( | const Alignment & | splice_segment, |
const tuple< int64_t, int64_t, int64_t > & | left_trace, | ||
const tuple< int64_t, int64_t, int64_t > & | right_trace, | ||
int64_t | splice_junction_idx, | ||
const GSSWAligner & | scorer, | ||
const HandleGraph & | graph | ||
) |
std::vector< size_t > vg::stack_permutations | ( | const std::vector< size_t > & | bottom, |
const std::vector< size_t > & | top | ||
) |
Apply one permutation on top of another. Return the combined permutation.
void vg::stacktrace_manually | ( | ostream & | out, |
int | signalNumber, | ||
void * | ip, | ||
void ** | bp | ||
) |
bool vg::start_backward | ( | const Chain & | chain | ) |
Return true if the first snarl in the given chain is backward relative to the chain.
bool vg::starts_with | ( | const std::string & | value, |
const std::string & | prefix | ||
) |
Check if a string starts with another string.
double vg::stdev | ( | const T & | v | ) |
void vg::stubbify_ref_paths | ( | MutablePathMutableHandleGraph * | graph, |
const vector< string > & | ref_prefixes, | ||
int64_t | min_fragment_len, | ||
bool | verbose | ||
) |
stubbify reference
int vg::sub_overlaps_of_first_aln | ( | const vector< Alignment > & | alns, |
float | overlap_fraction | ||
) |
void vg::subgraph_containing_path_snarls | ( | const SnarlDistanceIndex & | distance_index, |
const HandleGraph * | graph, | ||
const Path & | path, | ||
std::unordered_set< nid_t > & | subgraph | ||
) |
void vg::subgraph_in_distance_range | ( | const SnarlDistanceIndex & | distance_index, |
const Path & | path, | ||
const HandleGraph * | super_graph, | ||
size_t | min_distance, | ||
size_t | max_distance, | ||
std::unordered_set< nid_t > & | subgraph, | ||
bool | look_forward | ||
) |
void vg::subgraph_in_distance_range_walk_across_chain | ( | const SnarlDistanceIndex & | distance_index, |
const HandleGraph * | super_graph, | ||
std::unordered_set< nid_t > & | subgraph, | ||
net_handle_t | current_node, | ||
size_t | current_distance, | ||
vector< pair< handle_t, size_t >> & | search_start_nodes, | ||
hash_set< pair< nid_t, bool >> & | seen_nodes, | ||
const size_t & | min_distance, | ||
const size_t & | max_distance, | ||
bool | checked_loop | ||
) |
void vg::subgraph_in_distance_range_walk_graph | ( | const HandleGraph * | super_graph, |
size_t | min_distance, | ||
size_t | max_distance, | ||
std::unordered_set< nid_t > & | subgraph, | ||
vector< pair< handle_t, size_t >> & | start_nodes, | ||
hash_set< pair< nid_t, bool >> & | seen_nodes, | ||
const pair< nid_t, bool > & | traversal_start | ||
) |
Helper for subgraph_in_distance_range Given starting handles in the super graph and the distances to each handle (including the start position and
vector<size_t> vg::subpath_topological_order | ( | const multipath_alignment_t & | multipath_aln, |
bool | do_index | ||
) |
Return either the vector of topological order by index or the vector of indexes within the topological order.
|
inline |
Collection::value_type vg::sum | ( | const Collection & | collection | ) |
Compute the sum of the values in a collection. Values must be default-constructible (like numbers are).
SummaryStatistics vg::summary_statistics | ( | const std::map< Number, size_t > & | values | ) |
Returns summary statistics for a multiset of numbers.
Get the maximum support of a pair of Supports, by taking the max in each orientation.
Get the minimum support of a pair of Supports, by taking the min in each orientation.
std::vector<HaplotypePartitioner::kmer_type> vg::take_unique_minimizers | ( | const std::string & | sequence, |
const HaplotypePartitioner::minimizer_index_type & | minimizer_index | ||
) |
Alignment vg::target_alignment | ( | const PathPositionHandleGraph * | graph, |
const path_handle_t & | path, | ||
size_t | pos1, | ||
size_t | pos2, | ||
const string & | feature, | ||
bool | is_reverse | ||
) |
Make an Alignment corresponding to a subregion of a stored path. Positions are 0-based, and pos2 is excluded. Respects path circularity, so pos2 < pos1 is not a problem. If pos1 == pos2, returns an empty alignment.
Alignment vg::target_alignment | ( | const PathPositionHandleGraph * | graph, |
const path_handle_t & | path, | ||
size_t | pos1, | ||
size_t | pos2, | ||
const string & | feature, | ||
bool | is_reverse, | ||
Mapping & | cigar_mapping | ||
) |
void vg::thread_to_graph_spanned | ( | thread_t & | t, |
Graph & | g, | ||
const HandleGraph & | source | ||
) |
std::vector< gbwt::size_type > vg::threads_for_contig | ( | const gbwt::GBWT & | gbwt_index, |
const std::string & | contig_name | ||
) |
Return the list of thread ids / gbwt path ids for the given contig.
std::vector< gbwt::size_type > vg::threads_for_sample | ( | const gbwt::GBWT & | gbwt_index, |
const std::string & | sample_name | ||
) |
Return the list of thread ids / gbwt path ids for the given sample.
|
inline |
Convert a snarl traversal into an alignment.
edge_t vg::to_edge | ( | const HandleGraph & | graph, |
const Visit & | v1, | ||
const Visit & | v2 | ||
) |
Make an edge_t from a pair of visits.
int vg::to_length | ( | const Mapping & | m | ) |
|
inline |
void vg::to_multipath_alignment | ( | const Alignment & | aln, |
multipath_alignment_t & | multipath_aln_out | ||
) |
Converts an Alignment into a multipath_alignment_t with one subpath and stores it in an object
Args: aln alignment to convert multipath_aln_out empty multipath alignment to store converted alignment in (data may be overwritten if not empty)
|
inline |
Converts a Visit to a NodeTraversal. Throws an exception if the Visit is of a Snarl instead of a Node
|
inline |
void vg::to_proto_mapping | ( | const path_mapping_t & | mapping, |
Mapping & | proto_mapping | ||
) |
void vg::to_proto_multipath_alignment | ( | const multipath_alignment_t & | multipath_aln, |
MultipathAlignment & | proto_multipath_aln_out | ||
) |
Convert an STL-based multipath_alignment_t to a protobuf MultipathAlignment.
|
inline |
Converts a Visit to a NodeTraversal in the opposite orientation. Throws an exception if the Visit is of a Snarl instead of a Node
|
inline |
|
inline |
Converts a NodeTraversal to a Visit in the opposite orientation.
|
inline |
|
inline |
std::string vg::to_string | ( | handle_t | handle | ) |
std::string vg::to_string_gbwtgraph | ( | gbwt::node_type | node | ) |
Returns a string representation of a GBWTGraph node.
std::string vg::to_string_gbwtgraph | ( | handle_t | handle | ) |
Returns a string representation of a GBWTGraph handle.
string vg::to_vcf_genotype | ( | const Genotype & | gt | ) |
Get a VCF-style 1/2, 1|2|3, etc. string from a Genotype.
|
inline |
Make a Visit from a handle in a HandleGraph.
|
inline |
Converts a NodeTraversal to a Visit.
Make a Visit from a node ID and an orientation.
void vg::topologically_order_subpaths | ( | multipath_alignment_t & | multipath_aln | ) |
Put subpaths in topological order (assumed to be true for other algorithms)
string vg::toUppercase | ( | const string & | s | ) |
void vg::toUppercaseInPlace | ( | string & | s | ) |
void vg::trace_haplotypes_and_paths | ( | const PathHandleGraph & | source, |
const gbwt::GBWT & | haplotype_database, | ||
vg::id_t | start_node, | ||
int | extend_distance, | ||
Graph & | out_graph, | ||
map< string, int > & | out_thread_frequencies, | ||
bool | expand_graph | ||
) |
pair< tuple< int64_t, int64_t, int64_t >, vector< tuple< int64_t, int64_t, int64_t, int64_t > > > vg::trace_path | ( | const multipath_alignment_t & | multipath_aln, |
const Path & | path, | ||
int64_t | subpath_idx, | ||
int64_t | mapping_idx, | ||
int64_t | edit_idx, | ||
int64_t | base_idx, | ||
bool | search_left, | ||
int64_t | search_limit | ||
) |
Returns a pair of (mapping, edit, base) and possibly multiple (subpath, mapping, edit, base), of the furthest position that can be traced through the multipath alignment along the path starting at the indicated position in the multipath alignment. The path can be traced rightward starting at the beginning, or leftward starting at the end. Search is limited to not passing a given mapping on the path.
bool vg::transcript_file_nonempty | ( | const string & | transcripts | ) |
void vg::transfer_between_proto_annotation | ( | const ProtoAlignment1 & | from, |
ProtoAlignment2 & | to | ||
) |
Copies the boundary Visits from one Snarl into another.
void vg::transfer_from_proto_annotation | ( | const ProtoAlignment & | from, |
multipath_alignment_t & | to | ||
) |
void vg::transfer_proto_metadata | ( | const Alignment & | from, |
MultipathAlignment & | to | ||
) |
Transfer the annotations that are carried with the Protobuf formats but not the internal multipath_alignment_t (and which therefore get lost when using it as an intermediate format).
void vg::transfer_proto_metadata | ( | const MultipathAlignment & | from, |
Alignment & | to | ||
) |
void vg::transfer_read_metadata | ( | const Alignment & | from, |
multipath_alignment_t & | to | ||
) |
All functions of this form transfer:
void vg::transfer_read_metadata | ( | const multipath_alignment_t & | from, |
Alignment & | to | ||
) |
void vg::transfer_read_metadata | ( | const multipath_alignment_t & | from, |
multipath_alignment_t & | to | ||
) |
void vg::transfer_read_metadata | ( | const multipath_alignment_t & | from, |
MultipathAlignment & | to | ||
) |
void vg::transfer_read_metadata | ( | const MultipathAlignment & | from, |
multipath_alignment_t & | to | ||
) |
void vg::transfer_to_proto_annotation | ( | const multipath_alignment_t & | from, |
ProtoAlignment & | to | ||
) |
void vg::transfer_uniform_metadata | ( | const Alignment1 & | from, |
Alignment2 & | to | ||
) |
vector< MaximalExactMatch > vg::translate_mems | ( | const vector< MaximalExactMatch > & | mems, |
const unordered_map< id_t, pair< id_t, bool > > & | trans | ||
) |
Switches the node ids in the path to the ones indicated by the translator.
void vg::translate_node_ids | ( | Path & | path, |
const unordered_map< id_t, id_t > & | translator, | ||
id_t | cut_node, | ||
size_t | bases_removed, | ||
bool | from_right | ||
) |
Replaces the node IDs in the path with the ones indicated by the translator. Supports a single cut node in the source graph, where the given number of bases of the given node were removed from its left or right side when making the source graph from the destination graph.
void vg::translate_nodes | ( | Alignment & | a, |
const unordered_map< id_t, pair< id_t, bool > > & | ids, | ||
const std::function< size_t(int64_t)> & | node_length | ||
) |
void vg::translate_oriented_node_ids | ( | Path & | path, |
const function< pair< id_t, bool >(id_t)> & | translator | ||
) |
Switches node ids and orientations in the path to the ones indicated by the translator.
void vg::translate_oriented_node_ids | ( | Path & | path, |
const unordered_map< id_t, pair< id_t, bool >> & | translator | ||
) |
Switches the node ids and orientations in the path to the ones indicated by the translator.
void vg::translate_oriented_node_ids | ( | path_t & | path, |
const function< pair< id_t, bool >(id_t)> & | translator | ||
) |
void vg::translate_oriented_node_ids | ( | path_t & | path, |
const unordered_map< id_t, pair< id_t, bool >> & | translator | ||
) |
vector< vector< double > > vg::transpose | ( | const vector< vector< double >> & | A | ) |
A shitty set of linear algebra functions.
string vg::traversal_to_string | ( | const PathHandleGraph * | graph, |
const Traversal & | traversal, | ||
int64_t | max_steps | ||
) |
string vg::traversal_to_string | ( | VG & | graph, |
const SnarlTraversal & | path | ||
) |
Make a SnarlTraversal into the string it represents, including notes for nested child snarls.
Alignment vg::trim_alignment | ( | const Alignment & | aln, |
const Position & | pos1, | ||
const Position & | pos2 | ||
) |
bool vg::trim_mismatches | ( | GaplessExtension & | extension, |
const gbwtgraph::CachedGBWTGraph & | graph, | ||
const Aligner & | aligner | ||
) |
bool vg::trim_path | ( | path_t * | path, |
bool | from_left, | ||
int64_t | mapping_idx, | ||
int64_t | edit_idx, | ||
int64_t | base_idx | ||
) |
tuple< pos_t, int64_t, int32_t > vg::trimmed_end | ( | const Alignment & | aln, |
int64_t | len, | ||
bool | from_end, | ||
const HandleGraph & | graph, | ||
const GSSWAligner & | aligner | ||
) |
bool vg::uses_Us | ( | const Alignment & | alignment | ) |
Returns true if the alignment sequence contains any U's and false if the alignment sequence contains any T's. In the case that both T's and U's are included, responds according to whichever comes first. If the sequence contains neither U's nor T's, returns false.
bool vg::validate_multipath_alignment | ( | const multipath_alignment_t & | multipath_aln, |
const HandleGraph & | handle_graph | ||
) |
Debugging function to check that multipath alignment meets the formalism's basic invariants. Returns true if multipath alignment is valid, else false. Does not validate alignment score.
|
inline |
|
inline |
Cast a Protobuf generic Value to any type.
|
inline |
Cast a Protobuf generic Value to any type.
|
inline |
Cast any type to a generic Protobuf value.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
void vg::variant_recall | ( | VG * | graph, |
vcflib::VariantCallFile * | vars, | ||
FastaReference * | ref_genome, | ||
vector< FastaReference * > | insertions, | ||
string | gamfile | ||
) |
run with : vg genotype -L -V v.vcf -I i.fa -R ref.fa
vector<string> vg::vcf_contigs | ( | const string & | filename | ) |
bool vg::verify_path | ( | const PathType & | path, |
MutableHandleGraph & | unfolded, | ||
const hash_map< vg::id_t, std::vector< vg::id_t >> & | reverse_mapping | ||
) |
void vg::view_multipath_alignment | ( | ostream & | out, |
const multipath_alignment_t & | multipath_aln, | ||
const HandleGraph & | handle_graph | ||
) |
Send a formatted string representation of the multipath_alignment_t into the ostream.
void vg::view_multipath_alignment_as_dot | ( | ostream & | out, |
const multipath_alignment_t & | multipath_aln, | ||
bool | show_graph | ||
) |
Converts a multipath_alignment_t to a GraphViz Dot representation, output to the given ostream.
void vg::visit_contained_snarls | ( | const PathPositionHandleGraph * | graph, |
const vector< Region > & | regions, | ||
SnarlManager & | snarl_manager, | ||
bool | include_endpoints, | ||
function< void(const Snarl *, step_handle_t, step_handle_t, int64_t, int64_t, bool, const Region *)> | visit_fn | ||
) |
Visit each snarl if it is fully contained in at least one region from the input set. Only the top-most snarl is visited. The parameters to visit_fn are: <the snarl, start_step, end_step, steps_reversed, the containing input region>
vector<T> vg::vpmax | ( | const std::vector< std::vector< T >> & | vv | ) |
|
inline |
Thomas Wang's integer hash function. In many implementations, std::hash is the identity function for integers, which leads to performance issues.
|
inline |
Return the CDF of a max exponential with the given parameters.
double vg::weibull_log_likelihood | ( | const vector< double > & | x, |
double | scale, | ||
double | shape, | ||
double | location | ||
) |
Returns the log likelihood of some data generated by a Weibull distribution.
double vg::weighted_jaccard_coefficient | ( | const PathHandleGraph * | graph, |
const multiset< handle_t > & | target, | ||
const multiset< handle_t > & | query | ||
) |
pair< double, double > vg::wellford_mean_var | ( | size_t | count, |
double | mean, | ||
double | M2, | ||
bool | sample_variance | ||
) |
void vg::wellford_update | ( | size_t & | count, |
double & | mean, | ||
double & | M2, | ||
double | new_val | ||
) |
void vg::with_exception_handling | ( | const std::function< void(void)> & | body | ) |
User code should call this to get all its exceptions handled.
int32_t vg::worst_alignment_score | ( | const multipath_alignment_t & | multipath_aln | ) |
Returns the score of the lowest-scoring source-to-sink alignment in the multipath_alignment_t. Assumes that subpaths are topologically ordered and starts have been identified.
string vg::wrap_text | ( | const string & | str, |
size_t | width | ||
) |
void vg::write_alignment_to_file | ( | const Alignment & | aln, |
const string & | filename | ||
) |
void vg::write_fasta_sequence | ( | const std::string & | name, |
const std::string & | sequence, | ||
ostream & | os, | ||
size_t | width | ||
) |
void vg::write_gcsa_kmers | ( | const HandleGraph & | graph, |
int | kmer_size, | ||
ostream & | out, | ||
size_t & | size_limit, | ||
id_t | head_id, | ||
id_t | tail_id | ||
) |
Write GCSA2 formatted binary KMers to the given ostream. size_limit is the maximum size of the kmer file in bytes. When the function returns, size_limit is the size of the kmer file in bytes.
string vg::write_gcsa_kmers_to_tmpfile | ( | const HandleGraph & | graph, |
int | kmer_size, | ||
size_t & | size_limit, | ||
id_t | head_id, | ||
id_t | tail_id, | ||
const string & | base_file_name = "vg-kmers-tmp-" |
||
) |
Open a tempfile and write the kmers to it. The calling context should remove it with temp_file::remove(). In the case that the size limit is exceeded, throws a SizeLimitExceededException and deletes the temp file.
size_t vg::xg_index_size | ( | const xg::XG & | index | ) |
const char* const vg::BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN" |
bool vg::fullTrace = false |
const char* vg::ISSUE_URL = "https://github.com/vgteam/vg/issues/new/choose" |
const alignment_index_t vg::NO_INDEX {std::numeric_limits<size_t>::max(), std::numeric_limits<size_t>::max(), std::numeric_limits<bool>::max()} |
Represents an unset index.
const read_alignment_index_t vg::NO_READ_INDEX = {std::numeric_limits<size_t>::infinity(), std::numeric_limits<size_t>::infinity()} |
Represents an unset index.
thread_local std::string vg::stored_crash_context |
const char* vg::var = "VG_FULL_TRACEBACK" |