vg
tools for working with variation graphs
|
#include <readfilter.hpp>
Public Member Functions | |
Counts | filter_alignment (Read &aln) |
int | filter (istream *alignment_stream) |
bool | trim_ambiguous_ends (Read &read, int k) const |
int | filter (istream *alignment_stream) |
int | filter (istream *alignment_stream) |
bool | trim_ambiguous_ends (Alignment &alignment, int k) const |
bool | trim_ambiguous_ends (MultipathAlignment &aln, int k) const |
Public Attributes | |
bool | complement_filter = false |
Actually take the complement of the filter. More... | |
vector< string > | name_prefixes |
bool | exact_name = false |
Read name must not have anything in it besides the prefix. More... | |
vector< regex > | excluded_refpos_contigs |
Read must not have a refpos set with a contig name containing a match to any of these. More... | |
vector< string > | subsequences |
Read must contain at least one of these strings as a subsequence. More... | |
unordered_set< string > | excluded_features |
double | min_secondary = numeric_limits<double>::lowest() |
double | min_primary = numeric_limits<double>::lowest() |
bool | rescore = false |
bool | frac_score = false |
bool | sub_score = false |
int | max_overhang = numeric_limits<int>::max() / 2 |
int | min_end_matches = numeric_limits<int>::min() / 2 |
bool | verbose = false |
double | min_mapq = numeric_limits<double>::lowest() |
int | repeat_size = 0 |
bool | drop_split = false |
Should we drop split reads that follow edges not in the graph? More... | |
double | downsample_probability = 1.0 |
We can also pseudorandomly drop reads. What's the probability that we keep a read? More... | |
uint32_t | downsample_seed_mask = 0 |
int | defray_length = 0 |
int | defray_count = 99999 |
Limit defray recursion to visit this many nodes. More... | |
bool | only_proper_pairs = true |
Filter to proper pairs. More... | |
bool | only_mapped = true |
Filter to only mapped reads. More... | |
int | threads = -1 |
Number of threads from omp. More... | |
int | buffer_size = 512 |
GAM output buffer size. More... | |
bool | write_output = true |
bool | write_tsv = false |
Sometimes we want to pick out fields and write a tsv instead of a gam. More... |
vector< string > | output_fields |
What fields do we want to write to the tsv? More... | |
const HandleGraph * | graph = nullptr |
A HandleGraph is required for some filters (Note: ReadFilter doesn't own/free this) More... | |
bool | interleaved = false |
Interleaved input. More... | |
bool | filter_on_all = false |
int | min_base_quality = numeric_limits<int>::min() / 2 |
double | min_base_quality_fraction = numeric_limits<double>::lowest() |
string | annotation_to_match = "" |
bool | only_correctly_mapped = false |
Filter to only correctly mapped reads. More... | |
Private Member Functions | |
bool | has_repeat (const Read &read, int k) const |
bool | is_mapped (const Read &read) const |
bool | trim_ambiguous_end (Alignment &alignment, int k) const |
bool | is_split (const Read &read) const |
bool | sample_read (const Read &read) const |
Alignment | to_alignment (const MultipathAlignment &multipath_aln) const |
double | get_score (const Read &read) const |
bool | matches_name (const Read &read) const |
bool | has_excluded_refpos (const Read &read) const |
bool | has_excuded_feature (const Read &read) const |
bool | is_secondary (const Read &read) const |
int | get_overhang (const Read &read) const |
int | alignment_overhang (const Alignment &aln) const |
int | get_end_matches (const Read &read) const |
int | alignment_end_matches (const Alignment &aln) const |
int | get_mapq (const Read &read) const |
double | get_min_base_qual_fraction (const Read &read) const |
bool | get_is_paired (const Read &read) const |
bool | is_proper_pair (const Read &read) const |
bool | contains_subsequence (const Read &read) const |
bool | matches_annotation (const Read &read) const |
bool | is_correctly_mapped (const Read &read) const |
void | emit (Read &read) |
void | emit (Read &read1, Read &read2) |
void | emit_tsv (Read &read) |
void | filter_internal (istream *in) |
Helper function for filter. More... | |
bool | is_mapped (const Alignment &alignment) const |
bool | is_mapped (const MultipathAlignment &mp_alignment) const |
bool | is_correctly_mapped (const Alignment &alignment) const |
bool | is_correctly_mapped (const MultipathAlignment &alignment) const |
void | emit (Alignment &aln) |
void | emit (Alignment &aln1, Alignment &aln2) |
void | emit (MultipathAlignment &mp_aln) |
void | emit (MultipathAlignment &mp_aln1, MultipathAlignment &mp_aln2) |
double | get_score (const Alignment &aln) const |
double | get_score (const MultipathAlignment &read) const |
bool | has_excluded_refpos (const MultipathAlignment &read) const |
bool | has_excluded_refpos (const Alignment &aln) const |
bool | is_secondary (const MultipathAlignment &mp_aln) const |
bool | is_secondary (const Alignment &aln) const |
int | get_overhang (const Alignment &aln) const |
int | get_overhang (const MultipathAlignment &mp_aln) const |
int | get_end_matches (const MultipathAlignment &read) const |
int | get_end_matches (const Alignment &read) const |
bool | is_split (const Alignment &alignment) const |
bool | is_split (const MultipathAlignment &mp_aln) const |
bool | get_is_paired (const Alignment &aln) const |
bool | get_is_paired (const MultipathAlignment &mp_aln) const |
void | emit_tsv (MultipathAlignment &read) |
void | emit_tsv (Alignment &read) |
Private Attributes | |
unique_ptr< AlignmentEmitter > | aln_emitter |
The two specializations have different writing infrastructure. More... |
unique_ptr< MultipathAlignmentEmitter > | mp_aln_emitter |
|
private |
Internal helper for get_end_matches
|
private |
Internal helper for get_overhang
|
private |
Does the read contain at least one of the indicated sequences
|
inlineprivate |
|
inlineprivate |
|
inlineprivate |
|
inlineprivate |
|
private |
Write the read to stdout
|
private |
Write a read pair to stdout
|
inlineprivate |
|
inlineprivate |
|
private |
Write a tsv line for a read to stdout
int vg::ReadFilter< Read >::filter | ( | istream * | alignment_stream | ) |
Filter the alignments available from the given stream, placing them on standard output or in the appropriate file. Returns 0 on success, exit code to use on error.
|
inline |
|
inline |
Counts vg::ReadFilter< Read >::filter_alignment | ( | Read & | aln | ) |
Run all the filters on an alignment. The alignment may get modified in-place by the defray filter
|
private |
Helper function for filter.
Template implementations
|
inlineprivate |
|
inlineprivate |
|
private |
What is the shortest run of end matches on either end?
|
inlineprivate |
|
inlineprivate |
|
private |
Is the read paired?
|
private |
What is the read's mapping quality
|
private |
What fraction of the base qualities are at least as large as the min base quality
|
inlineprivate |
|
inlineprivate |
|
private |
How long are the read overhangs?
|
inlineprivate |
|
inlineprivate |
|
private |
Get the score indicated by the params
|
inlineprivate |
|
inlineprivate |
|
private |
Does the read match one of the excluded refpos contigs?
|
private |
Is the read annotated with any of the excluded features
|
private |
Quick and dirty filter to see if removing reads that can slip around and still map perfectly helps vg call. Returns true if, at either end of the read sequence, at least k bases are repetitive, checking repeats of up to size 2k.
|
inlineprivate |
|
inlineprivate |
|
private |
Check if the alignment is marked as being correctly mapped
|
private |
|
private |
|
private |
Check if the alignment includes any aligned bases
|
private |
Is the read in a proper-mapped pair?
|
inlineprivate |
|
inlineprivate |
|
private |
Is the read a secondary alignment?
|
inlineprivate |
|
inlineprivate |
|
private |
Return false if the read only follows edges in the graph, and true if the read is split (or just incorrect) and takes edges not in the index.
Throws an error if no graph is specified.
|
private |
Does the read have the given annotation, and does it match?
|
private |
Does the read name have one of the indicated prefixes? If exact_name is set, only finds complete matches of a "prefix" to the whole read name.
|
private |
Based on the read name and paired-ness, compute the SAM-style QNAME and use that and the configured sampling probability and seed in the Samtools read sampling algorithm, to determine if the read should be kept. Returns true if the read should stay, and false if it should be removed. Always accepts or rejects paired reads together.
|
private |
Convert a multipath alignment to a single path
|
private |
Trim only the end of the given alignment, leaving the start alone. Two calls of this implement trim_ambiguous_ends above.
|
inline |
|
inline |
bool vg::ReadFilter< Read >::trim_ambiguous_ends | ( | Read & | read, |
int | k | ||
) | const |
Look at either end of the given alignment, up to k bases in from the end. See if that tail of the alignment is mapped such that another embedding in the given graph can produce the same sequence as the sequence along the embedding that the read actually has, and if so trim back the read.
In the case of softclips, the aligned portion of the read is considered, and if trimming is required, the softclips are hard-clipped off.
Returns true if the read had to be modified, and false otherwise.
MUST NOT be called with a null index.
|
private |
The two specializations have different writing infrastructure.
string vg::ReadFilter< Read >::annotation_to_match = "" |
A string formatted "annotation[.subfield]*:value". Value is optional if the key is a flag. Used like jq select.
int vg::ReadFilter< Read >::buffer_size = 512 |
GAM output buffer size.
bool vg::ReadFilter< Read >::complement_filter = false |
Actually take the complement of the filter.
int vg::ReadFilter< Read >::defray_count = 99999 |
Limit defray recursion to visit this many nodes.
int vg::ReadFilter< Read >::defray_length = 0 |
How far in from the end should we look for ambiguous end alignment to clip off?
double vg::ReadFilter< Read >::downsample_probability = 1.0 |
We can also pseudorandomly drop reads. What's the probability that we keep a read?
uint32_t vg::ReadFilter< Read >::downsample_seed_mask = 0 |
Samtools-compatible internal seed mask, for deciding which read pairs to keep. To be generated with rand() after srand() from the user-visible seed.
bool vg::ReadFilter< Read >::drop_split = false |
Should we drop split reads that follow edges not in the graph?
bool vg::ReadFilter< Read >::exact_name = false |
Read name must not have anything in it besides the prefix.
unordered_set<string> vg::ReadFilter< Read >::excluded_features |
If a read has one of the features in this set as annotations, the read is filtered out.
vector<regex> vg::ReadFilter< Read >::excluded_refpos_contigs |
Read must not have a refpos set with a contig name containing a match to any of these.
bool vg::ReadFilter< Read >::filter_on_all = false |
When outputting paired reads, fail the pair only if both (all) reads fail (true) instead of if either (any) read fails (false)
bool vg::ReadFilter< Read >::frac_score = false |
const HandleGraph* vg::ReadFilter< Read >::graph = nullptr |
A HandleGraph is required for some filters (Note: ReadFilter doesn't own/free this)
bool vg::ReadFilter< Read >::interleaved = false |
Interleaved input.
int vg::ReadFilter< Read >::max_overhang = numeric_limits<int>::max() / 2 |
int vg::ReadFilter< Read >::min_base_quality = numeric_limits<int>::min() / 2 |
double vg::ReadFilter< Read >::min_base_quality_fraction = numeric_limits<double>::lowest() |
int vg::ReadFilter< Read >::min_end_matches = numeric_limits<int>::min() / 2 |
double vg::ReadFilter< Read >::min_mapq = numeric_limits<double>::lowest() |
double vg::ReadFilter< Read >::min_primary = numeric_limits<double>::lowest() |
double vg::ReadFilter< Read >::min_secondary = numeric_limits<double>::lowest() |
|
private |
vector<string> vg::ReadFilter< Read >::name_prefixes |
Read name must have one of these prefixes, if any are present. TODO: This should be a trie but I don't have one handy. Must be sorted for vaguely efficient search.
bool vg::ReadFilter< Read >::only_correctly_mapped = false |
Filter to only correctly mapped reads.
bool vg::ReadFilter< Read >::only_mapped = true |
Filter to only mapped reads.
bool vg::ReadFilter< Read >::only_proper_pairs = true |
Filter to proper pairs.
vector<string> vg::ReadFilter< Read >::output_fields |
What fields do we want to write to the tsv?
int vg::ReadFilter< Read >::repeat_size = 0 |
bool vg::ReadFilter< Read >::rescore = false |
Should we rescore each alignment with default parameters and no e.g. haplotype info?
bool vg::ReadFilter< Read >::sub_score = false |
vector<string> vg::ReadFilter< Read >::subsequences |
Read must contain at least one of these strings as a subsequence.
int vg::ReadFilter< Read >::threads = -1 |
Number of threads from omp.
bool vg::ReadFilter< Read >::verbose = false |
bool vg::ReadFilter< Read >::write_output = true |
Sometimes we only want a report, and not a filtered gam. Toggling off output speeds things up considerably.
bool vg::ReadFilter< Read >::write_tsv = false |
Sometimes we want to pick out fields and write a tsv instead of a gam.