vg
tools for working with variation graphs
Public Member Functions | Static Public Member Functions | Public Attributes | Protected Member Functions | Protected Attributes | Friends | List of all members
vg::IndexRegistry Class Reference

#include <index_registry.hpp>

Public Member Functions

 IndexRegistry ()=default
 Constructor. More...
 
 ~IndexRegistry ()
 Destructor to clean up temp files. More...
 
 IndexRegistry (const IndexRegistry &other)=delete
 
IndexRegistry & operator= (const IndexRegistry &other)=delete
 
 IndexRegistry (IndexRegistry &&other)
 
IndexRegistry & operator= (IndexRegistry &&other)
 
void set_prefix (const string &prefix)
 Prefix for all saved outputs. More...
 
string get_prefix () const
 Get the current prefix for saving output files. More...
 
void set_intermediate_file_keeping (bool keep_intermediates)
 
void register_index (const IndexName &identifier, const string &suffix)
 Register an index containing the given identifier. More...
 
void register_index (const IndexName &identifier, const vector< string > &suffixes)
 
RecipeName register_recipe (const vector< IndexName > &identifiers, const vector< IndexName > &input_identifiers, const RecipeFunc &exec)
 
void register_generalization (const RecipeName &generalizer, const RecipeName &generalizee)
 
void provide (const IndexName &identifier, const string &filename, const map< string, string > &scopes={})
 
void provide (const IndexName &identifier, const vector< string > &filenames, const map< string, string > &scopes={})
 
void reset (const IndexName &identifier)
 Remove a provided index. More...
 
bool available (const IndexName &identifier) const
 
bool predates (const IndexName &earlier, const IndexName &later) const
 
vector< string > get_possible_filenames (const IndexName &identifier) const
 
vector< string > require (const IndexName &identifier) const
 
void set_target_memory_usage (int64_t bytes)
 
int64_t get_target_memory_usage () const
 Get the maximum memory we will try to consume. More...
 
vector< IndexName > completed_indexes () const
 Get a list of all indexes that have already been completed or provided. More...
 
void make_indexes (const vector< IndexName > &identifiers)
 
string to_dot () const
 Returns the recipe graph in dot format. More...
 
string to_dot (const vector< IndexName > &targets) const
 Returns the recipe graph in dot format with a plan highlighted. More...
 
void reset ()
 Discard any provided or constructed indexes. More...
 

Static Public Member Functions

static set< string > get_wildcards (const string &pattern)
 Get the names of all brace-enclosed {wildcards} in the given pattern. More...
 
static string substitute_wildcards (const string &pattern, const map< string, string > values)
 
static int64_t get_system_memory ()
 Get the amount of free memory. More...
 
static bool vcf_is_phased (const string &filepath)
 Determine if a VCF file is phased or not. More...
 
static bool gfa_has_haplotypes (const string &filepath)
 Determine if a GFA has haplotypes as W-lines. More...
 

Public Attributes

bool check_files = true
 

Protected Member Functions

vector< IndexGroup > dependency_order () const
 get a topological ordering of all registered indexes in the dependency DAG More...
 
IndexingPlan make_plan (const IndexGroup &end_products) const
 generate a plan to create the indexes More...
 
bool has_recipe (const RecipeName &recipe_name) const
 
const IndexRecipe & get_recipe (const RecipeName &recipe_name) const
 use a recipe identifier to get the recipe More...
 
vector< vector< string > > execute_recipe (const RecipeName &recipe_name, const IndexingPlan *plan, AliasGraph &alias_graph)
 
IndexFile * get_index (const IndexName &identifier)
 access index file More...
 
const IndexFile * get_index (const IndexName &identifier) const
 access const index file More...
 
bool all_finished (const vector< const IndexFile * > &inputs) const
 
bool all_finished (const IndexGroup &inputs) const
 
string get_work_dir ()
 Function to get and/or initialize the temporary directory in which indexes will live. More...
 

Protected Attributes

map< IndexName, unique_ptr< IndexFile > > index_registry
 The storage struct for named indexes. Ordered so it is easier to key on index names. More...
 
unordered_set< string > registered_suffixes
 All of the suffixes that have been registered by indexes. More...
 
map< IndexGroup, vector< IndexRecipe > > recipe_registry
 The storage struct for recipes, which may make index. More...
 
map< RecipeName, RecipeName > generalizations
 Map from generalizees to generalizers. More...
 
string work_dir
 Temporary directory in which indexes will live. More...
 
string output_prefix = "index"
 filepath that will prefix all saved output More...
 
bool keep_intermediates = false
 should intermediate files end up in the scratch or the output directory? More...
 
int64_t target_memory_usage = numeric_limits<int64_t>::max()
 the max memory we will attempt to use More...
 

Friends

class IndexingPlan
 

Detailed Description

An object that can record methods to produce indexes and design workflows to create a set of desired indexes.

Constructor & Destructor Documentation

◆ IndexRegistry() [1/3]

vg::IndexRegistry::IndexRegistry ( )
default

◆ ~IndexRegistry()

vg::IndexRegistry::~IndexRegistry ( )

Destructor to clean up temp files.

◆ IndexRegistry() [2/3]

vg::IndexRegistry::IndexRegistry ( const IndexRegistry &  other)
delete

◆ IndexRegistry() [3/3]

vg::IndexRegistry::IndexRegistry ( IndexRegistry &&  other)

Member Function Documentation

◆ all_finished() [1/2]

bool vg::IndexRegistry::all_finished ( const IndexGroup &  inputs) const
protected

◆ all_finished() [2/2]

bool vg::IndexRegistry::all_finished ( const vector< const IndexFile * > &  inputs) const
protected

◆ available()

bool vg::IndexRegistry::available ( const IndexName &  identifier) const

Return true if the given index is available and can be require()'d, and false otherwise.

◆ completed_indexes()

vector< IndexName > vg::IndexRegistry::completed_indexes ( ) const

Get a list of all indexes that have already been completed or provided.

◆ dependency_order()

vector< IndexGroup > vg::IndexRegistry::dependency_order ( ) const
protected

get a topological ordering of all registered indexes in the dependency DAG

◆ execute_recipe()

vector< vector< string > > vg::IndexRegistry::execute_recipe ( const RecipeName &  recipe_name,
const IndexingPlan *  plan,
AliasGraph &  alias_graph 
)
protected

Build the index using the recipe with the provided priority. Expose the plan so that the recipe knows where it is supposed to go.

◆ get_index() [1/2]

IndexFile * vg::IndexRegistry::get_index ( const IndexName &  identifier)
protected

access index file

◆ get_index() [2/2]

const IndexFile * vg::IndexRegistry::get_index ( const IndexName &  identifier) const
protected

access const index file

◆ get_possible_filenames()

vector< string > vg::IndexRegistry::get_possible_filenames ( const IndexName &  identifier) const

Get the possible filename(s) associated with the given index with the given prefix. TODO: Get this to account for sample-scoped indexes.

◆ get_prefix()

string vg::IndexRegistry::get_prefix ( ) const

Get the current prefix for saving output files.

◆ get_recipe()

const IndexRecipe & vg::IndexRegistry::get_recipe ( const RecipeName &  recipe_name) const
protected

use a recipe identifier to get the recipe

◆ get_system_memory()

int64_t vg::IndexRegistry::get_system_memory ( )
static

Get the amount of free memory.

◆ get_target_memory_usage()

int64_t vg::IndexRegistry::get_target_memory_usage ( ) const

Get the maximum memory we will try to consume.

◆ get_wildcards()

set< string > vg::IndexRegistry::get_wildcards ( const string &  pattern)
static

Get the names of all brace-enclosed {wildcards} in the given pattern.

◆ get_work_dir()

string vg::IndexRegistry::get_work_dir ( )
protected

Function to get and/or initialize the temporary directory in which indexes will live.

◆ gfa_has_haplotypes()

bool vg::IndexRegistry::gfa_has_haplotypes ( const string &  filepath)
static

Determine if a GFA has haplotypes as W-lines.

◆ has_recipe()

bool vg::IndexRegistry::has_recipe ( const RecipeName &  recipe_name) const
protected

Check if a recipe identifier corresponds to a recipe.

Recipe identifiers not corresponding to actual recipes are used during planning to represent provided inputs.

TODO: Refactor that with some kind of tagged union or optional.

◆ make_indexes()

void vg::IndexRegistry::make_indexes ( const vector< IndexName > &  identifiers)

Create and execute a plan to make the indicated indexes using provided inputs. If the provided inputs cannot create the desired indexes, throws an InsufficientInputException. When completed, all requested index files will be available via require().

◆ make_plan()

IndexingPlan vg::IndexRegistry::make_plan ( const IndexGroup &  end_products) const
protected

generate a plan to create the indexes

◆ operator=() [1/2]

IndexRegistry & vg::IndexRegistry::operator= ( const IndexRegistry &  other)
delete

◆ operator=() [2/2]

IndexRegistry & vg::IndexRegistry::operator= ( IndexRegistry &&  other)

◆ predates()

bool vg::IndexRegistry::predates ( const IndexName &  earlier,
const IndexName &  later 
) const

For two available indexes, returns true if the modification times on the earlier index are no later than those on the later index.

Useful for enforcing that downstream indexes haven't had their upstream indexes overwritten.

◆ provide() [1/2]

void vg::IndexRegistry::provide ( const IndexName &  identifier,
const string &  filename,
const map< string, string > &  scopes = {} 
)

Indicate a serialized file that contains some identified index, optionally with scopes that propagate to descendant files.

TODO: If scopes contain ".", we can run into problems with combinations of different scopes producing the same final string. Right now we only use one kind of scope, which avoids this.

◆ provide() [2/2]

void vg::IndexRegistry::provide ( const IndexName &  identifier,
const vector< string > &  filenames,
const map< string, string > &  scopes = {} 
)

Indicate a list of serialized files that contain some identified index, optionally with scopes that propagate to descendant files.

TODO: If scopes contain ".", we can run into problems with combinations of different scopes producing the same final string. Right now we only use one kind of scope, which avoids this.

◆ register_generalization()

void vg::IndexRegistry::register_generalization ( const RecipeName &  generalizer,
const RecipeName &  generalizee 
)

Indicate one recipe is a broadened version of another. The indexes consumed and produced by the generalization must be semantically identical to those of the generalizee.

◆ register_index() [1/2]

void vg::IndexRegistry::register_index ( const IndexName &  identifier,
const string &  suffix 
)

Register an index containing the given identifier.

◆ register_index() [2/2]

void vg::IndexRegistry::register_index ( const IndexName &  identifier,
const vector< string > &  suffixes 
)

Register an index containing the given identifier, with multiple possible suffixes. The first suffix where all {wildcards} can be substituted with scopes will be used.

◆ register_recipe()

RecipeName vg::IndexRegistry::register_recipe ( const vector< IndexName > &  identifiers,
const vector< IndexName > &  input_identifiers,
const RecipeFunc &  exec 
)

Register a recipe to produce an index using other indexes or input files. Recipes registered earlier will have higher priority.

◆ require()

vector< string > vg::IndexRegistry::require ( const IndexName &  identifier) const

Get the filename(s) associated with the given index. Aborts if the index is not a known type, or if it is not provided or made.

◆ reset() [1/2]

void vg::IndexRegistry::reset ( )

Discard any provided or constructed indexes.

◆ reset() [2/2]

void vg::IndexRegistry::reset ( const IndexName &  identifier)

Remove a provided index.

◆ set_intermediate_file_keeping()

void vg::IndexRegistry::set_intermediate_file_keeping ( bool  keep_intermediates)

Should intermediate files be saved to the output directory or the temp directory?

◆ set_prefix()

void vg::IndexRegistry::set_prefix ( const string &  prefix)

Prefix for all saved outputs.

◆ set_target_memory_usage()

void vg::IndexRegistry::set_target_memory_usage ( int64_t  bytes)

Set the maximum memory that indexing should try to consume (note: this is not strictly adhered to due to difficulties in estimating memory use).

◆ substitute_wildcards()

string vg::IndexRegistry::substitute_wildcards ( const string &  pattern,
const map< string, string >  values 
)
static

Substitute wildcards into the given pattern. All wildcards must have values assigned. Extra values not used are allowed.

◆ to_dot() [1/2]

string vg::IndexRegistry::to_dot ( ) const

Returns the recipe graph in dot format.

◆ to_dot() [2/2]

string vg::IndexRegistry::to_dot ( const vector< IndexName > &  targets) const

Returns the recipe graph in dot format with a plan highlighted.

◆ vcf_is_phased()

bool vg::IndexRegistry::vcf_is_phased ( const string &  filepath)
static

Determine if a VCF file is phased or not.

Friends And Related Function Documentation

◆ IndexingPlan

friend class IndexingPlan
friend

Member Data Documentation

◆ check_files

bool vg::IndexRegistry::check_files = true

◆ generalizations

map<RecipeName, RecipeName> vg::IndexRegistry::generalizations
protected

Map from generalizees to generalizers.

◆ index_registry

map<IndexName, unique_ptr<IndexFile> > vg::IndexRegistry::index_registry
protected

The storage struct for named indexes. Ordered so it is easier to key on index names.

◆ keep_intermediates

bool vg::IndexRegistry::keep_intermediates = false
protected

should intermediate files end up in the scratch or the output directory?

◆ output_prefix

string vg::IndexRegistry::output_prefix = "index"
protected

filepath that will prefix all saved output

◆ recipe_registry

map<IndexGroup, vector<IndexRecipe> > vg::IndexRegistry::recipe_registry
protected

The storage struct for recipes, which may make index.

◆ registered_suffixes

unordered_set<string> vg::IndexRegistry::registered_suffixes
protected

All of the suffixes that have been registered by indexes.

◆ target_memory_usage

int64_t vg::IndexRegistry::target_memory_usage = numeric_limits<int64_t>::max()
protected

the max memory we will attempt to use

◆ work_dir

string vg::IndexRegistry::work_dir
protected

Temporary directory in which indexes will live.


The documentation for this class was generated from the following files: