SHOGUN
4.0.0
|
This class implements streaming features for a document collection. Like in the standard Bag-of-Words representation, this class considers each document as a collection of tokens, which are then hashed into a new feature space of a specified dimension. This class is very flexible and allows the user to specify the tokenizer used to tokenize each document, to specify whether the results should be normalized with regard to the sqrt of the document size, and to specify whether different tokens should be combined. The latter implements a k-skip n-grams approach, meaning that you can combine up to n tokens, while skipping up to k. E.g. for the tokens ["a", "b", "c", "d"], with n_grams = 2 and skips = 2, one would get the following combinations: ["a", "ab", "ac" (skipped 1), "ad" (skipped 2), "b", "bc", "bd" (skipped 1), "c", "cd", "d"].
The current example is stored as a combination of current_vector and current_label. Call get_next_example() followed by get_vector() to iterate through the stream.
在文件 StreamingHashedDocDotFeatures.h 第 42 行定义.
Public 成员函数 | |
CStreamingHashedDocDotFeatures () | |
CStreamingHashedDocDotFeatures (CStreamingFile *file, bool is_labelled, int32_t size, CTokenizer *tzer, int32_t bits=20) | |
CStreamingHashedDocDotFeatures (CStringFeatures< char > *dot_features, CTokenizer *tzer, int32_t bits=20, float64_t *lab=NULL) | |
virtual | ~CStreamingHashedDocDotFeatures () |
virtual float32_t | dot (CStreamingDotFeatures *df) |
virtual float32_t | dense_dot (const float32_t *vec2, int32_t vec2_len) |
virtual void | add_to_dense_vec (float32_t alpha, float32_t *vec2, int32_t vec2_len, bool abs_val=false) |
virtual int32_t | get_dim_feature_space () const |
virtual const char * | get_name () const |
virtual int32_t | get_num_vectors () const |
virtual CFeatures * | duplicate () const |
virtual void | set_vector_reader () |
virtual void | set_vector_and_label_reader () |
virtual EFeatureType | get_feature_type () const |
virtual EFeatureClass | get_feature_class () const |
virtual void | start_parser () |
virtual void | end_parser () |
virtual float64_t | get_label () |
virtual bool | get_next_example () |
virtual void | release_example () |
virtual int32_t | get_num_features () |
SGSparseVector< float64_t > | get_vector () |
void | set_normalization (bool normalize) |
void | set_k_skip_n_grams (int32_t k, int32_t n) |
virtual void | dense_dot_range (float32_t *output, float32_t *alphas, float32_t *vec, int32_t dim, float32_t b, int32_t num_vec=0) |
virtual void | expand_if_required (float32_t *&vec, int32_t &len) |
virtual void | expand_if_required (float64_t *&vec, int32_t &len) |
virtual void * | get_feature_iterator () |
virtual int32_t | get_nnz_features_for_vector () |
virtual bool | get_next_feature (int32_t &index, float32_t &value, void *iterator) |
virtual void | free_feature_iterator (void *iterator) |
void | set_read_functions () |
virtual bool | get_has_labels () |
virtual bool | is_seekable () |
virtual void | reset_stream () |
virtual CFeatures * | get_streamed_features (index_t num_elements) |
virtual void | add_preprocessor (CPreprocessor *p) |
virtual void | del_preprocessor (int32_t num) |
CPreprocessor * | get_preprocessor (int32_t num) const |
void | set_preprocessed (int32_t num) |
bool | is_preprocessed (int32_t num) const |
int32_t | get_num_preprocessed () const |
int32_t | get_num_preprocessors () const |
void | clean_preprocessors () |
void | list_preprocessors () |
int32_t | get_cache_size () const |
virtual bool | reshape (int32_t num_features, int32_t num_vectors) |
void | list_feature_obj () const |
virtual void | load (CFile *loader) |
virtual void | save (CFile *writer) |
bool | check_feature_compatibility (CFeatures *f) const |
bool | has_property (EFeatureProperty p) const |
void | set_property (EFeatureProperty p) |
void | unset_property (EFeatureProperty p) |
virtual CFeatures * | create_merged_copy (CList *others) |
virtual CFeatures * | create_merged_copy (CFeatures *other) |
virtual void | add_subset (SGVector< index_t > subset) |
virtual void | add_subset_in_place (SGVector< index_t > subset) |
virtual void | remove_subset () |
virtual void | remove_all_subsets () |
virtual CSubsetStack * | get_subset_stack () |
virtual void | subset_changed_post () |
virtual CFeatures * | copy_subset (SGVector< index_t > indices) |
virtual CFeatures * | copy_dimension_subset (SGVector< index_t > dims) |
virtual CSGObject * | shallow_copy () const |
virtual CSGObject * | deep_copy () const |
virtual bool | is_generic (EPrimitiveType *generic) const |
template<class T > | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
template<> | |
void | set_generic () |
void | unset_generic () |
virtual void | print_serializable (const char *prefix="") |
virtual bool | save_serializable (CSerializableFile *file, const char *prefix="", int32_t param_version=Version::get_version_parameter()) |
virtual bool | load_serializable (CSerializableFile *file, const char *prefix="", int32_t param_version=Version::get_version_parameter()) |
DynArray< TParameter * > * | load_file_parameters (const SGParamInfo *param_info, int32_t file_version, CSerializableFile *file, const char *prefix="") |
DynArray< TParameter * > * | load_all_file_parameters (int32_t file_version, int32_t current_version, CSerializableFile *file, const char *prefix="") |
void | map_parameters (DynArray< TParameter * > *param_base, int32_t &base_version, DynArray< const SGParamInfo * > *target_param_infos) |
void | set_global_io (SGIO *io) |
SGIO * | get_global_io () |
void | set_global_parallel (Parallel *parallel) |
Parallel * | get_global_parallel () |
void | set_global_version (Version *version) |
Version * | get_global_version () |
SGStringList< char > | get_modelsel_names () |
void | print_modsel_params () |
char * | get_modsel_param_descr (const char *param_name) |
index_t | get_modsel_param_index (const char *param_name) |
void | build_gradient_parameter_dictionary (CMap< TParameter *, CSGObject * > *dict) |
virtual void | update_parameter_hash () |
virtual bool | parameter_hash_changed () |
virtual bool | equals (CSGObject *other, float64_t accuracy=0.0, bool tolerant=false) |
virtual CSGObject * | clone () |
Public 属性 | |
SGIO * | io |
Parallel * | parallel |
Version * | version |
Parameter * | m_parameters |
Parameter * | m_model_selection_parameters |
Parameter * | m_gradient_parameters |
ParameterMap * | m_parameter_map |
uint32_t | m_hash |
Protected 成员函数 | |
virtual TParameter * | migrate (DynArray< TParameter * > *param_base, const SGParamInfo *target) |
virtual void | one_to_one_migration_prepare (DynArray< TParameter * > *param_base, const SGParamInfo *target, TParameter *&replacement, TParameter *&to_migrate, char *old_name=NULL) |
virtual void | load_serializable_pre () throw (ShogunException) |
virtual void | load_serializable_post () throw (ShogunException) |
virtual void | save_serializable_pre () throw (ShogunException) |
virtual void | save_serializable_post () throw (ShogunException) |
Protected 属性 | |
int32_t | num_bits |
SGSparseVector< float64_t > | current_vector |
CTokenizer * | tokenizer |
CHashedDocConverter * | converter |
CInputParser< char > | parser |
float64_t | current_label |
bool | has_labels |
Whether examples are labelled or not. 更多... | |
CStreamingFile * | working_file |
The StreamingFile object to read from. 更多... | |
bool | seekable |
Whether the stream is seekable. 更多... | |
CSubsetStack * | m_subset_stack |
Constructor
在文件 StreamingHashedDocDotFeatures.cpp 第 24 行定义.
CStreamingHashedDocDotFeatures | ( | CStreamingFile * | file, |
bool | is_labelled, | ||
int32_t | size, | ||
CTokenizer * | tzer, | ||
int32_t | bits = 20 |
||
) |
Constructor with input information passed. Will use normalization and no quadratic features by default; the user should use the set_normalization() and set_k_skip_n_grams() methods to change that.
file | CStreamingFile to take input from. |
is_labelled | Whether examples are labelled or not. |
size | Number of examples to be held in the parser's "ring". |
tzer | the tokenizer to use on the document collection |
bits | the number of bits of the new dimension (means a dimension of size 2^bits) |
在文件 StreamingHashedDocDotFeatures.cpp 第 17 行定义.
CStreamingHashedDocDotFeatures | ( | CStringFeatures< char > * | dot_features, |
CTokenizer * | tzer, | ||
int32_t | bits = 20 , |
||
float64_t * | lab = NULL |
||
) |
Constructor taking a CStringFeatures< char > object and, optionally, labels as args. Will use normalization and no quadratic features by default; the user should use the set_normalization() and set_k_skip_n_grams() methods to change that.
The derived class should implement it so that the Streaming*Features class uses the DotFeatures object as the input, getting examples one by one from the DotFeatures object (and labels, if applicable).
dot_features | CStringFeatures< char > object |
tzer | the tokenizer to use on the document collection |
bits | the number of bits of the new dimension (means a dimension of size 2^bits) |
lab | labels (optional) |
在文件 StreamingHashedDocDotFeatures.cpp 第 29 行定义.
|
virtual |
Destructor
在文件 StreamingHashedDocDotFeatures.cpp 第 76 行定义.
|
virtualinherited |
Adds a subset of indices on top of the current subsets (possibly subset of subset). Every call causes a new active index vector to be stored. Added subsets can be removed one-by-one. If this is not needed, add_subset_in_place() should be used (does not store intermediate index vectors)
Calls subset_changed_post() afterwards
subset | subset of indices to add |
被 CCombinedFeatures 重载.
在文件 Features.cpp 第 310 行定义.
Sets/changes the latest added subset. This allows adding multiple subsets with in-place memory requirements. They cannot be removed one-by-one afterwards, only the latest active one can. If this is needed, use add_subset(). If no subset is active, this just adds.
Calls subset_changed_post() afterwards
subset | subset of indices to replace the latest one with. |
在文件 Features.cpp 第 316 行定义.
|
virtual |
add current vector multiplied with alpha to dense vector, 'vec'
alpha | scalar alpha |
vec2 | real valued vector to add to |
vec2_len | length of vector |
abs_val | if true add the absolute value |
在文件 StreamingHashedDocDotFeatures.cpp 第 108 行定义.
|
inherited |
Builds a dictionary of all parameters in SGObject as well as those of SGObjects that are parameters of this object. Dictionary maps parameters to the objects that own them.
dict | dictionary of parameters to be built. |
在文件 SGObject.cpp 第 1243 行定义.
|
inherited |
check feature compatibility
f | features to check for compatibility |
在文件 Features.cpp 第 283 行定义.
|
inherited |
clears all preprocs
在文件 Features.cpp 第 116 行定义.
|
virtualinherited |
Creates a clone of the current object. This is done via recursively traversing all parameters, which corresponds to a deep copy. Calling equals on the cloned object always returns true although none of the memory of both objects overlaps.
在文件 SGObject.cpp 第 1360 行定义.
Creates a new CFeatures instance containing only the dimensions of the feature vector which are specified by the provided indices.
This method is needed for feature selection tasks. NOT IMPLEMENTED!
dims | indices of feature dimensions to copy |
被 CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T > , 以及 CDenseFeatures< uint16_t > 重载.
在文件 Features.cpp 第 348 行定义.
Creates a new CFeatures instance containing copies of the elements which are specified by the provided indices.
This method is needed for a KernelMachine to store its model data. NOT IMPLEMENTED!
indices | indices of feature elements to copy |
被 CStringFeatures< ST >, CStringFeatures< T >, CStringFeatures< uint8_t >, CStringFeatures< char >, CStringFeatures< uint16_t >, CSparseFeatures< ST >, CSparseFeatures< float64_t >, CSparseFeatures< T >, CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t > , 以及 CCombinedFeatures 重载.
在文件 Features.cpp 第 340 行定义.
Takes a list of feature instances and returns a new instance being a concatenation of a copy of this instance's data and the given instances' data. Note that the feature types have to be equal.
NOT IMPLEMENTED!
others | list of feature objects to append |
被 CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T > , 以及 CDenseFeatures< uint16_t > 重载.
在文件 Features.h 第 235 行定义.
Convenience method for method with same name and list as parameter.
NOT IMPLEMENTED!
other | feature object to append |
被 CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t > , 以及 CCombinedFeatures 重载.
在文件 Features.h 第 249 行定义.
|
virtualinherited |
A deep copy. All the instance variables will also be copied.
在文件 SGObject.cpp 第 200 行定义.
|
virtualinherited |
compute dot product between current vector and a dense vector
vec2 | real valued vector |
vec2_len | length of vector |
在文件 StreamingHashedDocDotFeatures.cpp 第 95 行定义.
|
virtualinherited |
Compute the dot product for all vectors. This function makes use of dense_dot to compute alphas[i] * sparse[i]^T * w + b.
output | result for the given vector range |
alphas | scalars to multiply with, may be NULL |
vec | dense vector to compute dot product with |
dim | length of the dense vector |
b | bias |
num_vec | number of vectors to operate on (indices 0 to num_vec-1) |
If num_vec == 0 or left to its default value, the function attempts to return dot product for all vectors. However, the given output vector must be preallocated!
note that the result will be written to output[0...(num_vec-1)] except when num_vec = 0
在文件 StreamingDotFeatures.cpp 第 30 行定义.
|
virtual |
compute dot product between vectors of two StreamingDotFeatures objects.
df | StreamingDotFeatures (of same kind) to compute dot product with |
在文件 StreamingHashedDocDotFeatures.cpp 第 85 行定义.
|
virtual |
Duplicate the object.
实现了 CFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 127 行定义.
|
virtual |
End the parser. Wait for the parsing thread to complete.
实现了 CStreamingFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 148 行定义.
Recursively compares the current SGObject to another one. Compares all registered numerical parameters, recursion upon complex (SGObject) parameters. Does not compare pointers!
May be overwritten but please do with care! Should not be necessary in most cases.
other | object to compare with |
accuracy | accuracy to use for comparison (optional) |
tolerant | allows lenient check on float equality (within accuracy) |
在文件 SGObject.cpp 第 1264 行定义.
|
virtualinherited |
Expand the vector passed so that its length is equal to the dimensionality of the features. The previous values are kept intact through realloc, and the new ones are set to zero.
vec | float32_t* vector |
len | length of the vector |
被 CStreamingVwFeatures 重载.
在文件 StreamingDotFeatures.cpp 第 53 行定义.
|
virtualinherited |
Expand the vector passed so that its length is equal to the dimensionality of the features. The previous values are kept intact through realloc, and the new ones are set to zero.
vec | float64_t* vector |
len | length of the vector |
被 CStreamingVwFeatures 重载.
在文件 StreamingDotFeatures.cpp 第 64 行定义.
|
virtualinherited |
Clean up the iterator. Call this function with the iterator returned by get_feature_iterator().
iterator | as returned by get_feature_iterator() |
在文件 StreamingDotFeatures.cpp 第 93 行定义.
|
inherited |
|
virtual |
obtain the dimensionality of the feature space
(do not mix this up with the dimensionality of the input space, usually obtained via get_num_features())
在文件 StreamingHashedDocDotFeatures.cpp 第 117 行定义.
|
virtual |
Return the feature class
实现了 CFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 137 行定义.
|
virtualinherited |
iterate over the non-zero features
call get_feature_iterator first, followed by get_next_feature and free_feature_iterator to cleanup
在文件 StreamingDotFeatures.cpp 第 75 行定义.
|
virtual |
Return the feature type, depending on T.
实现了 CFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 132 行定义.
|
inherited |
|
inherited |
|
inherited |
|
virtualinherited |
Return whether the examples are labelled or not.
在文件 StreamingFeatures.cpp 第 35 行定义.
|
virtual |
Return the label of the current example.
Raise an error if the input has been specified as unlabelled.
实现了 CStreamingFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 177 行定义.
|
inherited |
在文件 SGObject.cpp 第 1135 行定义.
|
inherited |
Returns description of a given parameter string, if it exists. SG_ERROR otherwise
param_name | name of the parameter |
在文件 SGObject.cpp 第 1159 行定义.
|
inherited |
Returns index of model selection parameter with provided name
param_name | name of model selection parameter |
在文件 SGObject.cpp 第 1172 行定义.
|
virtual |
Return the name.
实现了 CSGObject.
在文件 StreamingHashedDocDotFeatures.cpp 第 122 行定义.
|
virtual |
Indicate to the parser that it must fetch the next example.
实现了 CStreamingFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 153 行定义.
|
virtualinherited |
iterate over the non-zero features
Call this function with the iterator returned by get_feature_iterator(), and call free_feature_iterator() to clean up.
index | is returned by reference (-1 when not available) |
value | is returned by reference |
iterator | as returned by get_feature_iterator() |
在文件 StreamingDotFeatures.cpp 第 87 行定义.
|
virtualinherited |
get number of non-zero features in vector
(in case accurate estimates are too expensive overestimating is OK)
被 CStreamingVwFeatures, CStreamingSparseFeatures< T >, CStreamingDenseFeatures< T >, CStreamingDenseFeatures< float64_t > , 以及 CStreamingDenseFeatures< float32_t > 重载.
在文件 StreamingDotFeatures.cpp 第 81 行定义.
|
virtual |
Get the number of features in the current example.
实现了 CStreamingFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 172 行定义.
|
inherited |
|
inherited |
|
virtual |
Return the number of vectors stored in this object.
实现了 CFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 182 行定义.
|
inherited |
Returns a new CFeatures instance which contains num_elements elements from the underlying stream. Not SG_REF'ed
num_elements | num elements to save from stream |
NOT IMPLEMENTED!
被 CStreamingDenseFeatures< T >, CStreamingDenseFeatures< float64_t > , 以及 CStreamingDenseFeatures< float32_t > 重载.
在文件 StreamingFeatures.h 第 191 行定义.
|
virtualinherited |
SGSparseVector< float64_t > get_vector | ( | ) |
Get the current example
在文件 StreamingHashedDocDotFeatures.cpp 第 197 行定义.
|
inherited |
check if features have given property
p | feature property |
在文件 Features.cpp 第 295 行定义.
|
virtualinherited |
If the SGSerializable is a class template then TRUE will be returned and GENERIC is set to the type of the generic.
generic | set to the type of the generic if returning TRUE |
在文件 SGObject.cpp 第 297 行定义.
|
inherited |
get whether specified preprocessor was already applied
num | index of preprocessor in list |
在文件 Features.cpp 第 149 行定义.
|
virtualinherited |
Whether the stream is seekable (to check if multiple epochs are possible), i.e., whether we can process examples in a batch fashion.
A stream is usually seekable when it comes from a file or when it comes from another conventional CFeatures object.
在文件 StreamingFeatures.cpp 第 40 行定义.
|
inherited |
list feature object
在文件 Features.cpp 第 171 行定义.
|
inherited |
print preprocessors
在文件 Features.cpp 第 131 行定义.
|
virtualinherited |
load features from file
loader | File object via which data shall be loaded |
被 CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t >, CSparseFeatures< ST >, CSparseFeatures< float64_t >, CSparseFeatures< T >, CStringFeatures< ST >, CStringFeatures< T >, CStringFeatures< uint8_t >, CStringFeatures< char > , 以及 CStringFeatures< uint16_t > 重载.
在文件 Features.cpp 第 269 行定义.
|
inherited |
maps all parameters of this instance to the provided file version and loads all parameter data from the file into an array, which is sorted (basically calls load_file_parameter(...) for all parameters and puts all results into a sorted array)
file_version | parameter version of the file |
current_version | version from which mapping begins (you want to use Version::get_version_parameter() for this in most cases) |
file | file to load from |
prefix | prefix for members |
在文件 SGObject.cpp 第 704 行定义.
|
inherited |
Loads some specified parameters from a file with a specified version. The provided parameter info has a version which is recursively mapped until the file parameter version is reached. Note that there may possibly be multiple parameters in the mapping; therefore, a set of TParameter instances is returned.
param_info | information of parameter |
file_version | parameter version of the file, must be <= provided parameter version |
file | file to load from |
prefix | prefix for members |
在文件 SGObject.cpp 第 545 行定义.
|
virtualinherited |
Load this object from file. If it will fail (returning FALSE) then this object will contain inconsistent data and should not be used!
file | where to load from |
prefix | prefix for members |
param_version | (optional) a parameter version different from the current one (this is mainly for testing; better not to use it) |
在文件 SGObject.cpp 第 374 行定义.
|
protectedvirtualinherited |
Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_POST is called.
ShogunException | will be thrown if an error occurs. |
被 CKernel, CWeightedDegreePositionStringKernel, CList, CAlphabet, CLinearHMM, CGaussianKernel, CInverseMultiQuadricKernel, CCircularKernel , 以及 CExponentialKernel 重载.
在文件 SGObject.cpp 第 1062 行定义.
|
protectedvirtualinherited |
Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::LOAD_SERIALIZABLE_PRE is called.
ShogunException | will be thrown if an error occurs. |
被 CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool > , 以及 CDynamicObjectArray 重载.
在文件 SGObject.cpp 第 1057 行定义.
|
inherited |
Takes a set of TParameter instances (base) with a certain version and a set of target parameter infos and recursively maps the base level wise to the current version using CSGObject::migrate(...). The base is replaced. After this call, the base version containing parameters should be of same version/type as the initial target parameter infos. Note for this to work, the migrate methods and all the internal parameter mappings have to match
param_base | set of TParameter instances that are mapped to the provided target parameter infos |
base_version | version of the parameter base |
target_param_infos | set of SGParamInfo instances that specify the target parameter base |
在文件 SGObject.cpp 第 742 行定义.
|
protectedvirtualinherited |
Creates a new TParameter instance, which contains migrated data from the version that is provided. The provided parameter data base is used for migration; this base is a collection of all parameter data of the previous version. Migration is done FROM the data in param_base TO the provided param info. Migration is always one version step. Method has to be implemented in subclasses; if no match is found, the base method has to be called.
If there is an element in the param_base which equals the target, a copy of the element is returned. This represents the case when nothing has changed and therefore, the migrate method is not overloaded in a subclass
param_base | set of TParameter instances to use for migration |
target | parameter info for the resulting TParameter |
在文件 SGObject.cpp 第 949 行定义.
|
protectedvirtualinherited |
This method prepares everything for a one-to-one parameter migration. One to one here means that only ONE element of the parameter base is needed for the migration (the one with the same name as the target). Data is allocated for the target (in the type as provided in the target SGParamInfo), and a corresponding new TParameter instance is written to replacement. The to_migrate pointer points to the single needed TParameter instance needed for migration. If a name change happened, the old name may be specified by old_name. In addition, the m_delete_data flag of to_migrate is set to true. So if you want to migrate data, the only thing to do after this call is converting the data in the m_parameter fields. If unsure how to use - have a look into an example for this. (base_migration_type_conversion.cpp for example)
param_base | set of TParameter instances to use for migration |
target | parameter info for the resulting TParameter |
replacement | (used as output) here the TParameter instance which is returned by migration is created into |
to_migrate | the only source that is used for migration |
old_name | with this parameter, a name change may be specified |
在文件 SGObject.cpp 第 889 行定义.
|
virtualinherited |
在文件 SGObject.cpp 第 263 行定义.
|
inherited |
prints all parameter registered for model selection and their type
在文件 SGObject.cpp 第 1111 行定义.
|
virtualinherited |
|
virtual |
Indicate that processing of the current example is done. The parser then considers it safe to dispose of that example and replace it with another one.
实现了 CStreamingFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 167 行定义.
|
virtualinherited |
Removes all subsets. Calls subset_changed_post() afterwards.
被 CCombinedFeatures 重载.
在文件 Features.cpp 第 328 行定义.
|
virtualinherited |
Removes the last added subset from the subset stack, if existing. Calls subset_changed_post() afterwards.
被 CCombinedFeatures 重载.
在文件 Features.cpp 第 322 行定义.
|
virtualinherited |
Function to reset the stream (if possible).
被 CStreamingSparseFeatures< T >, CStreamingVwFeatures, CStreamingDenseFeatures< T >, CStreamingDenseFeatures< float64_t > , 以及 CStreamingDenseFeatures< float32_t > 重载.
在文件 StreamingFeatures.cpp 第 45 行定义.
|
virtualinherited |
in case there is a feature matrix allow for reshaping
NOT IMPLEMENTED!
num_features | new number of features |
num_vectors | new number of vectors |
被 CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T > , 以及 CDenseFeatures< uint16_t > 重载.
在文件 Features.cpp 第 165 行定义.
|
virtualinherited |
save features to file
writer | File object via which data shall be saved |
被 CStringFeatures< ST >, CStringFeatures< T >, CStringFeatures< uint8_t >, CStringFeatures< char >, CStringFeatures< uint16_t >, CDenseFeatures< ST >, CDenseFeatures< uint32_t >, CDenseFeatures< float64_t >, CDenseFeatures< T >, CDenseFeatures< uint16_t >, CSparseFeatures< ST >, CSparseFeatures< float64_t > , 以及 CSparseFeatures< T > 重载.
在文件 Features.cpp 第 276 行定义.
|
virtualinherited |
Save this object to file.
file | where to save the object; will be closed during returning if PREFIX is an empty string. |
prefix | prefix for members |
param_version | (optional) a parameter version different from the current one (this is mainly for testing; better not to use it) |
在文件 SGObject.cpp 第 315 行定义.
|
protectedvirtualinherited |
Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_POST is called.
ShogunException | will be thrown if an error occurs. |
被 CKernel 重载.
在文件 SGObject.cpp 第 1072 行定义.
|
protectedvirtualinherited |
Can (optionally) be overridden to pre-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that at first the overridden method BASE_CLASS::SAVE_SERIALIZABLE_PRE is called.
ShogunException | will be thrown if an error occurs. |
被 CKernel, CDynamicArray< T >, CDynamicArray< float64_t >, CDynamicArray< float32_t >, CDynamicArray< int32_t >, CDynamicArray< char >, CDynamicArray< bool > , 以及 CDynamicObjectArray 重载.
在文件 SGObject.cpp 第 1067 行定义.
|
inherited |
在文件 SGObject.cpp 第 42 行定义.
|
inherited |
在文件 SGObject.cpp 第 47 行定义.
|
inherited |
在文件 SGObject.cpp 第 52 行定义.
|
inherited |
在文件 SGObject.cpp 第 57 行定义.
|
inherited |
在文件 SGObject.cpp 第 62 行定义.
|
inherited |
在文件 SGObject.cpp 第 67 行定义.
|
inherited |
在文件 SGObject.cpp 第 72 行定义.
|
inherited |
在文件 SGObject.cpp 第 77 行定义.
|
inherited |
在文件 SGObject.cpp 第 82 行定义.
|
inherited |
在文件 SGObject.cpp 第 87 行定义.
|
inherited |
在文件 SGObject.cpp 第 92 行定义.
|
inherited |
在文件 SGObject.cpp 第 97 行定义.
|
inherited |
在文件 SGObject.cpp 第 102 行定义.
|
inherited |
在文件 SGObject.cpp 第 107 行定义.
|
inherited |
在文件 SGObject.cpp 第 112 行定义.
|
inherited |
set generic type to T
|
inherited |
|
inherited |
|
inherited |
void set_k_skip_n_grams | ( | int32_t | k, |
int32_t | n | ||
) |
Method used to specify the parameters for the quadratic approach of k-skip n-grams. See class description for more details and an example.
k | the max number of allowed skips |
n | the max number of tokens to combine |
在文件 StreamingHashedDocDotFeatures.cpp 第 207 行定义.
void set_normalization | ( | bool | normalize | ) |
specify whether hashed vector should be normalized or not
normalize | whether to normalize |
在文件 StreamingHashedDocDotFeatures.cpp 第 202 行定义.
|
inherited |
|
inherited |
|
inherited |
Set the vector reading functions.
The functions are implemented specific to the type in the derived class.
在文件 StreamingFeatures.cpp 第 29 行定义.
|
virtual |
Sets the read function (in case the examples are labelled) to get_*_vector_and_label from CStreamingFile.
The exact function depends on type T.
The parser uses the function set by this while reading labelled examples.
实现了 CStreamingFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 192 行定义.
|
virtual |
Sets the read function (in case the examples are unlabelled) to get_*_vector() from CStreamingFile.
The exact function depends on type T.
The parser uses the function set by this while reading unlabelled examples.
实现了 CStreamingFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 187 行定义.
|
virtualinherited |
A shallow copy. All the SGObject instance variables will be simply assigned and SG_REF-ed.
被 CGaussianKernel 重载.
在文件 SGObject.cpp 第 194 行定义.
|
virtual |
Start the parser. It stores parsed examples from the input in a separate thread.
实现了 CStreamingFeatures.
在文件 StreamingHashedDocDotFeatures.cpp 第 142 行定义.
|
virtualinherited |
method may be overwritten to update things that depend on subset
被 CStringFeatures< ST >, CStringFeatures< T >, CStringFeatures< uint8_t >, CStringFeatures< char > , 以及 CStringFeatures< uint16_t > 重载.
在文件 Features.h 第 293 行定义.
|
inherited |
unset generic type
this has to be called in classes specializing a template class
在文件 SGObject.cpp 第 304 行定义.
|
inherited |
|
virtualinherited |
Updates the hash of current parameter combination
在文件 SGObject.cpp 第 250 行定义.
|
protected |
Converter
在文件 StreamingHashedDocDotFeatures.h 第 253 行定义.
|
protected |
The current example's label
在文件 StreamingHashedDocDotFeatures.h 第 259 行定义.
|
protected |
Current example
在文件 StreamingHashedDocDotFeatures.h 第 247 行定义.
|
protectedinherited |
Whether examples are labelled or not.
在文件 StreamingFeatures.h 第 201 行定义.
|
inherited |
io
在文件 SGObject.h 第 496 行定义.
|
inherited |
parameters wrt which we can compute gradients
在文件 SGObject.h 第 511 行定义.
|
inherited |
Hash of parameter values
在文件 SGObject.h 第 517 行定义.
|
inherited |
model selection parameters
在文件 SGObject.h 第 508 行定义.
|
inherited |
map for different parameter versions
在文件 SGObject.h 第 514 行定义.
|
inherited |
parameters
在文件 SGObject.h 第 505 行定义.
|
protectedinherited |
subset used for index transformations
在文件 Features.h 第 335 行定义.
|
protected |
number of bits for the target dimension
在文件 StreamingHashedDocDotFeatures.h 第 244 行定义.
|
inherited |
parallel
在文件 SGObject.h 第 499 行定义.
|
protected |
The parser
在文件 StreamingHashedDocDotFeatures.h 第 256 行定义.
|
protectedinherited |
Whether the stream is seekable.
在文件 StreamingFeatures.h 第 207 行定义.
|
protected |
在文件 StreamingHashedDocDotFeatures.h 第 250 行定义.
|
inherited |
version
在文件 SGObject.h 第 502 行定义.
|
protectedinherited |
The StreamingFile object to read from.
在文件 StreamingFeatures.h 第 204 行定义.