21 #include <shogun/lib/external/libocas.h> 29 #ifndef DOXYGEN_SHOULD_SKIP_THIS 30 struct wdocas_thread_params_output
40 struct wdocas_thread_params_add
49 #endif // DOXYGEN_SHOULD_SKIP_THIS 51 CWDSVMOcas::CWDSVMOcas()
52 :
CMachine(), use_bias(false), bufsize(3000), C1(1), C2(1),
53 epsilon(1e-3), method(SVM_OCAS)
67 CWDSVMOcas::CWDSVMOcas(E_SVM_TYPE type)
68 :
CMachine(), use_bias(false), bufsize(3000), C1(1), C2(1),
69 epsilon(1e-3), method(type)
81 CWDSVMOcas::CWDSVMOcas(
84 :
CMachine(), use_bias(false), bufsize(3000), C1(C), C2(C), epsilon(1e-3),
98 CWDSVMOcas::~CWDSVMOcas()
121 SG_ERROR(
"Features not of class string type byte\n")
129 set_normalization_const();
134 int32_t num=features->get_num_vectors();
139 for (int32_t i=0; i<num; i++)
140 outputs[i] = apply_one(i);
146 int32_t CWDSVMOcas::set_wd_weights()
148 ASSERT(degree>0 && degree<=8)
152 w_offsets=SG_MALLOC(int32_t, degree);
153 int32_t w_dim_single_c=0;
155 for (int32_t i=0; i<degree; i++)
158 wd_weights[i]=sqrt(2.0*(from_degree-i)/(from_degree*(from_degree+1)));
159 w_dim_single_c+=w_offsets[i];
161 return w_dim_single_c;
164 bool CWDSVMOcas::train_machine(
CFeatures* data)
166 SG_INFO(
"C=%f, epsilon=%f, bufsize=%d\n", get_C1(), get_epsilon(), bufsize)
175 SG_ERROR(
"Features not of class string type byte\n")
181 CAlphabet* alphabet=get_features()->get_alphabet();
184 alphabet_size=alphabet->get_num_symbols();
185 string_length=features->get_num_vectors();
189 w_dim_single_char=set_wd_weights();
191 SG_DEBUG("w_dim_single_char=%d\n", w_dim_single_char)
192 w_dim=string_length*w_dim_single_char;
193 SG_DEBUG("cutting plane has %d dims\n", w_dim)
194 num_vec=get_features()->get_max_vector_length();
196 set_normalization_const();
197 SG_INFO("num_vec: %d num_lab: %d\n", num_vec, labvec.vlen)
198 ASSERT(num_vec==labvec.vlen)
207 memset(old_w, 0, w_dim*sizeof(
float32_t));
212 memset(cuts, 0, sizeof(*cuts)*bufsize);
214 memset(cp_bias, 0, sizeof(
float64_t)*bufsize);
229 if (method == SVM_OCAS)
231 ocas_return_value_T result = svm_ocas_solver( get_C1(), num_vec, get_epsilon(),
232 TolAbs, QPBound, get_max_train_time(), bufsize, Method,
233 &CWDSVMOcas::compute_W,
234 &CWDSVMOcas::update_W,
235 &CWDSVMOcas::add_new_cut,
236 &CWDSVMOcas::compute_output,
241 SG_INFO("Ocas Converged after %d iterations\n"
242 "==================================\n"
243 "timing statistics:\n"
244 "output_time: %f s\n"
249 "ocas_time %f s\n\n", result.nIter, result.output_time, result.sort_time,
250 result.add_time, result.w_time, result.qp_solver_time, result.ocas_time);
252 for (int32_t i=bufsize-1; i>=0; i--)
272 CWDSVMOcas* o = (CWDSVMOcas*) ptr;
273 uint32_t nDim = (uint32_t) o->w_dim;
279 for(uint32_t j=0; j <nDim; j++)
281 W[j] = oldW[j]*(1-t) + t*W[j];
282 sq_norm_W += W[j]*W[j];
285 bias=old_bias*(1-t) + t*bias;
289 o->old_bias=old_bias;
302 void* CWDSVMOcas::add_new_cut_helper(
void* ptr)
304 wdocas_thread_params_add* p = (wdocas_thread_params_add*) ptr;
305 CWDSVMOcas* o = p->wdocas;
306 int32_t start = p->start;
307 int32_t end = p->end;
308 int32_t string_length = o->string_length;
310 uint32_t cut_length=p->cut_length;
311 uint32_t* new_cut=p->new_cut;
312 int32_t* w_offsets = o->w_offsets;
314 int32_t alphabet_size = o->alphabet_size;
316 int32_t degree = o->degree;
318 float64_t normalization_const = o->normalization_const;
325 int32_t* val=SG_MALLOC(int32_t, cut_length);
326 for (int32_t j=start; j<end; j++)
328 int32_t offs=o->w_dim_single_char*j;
329 memset(val,0,
sizeof(int32_t)*cut_length);
330 int32_t lim=
CMath::min(degree, string_length-j);
333 for (int32_t k=0; k<lim; k++)
337 float32_t wd = wd_weights[k]/normalization_const;
339 for(uint32_t i=0; i < cut_length; i++)
341 val[i]=val[i]*alphabet_size + vec[new_cut[i]];
342 new_a[offs+val[i]]+=wd * y[new_cut[i]];
354 int CWDSVMOcas::add_new_cut(
355 float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
356 uint32_t nSel,
void* ptr)
358 CWDSVMOcas* o = (CWDSVMOcas*) ptr;
361 uint32_t nDim=(uint32_t) o->w_dim;
364 memset(new_a, 0,
sizeof(
float32_t)*nDim);
367 wdocas_thread_params_add* params_add=SG_MALLOC(wdocas_thread_params_add, o->parallel->get_num_threads());
368 pthread_t* threads=SG_MALLOC(pthread_t, o->parallel->get_num_threads());
370 int32_t string_length = o->string_length;
372 int32_t nthreads=o->parallel->get_num_threads()-1;
373 int32_t step= string_length/o->parallel->get_num_threads();
377 nthreads=string_length-1;
381 for (t=0; t<nthreads; t++)
383 params_add[t].wdocas=o;
385 params_add[t].new_a=new_a;
386 params_add[t].new_cut=new_cut;
387 params_add[t].start = step*t;
388 params_add[t].end = step*(t+1);
389 params_add[t].cut_length = cut_length;
391 if (pthread_create(&threads[t], NULL, &CWDSVMOcas::add_new_cut_helper, (
void*)&params_add[t]) != 0)
399 params_add[t].wdocas=o;
401 params_add[t].new_a=new_a;
402 params_add[t].new_cut=new_cut;
403 params_add[t].start = step*t;
404 params_add[t].end = string_length;
405 params_add[t].cut_length = cut_length;
406 add_new_cut_helper(&params_add[t]);
409 for (t=0; t<nthreads; t++)
411 if (pthread_join(threads[t], NULL) != 0)
422 for(i=0; i < cut_length; i++)
425 c_bias[nSel]+=o->lab[new_cut[i]];
429 for(i=0; i < nSel; i++)
430 new_col_H[i] =
CMath::dot(new_a, cuts[i], nDim) + c_bias[nSel]*c_bias[i];
452 void* CWDSVMOcas::compute_output_helper(
void* ptr)
454 wdocas_thread_params_output* p = (wdocas_thread_params_output*) ptr;
455 CWDSVMOcas* o = p->wdocas;
456 int32_t start = p->start;
457 int32_t end = p->end;
460 int32_t* val = p->val;
464 int32_t degree = o->degree;
465 int32_t string_length = o->string_length;
466 int32_t alphabet_size = o->alphabet_size;
467 int32_t* w_offsets = o->w_offsets;
472 float64_t normalization_const = o->normalization_const;
475 for (int32_t j=0; j<string_length; j++)
477 int32_t offs=o->w_dim_single_char*j;
478 for (int32_t i=start ; i<end; i++)
481 int32_t lim=
CMath::min(degree, string_length-j);
484 for (int32_t k=0; k<lim; k++)
490 for (int32_t i=start; i<end; i++)
492 val[i]=val[i]*alphabet_size + vec[i];
493 out[i]+=wd*w[offs+val[i]];
536 for (int32_t i=start; i<end; i++)
537 output[i]=y[i]*o->bias + out[i]*y[i]/normalization_const;
544 int CWDSVMOcas::compute_output(
float64_t *output,
void* ptr )
547 CWDSVMOcas* o = (CWDSVMOcas*) ptr;
548 int32_t nData=o->num_vec;
549 wdocas_thread_params_output* params_output=SG_MALLOC(wdocas_thread_params_output, o->parallel->get_num_threads());
550 pthread_t* threads = SG_MALLOC(pthread_t, o->parallel->get_num_threads());
553 int32_t* val=SG_MALLOC(int32_t, nData);
557 int32_t nthreads=o->parallel->get_num_threads()-1;
558 int32_t step= nData/o->parallel->get_num_threads();
566 for (t=0; t<nthreads; t++)
568 params_output[t].wdocas=o;
569 params_output[t].output=output;
570 params_output[t].out=out;
571 params_output[t].val=val;
572 params_output[t].start = step*t;
573 params_output[t].end = step*(t+1);
576 if (pthread_create(&threads[t], NULL, &CWDSVMOcas::compute_output_helper, (
void*)&params_output[t]) != 0)
584 params_output[t].wdocas=o;
585 params_output[t].output=output;
586 params_output[t].out=out;
587 params_output[t].val=val;
588 params_output[t].start = step*t;
589 params_output[t].end = nData;
590 compute_output_helper(&params_output[t]);
593 for (t=0; t<nthreads; t++)
595 if (pthread_join(threads[t], NULL) != 0)
599 SG_FREE(params_output);
614 void CWDSVMOcas::compute_W(
618 CWDSVMOcas* o = (CWDSVMOcas*) ptr;
619 uint32_t nDim= (uint32_t) o->w_dim;
629 for (uint32_t i=0; i<nSel; i++)
634 bias += c_bias[i]*alpha[i];
638 *dp_WoldW =
CMath::dot(W,oldW, nDim) + bias*old_bias;;
642 o->old_bias = old_bias;
645 #endif //USE_GPL_SHOGUN
Real Labels are real-valued labels.
RAWDNA - letters 0,1,2,3.
The class Labels models labels, i.e. class assignments of objects.
SGString< ST > * features
static void qsort_index(T1 *output, T2 *index, uint32_t size)
The class Alphabet implements an alphabet and alphabet utility functions.
A generic learning machine interface.
void print(CJLCoverTreePoint &p)
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
virtual EFeatureClass get_feature_class() const =0
SGVector< ST > get_feature_vector(int32_t num)
SGStringList< ST > get_features()
float64_t normalization_const
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
All classes and functions are contained in the shogun namespace.
The class Features is the base class of all feature objects.
Binary Labels for binary classification.
static void swap(T &a, T &b)
#define SG_UNSTABLE(func,...)
static int32_t pow(bool x, int32_t n)
virtual EFeatureType get_feature_type() const =0
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y