class Ferret::Index::FieldInfos

Summary

The FieldInfos class holds all the field descriptors for an index. It is this class that is used to create a new index using the #create_index method. If you are happy with the default properties for FieldInfo then you don't need to worry about this class; IndexWriter can create the index for you. Otherwise you should set up the index as in the example below:

Example

# Defaults used for any field that does not override a property.
field_infos = FieldInfos.new(:term_vector => :no)

# Explicitly configured field with its own boost.
field_infos.add_field(:title, :index => :untokenized, :term_vector => :no,
                      :boost => 10.0)

# No properties given, so this field takes the FieldInfos defaults.
field_infos.add_field(:content)

field_infos.add_field(:created_on, :index => :untokenized_omit_norms,
                      :term_vector => :no)

field_infos.add_field(:image, :store => :compressed, :index => :no,
                      :term_vector => :no)

# Creates the index in the given directory (existing contents are replaced).
field_infos.create_index("/path/to/index")

Default Properties

See FieldInfo for the available field property values.

When you create the FieldInfos object you specify the default properties for the fields. Often you'll specify all of the fields in the index before you create the index so the default values won't come into play. However, it is possible to continue to dynamically add fields as indexing goes along. If you add a document to the index which has fields that the index doesn't know about then the default properties are used for the new field.

Public Class Methods

load(yaml_str) click to toggle source

Load FieldInfos from a YAML string. The YAML should look something like this:

default:
  store: :yes
  index: :yes
  term_vector: :no

fields:
  id:
    index: :untokenized
    term_vector: :no

  title:
    boost: 20.0
    term_vector: :no

  content:
    term_vector: :with_positions_offsets
# File lib/ferret/field_infos.rb, line 24
# Build a FieldInfos object from a YAML document passed in as a string.
#
# The optional top-level +default+ mapping supplies the default field
# properties; every entry of the optional +fields+ mapping is registered
# through #add_field with its own properties.
#
# Returns the populated FieldInfos object.
#
# NOTE(review): YAML.load may instantiate arbitrary objects on some
# Ruby/Psych versions, so only trusted input should be passed here.
def self.load(yaml_str)
  info = YAML.load(yaml_str)
  convert_strings_to_symbols(info)
  # A document without a "default" section yields nil here; fall back to an
  # empty hash so the constructor never receives nil as its property hash.
  fis = FieldInfos.new(info[:default] || {})
  fields = info[:fields]
  if fields
    # A field listed with no properties parses as nil; treat it as "use the
    # defaults" rather than passing nil on to add_field.
    fields.each { |name, properties| fis.add_field(name, properties || {}) }
  end
  fis
end
new(defaults = {}) → field_infos click to toggle source

Create a new FieldInfos object which uses the default values for fields specified in the default hash parameter. See FieldInfo for available property values.

/*
 * FieldInfos#initialize: build the underlying FieldInfos struct and attach
 * it to +self+.
 *
 * Accepts at most one optional argument, a hash of default field
 * properties. When it is supplied, frb_fi_get_params may override the
 * built-in defaults (store: yes, index: yes, term vectors with positions
 * and offsets). Returns +self+.
 */
static VALUE
frb_fis_init(int argc, VALUE *argv, VALUE self)
{
    VALUE roptions;
    FieldInfos *fis;
    StoreValue store = STORE_YES;
    IndexValue index = INDEX_YES;
    TermVectorValue term_vector = TERM_VECTOR_WITH_POSITIONS_OFFSETS;
    /* Only needed as an out-slot for frb_fi_get_params; the value is not
     * passed on to fis_new. Initialized (as in frb_fis_add_field) so the
     * helper never sees an indeterminate float. */
    float boost = 1.0f;

    rb_scan_args(argc, argv, "01", &roptions);
    if (argc > 0) {
        frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
    }
    fis = fis_new(store, index, term_vector);
    Frt_Wrap_Struct(self, &frb_fis_mark, &frb_fis_free, fis);
    /* Register the struct -> Ruby object association. */
    object_add(fis, self);
    return self;
}

Public Instance Methods

fis << fi → fis click to toggle source
add(fi) → fis

Add a FieldInfo object. Use the #add_field method where possible.

/*
 * FieldInfos#<< / FieldInfos#add: append an existing FieldInfo object to
 * this collection and return +self+.
 */
static VALUE
frb_fis_add(VALUE self, VALUE rfi)
{
    FieldInfos *collection = (FieldInfos *)DATA_PTR(self);
    FieldInfo *field_info = (FieldInfo *)frb_rb_data_ptr(rfi);

    fis_add_field(collection, field_info);
    /* The extra reference is taken only once the add call has returned. */
    REF(field_info);

    return self;
}
fis[name] → field_info click to toggle source
fis[number] → field_info

Get the FieldInfo object. FieldInfo objects can be referenced by either their field-number or their field-name (which must be a symbol). For example:

fi = fis[:name]
fi = fis[2]
/*
 * FieldInfos#[]: look up a FieldInfo by position or by name.
 *
 * ridx - a Fixnum position (negative values count back from the end), or a
 *        Symbol/String field name.
 *
 * Returns the Ruby FieldInfo produced by frb_get_field_info.
 * Raises ArgumentError when a numeric index is out of range or when ridx
 * is of any other type.
 */
static VALUE
frb_fis_get(VALUE self, VALUE ridx)
{
    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
    VALUE rfi = Qnil;
    switch (TYPE(ridx)) {
        case T_FIXNUM: {
            /* Keep the caller's value so the error message reports what was
             * actually requested, not the negative-adjusted position. */
            const int requested = FIX2INT(ridx);
            int index = requested;
            if (index < 0) {
                index += fis->size;
            }
            if (index < 0 || index >= fis->size) {
                rb_raise(rb_eArgError, "index of %d is out of range (0..%d)\n",
                         requested, fis->size - 1);
            }
            rfi = frb_get_field_info(fis->fields[index]);
            break;
        }
        case T_SYMBOL:
        case T_STRING:
            /* Both name forms are normalized through frb_field. */
            rfi = frb_get_field_info(fis_get_field(fis, frb_field(ridx)));
            break;
        default:
            rb_raise(rb_eArgError, "Can't index FieldInfos with %s",
                     rs2s(rb_obj_as_string(ridx)));
            break;
    }
    return rfi;
}
fis << fi → fis click to toggle source
add(fi) → fis

Add a FieldInfo object. Use the #add_field method where possible.

/*
 * Shared implementation of FieldInfos#<< and FieldInfos#add: adds the given
 * FieldInfo object to the receiver and returns the receiver.
 */
static VALUE
frb_fis_add(VALUE self, VALUE rfi)
{
    FieldInfos *owner = (FieldInfos *)DATA_PTR(self);
    FieldInfo *added = (FieldInfo *)frb_rb_data_ptr(rfi);

    fis_add_field(owner, added);
    /* Reference taken after the add call, in the same order as before. */
    REF(added);

    return self;
}
add_field(name, properties = {}) → fis click to toggle source

Add a new field to the FieldInfos object. See FieldInfo for a description of the available properties.

/*
 * FieldInfos#add_field(name, properties = {}): create a new FieldInfo and
 * add it to this collection.
 *
 * Each property starts from the collection's own defaults (boost starts at
 * 1.0) and may be overridden through the optional properties hash.
 * Returns +self+.
 */
static VALUE
frb_fis_add_field(int argc, VALUE *argv, VALUE self)
{
    VALUE rname, roptions;
    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
    FieldInfo *new_fi;

    /* Seed every property with the collection-level default. */
    StoreValue store = fis->store;
    IndexValue index = fis->index;
    TermVectorValue term_vector = fis->term_vector;
    float boost = 1.0f;

    rb_scan_args(argc, argv, "11", &rname, &roptions);
    if (argc >= 2) {
        frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
    }

    new_fi = fi_new(frb_field(rname), store, index, term_vector);
    new_fi->boost = boost;
    fis_add_field(fis, new_fi);

    return self;
}
create_index(dir) → self click to toggle source

Create a new index in the directory specified. The directory dir can either be a string path representing a directory on the file-system or an actual directory object. Care should be taken when using this method. Any existing index (or other files for that matter) will be deleted from the directory and overwritten by the new index.

/*
 * FieldInfos#create_index(dir): create a new index using these field
 * definitions.
 *
 * rdir is either a wrapped data object (its data pointer is used as the
 * Store) or a value convertible to a String path, in which case the
 * directory is created and opened as a file-system store. Returns +self+.
 */
static VALUE
frb_fis_create_index(VALUE self, VALUE rdir)
{
    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
    Store *target = NULL;

    if (TYPE(rdir) != T_DATA) {
        /* Path form: coerce to String, make the directory, open a store. */
        StringValue(rdir);
        frb_create_dir(rdir);
        target = open_fs_store(rs2s(rdir));
    } else {
        /* Object form: take a reference that pairs with the deref below. */
        target = DATA_PTR(rdir);
        REF(target);
    }

    index_create(target, fis);
    store_deref(target);
    return self;
}
each {|fi| do_something } → fis click to toggle source

Iterate through the FieldInfo objects.

/*
 * FieldInfos#each: yield each FieldInfo in order to the given block.
 * Returns +self+.
 */
static VALUE
frb_fis_each(VALUE self)
{
    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
    int pos = 0;

    /* The size is re-read on every pass, exactly like the loop condition
     * of a plain for-loop. */
    while (pos < fis->size) {
        rb_yield(frb_get_field_info(fis->fields[pos]));
        pos++;
    }
    return self;
}
fields → symbol array click to toggle source
field_names → symbol array

Return a list of the field names (as symbols) of all the fields in the index.

/*
 * FieldInfos#fields / FieldInfos#field_names: return a new Ruby array
 * holding the name of every field, in order, as symbols.
 */
static VALUE
frb_fis_get_fields(VALUE self)
{
    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
    VALUE names = rb_ary_new();
    int pos = 0;

    while (pos < fis->size) {
        FieldInfo *fi = fis->fields[pos];
        rb_ary_push(names, FSYM2SYM(fi->name));
        pos++;
    }
    return names;
}
size → int click to toggle source

Return the number of fields in the FieldInfos object.

/*
 * FieldInfos#size: return the number of fields as a Ruby Fixnum.
 */
static VALUE
frb_fis_size(VALUE self)
{
    return INT2FIX(((FieldInfos *)DATA_PTR(self))->size);
}
to_a → array click to toggle source

Return an array of the FieldInfo objects contained by this FieldInfos object.

/*
 * FieldInfos#to_a: return a new Ruby array containing the FieldInfo object
 * for every field, in order.
 */
static VALUE
frb_fis_to_a(VALUE self)
{
    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
    VALUE result = rb_ary_new();
    int pos = 0;

    while (pos < fis->size) {
        VALUE rfi = frb_get_field_info(fis->fields[pos]);
        rb_ary_push(result, rfi);
        pos++;
    }
    return result;
}
to_s → string click to toggle source

Return a string representation of the FieldInfos object.

/*
 * FieldInfos#to_s: return the string representation produced by fis_to_s
 * as a Ruby String.
 */
static VALUE
frb_fis_to_s(VALUE self)
{
    char *text = fis_to_s((FieldInfos *)DATA_PTR(self));
    VALUE rtext = rb_str_new2(text);

    /* The C buffer is released here once the Ruby string has been built. */
    free(text);
    return rtext;
}
tokenized_fields → symbol array click to toggle source

Return a list of the field names (as symbols) of all the tokenized fields in the index.

/*
 * FieldInfos#tokenized_fields: return a new Ruby array holding, as symbols,
 * the names of only those fields for which fi_is_tokenized is true.
 */
static VALUE
frb_fis_get_tk_fields(VALUE self)
{
    FieldInfos *fis = (FieldInfos *)DATA_PTR(self);
    VALUE names = rb_ary_new();
    int pos;

    for (pos = 0; pos < fis->size; pos++) {
        if (fi_is_tokenized(fis->fields[pos])) {
            rb_ary_push(names, FSYM2SYM(fis->fields[pos]->name));
        }
    }
    return names;
}