class String

Public Instance Methods

ascii() click to toggle source
# File lib/sup/util.rb, line 316
# Returns a copy of the string containing only 7-bit characters.
#
# Every byte with the high bit set is replaced by a literal backslash-x
# escape holding the byte's value in lowercase hex; all other bytes are
# copied through unchanged. The result is tagged as UTF-8 when the running
# Ruby supports string encodings.
def ascii
  pieces = each_byte.map do |byte|
    (byte & 128).zero? ? byte.chr : "\\x#{byte.to_s 16}"
  end
  escaped = pieces.join
  escaped.force_encoding Encoding::UTF_8 if escaped.respond_to? :force_encoding
  escaped
end
camel_to_hyphy() click to toggle source
# File lib/sup/util.rb, line 187
# Converts a camelCase name to lowercase, hyphen-separated form.
#
# A hyphen is inserted between a lowercase letter and a directly following
# uppercase letter or digit; the whole result is then downcased.
def camel_to_hyphy
  hyphenated = gsub(/([a-z])([A-Z0-9])/, '\1-\2')
  hyphenated.downcase
end
check() click to toggle source
# File lib/sup/util.rb, line 307
# Verifies that the string is in an accepted encoding and is well-formed.
#
# On Rubies with encoding support the encoding must be UTF-8 or ASCII and
# the bytes must be valid for it. Any failure (including an unexpected error
# raised while checking) is re-raised as a CheckError carrying the original
# message. Returns nil when the string passes.
def check
  if respond_to?(:encoding)
    acceptable = encoding == Encoding::UTF_8 || encoding == Encoding::ASCII
    fail "unexpected encoding #{encoding}" unless acceptable
  end
  if respond_to?(:valid_encoding?) && !valid_encoding?
    fail "invalid encoding"
  end
rescue => e
  raise CheckError.new(e.message)
end
display_length() click to toggle source

Nasty multibyte hack for Ruby 1.8: if the encoding is UTF-8, split the string into characters using the UTF-8 regex and count those; otherwise, use the byte length.

# File lib/sup/util.rb, line 179
# Returns the length of the string for display purposes.
#
# On Rubies older than 1.9.1, when the global $encoding names UTF-8, the
# string is scanned with a regex and the matches are counted; in every
# other case the plain String#size is returned.
def display_length
  legacy_ruby = RUBY_VERSION < '1.9.1'
  utf8_session = $encoding == "UTF-8" || $encoding == "utf8"
  return scan(/./).size if legacy_ruby && utf8_session

  size
end
each(&b) click to toggle source
# File lib/sup/util.rb, line 294
# Iterates over the string line by line by delegating to each_line,
# forwarding the given block (if any) unchanged.
def each(&b)
  each_line(&b)
end
find_all_positions(x) click to toggle source
# File lib/sup/util.rb, line 191
# Returns an array with the start index of every occurrence of x (anything
# String#index accepts) in the string, in ascending order.
#
# The search resumes one character after each hit, so overlapping
# occurrences are reported as well.
def find_all_positions(x)
  positions = []
  cursor = 0
  while cursor < length && (hit = index(x, cursor))
    positions << hit
    cursor = hit + 1
  end
  positions
end
normalize_whitespace() click to toggle source
# File lib/sup/util.rb, line 283
# Returns a copy of the string with every tab replaced by four spaces and
# every carriage return removed.
def normalize_whitespace
  tabs_expanded = gsub(/\t/, "    ")
  tabs_expanded.gsub(/\r/, "")
end
ord() click to toggle source
# File lib/sup/util.rb, line 288
# Returns the integer code of the first character, or nil for an empty
# string.
#
# On Ruby 1.8 String#[] with an integer index already yields the integer
# byte value. From Ruby 1.9 on it yields a one-character String, which is
# not what callers of ord expect, so that character is converted to its
# codepoint here.
def ord
  head = self[0]
  # Ruby 1.8 path (integer) and the empty-string case (nil) pass through.
  return head unless head.is_a?(String)

  head.codepoints.first
end
split_on_commas() click to toggle source

A very complicated regex found on the internet to split on commas, unless they occur within double quotes.

# File lib/sup/util.rb, line 205
# Splits the string on commas (plus any whitespace right after them), but
# only on commas that are followed by an even number of double quotes,
# i.e. commas that are not inside a double-quoted section.
def split_on_commas
  unquoted_comma = /,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/
  split(unquoted_comma)
end
split_on_commas_with_remainder() click to toggle source

OK, here we do it the hard way: we need a remainder for the purposes of tab-completing full email addresses.

# File lib/sup/util.rb, line 211
# Splits the string on commas that lie outside double-quoted sections and
# additionally reports an unfinished trailing piece.
#
# Returns a two-element array [parts, remainder]:
# * parts     - the completed comma-separated pieces, each stripped of
#               leading and trailing whitespace;
# * remainder - when the string ends inside an unterminated double-quoted
#               section, the text from the start of that piece (leading
#               whitespace removed); otherwise nil.
#
# A backslash escapes exactly the one character that follows it, so an
# escaped double quote does not open or close a quoted section.
#
# Character literals (?" etc.) are used so the comparison with self[index]
# works both where String#[] yields an integer and where it yields a
# one-character string.
def split_on_commas_with_remainder
  ret = []
  state = :outstring
  pos = 0
  region_start = 0
  while pos <= length
    # Directly after a backslash the very next character is examined;
    # otherwise jump ahead to the next comma, double quote or backslash.
    newpos = case state
      when :escaped_instring, :escaped_outstring then pos
      else index(/[,"\\]/, pos)
    end

    if newpos
      char = self[newpos]
    else
      # Nothing interesting left: treat the end of the string like a comma.
      char = nil
      newpos = length
    end

    case char
    when ?"
      state = case state
        when :outstring then :instring
        when :instring then :outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    when ?,, nil
      state = case state
        when :outstring, :escaped_outstring then
          ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "")
          region_start = newpos + 1
          :outstring
        when :instring then :instring
        when :escaped_instring then :instring
      end
    when ?\\
      state = case state
        when :instring then :escaped_instring
        when :outstring then :escaped_outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    else
      # An ordinary character after a backslash: the escape is used up.
      # Without this the escape would stay active and wrongly swallow a
      # later quote or comma.
      state = case state
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
        else state
      end
    end
    pos = newpos + 1
  end

  remainder = case state
    when :instring
      self[region_start .. -1].gsub(/^\s+/, "")
    else
      nil
    end

  [ret, remainder]
end
to_set_of_symbols(split_on=nil) click to toggle source

takes a list of words, and returns an array of symbols. typically used in Sup for translating Ferret's representation of a list of labels (a string) to an array of label symbols.

split_on will be passed to String#split, so you can leave this nil for space.

# File lib/sup/util.rb, line 304
# Splits the string with String#split (using split_on, or whitespace when
# it is nil), strips each piece, converts it to a symbol and returns the
# symbols collected in a Set.
def to_set_of_symbols(split_on = nil)
  symbols = split(split_on).map { |word| word.strip.intern }
  Set.new(symbols)
end
transcode(src_encoding=$encoding) click to toggle source
# File lib/sup/util.rb, line 329
# Converts the string from src_encoding (default: the global $encoding) to
# the encoding named by the global $encoding by handing both names and the
# string to Iconv.easy_decode, whose result is returned.
def transcode(src_encoding = $encoding)
  target_encoding = $encoding
  Iconv.easy_decode(target_encoding, src_encoding, self)
end
wrap(len) click to toggle source
# File lib/sup/util.rb, line 267
# Breaks the string into an array of lines, each at most len characters
# long.
#
# Each line is broken at the last whitespace character that still lets the
# line fit; that whitespace character is dropped. If a stretch of len
# characters contains no whitespace at all, it is cut hard after len
# characters.
#
# len - maximum line length; must be positive.
#
# Raises ArgumentError when len is not positive (the loop could otherwise
# never make progress).
def wrap len
  raise ArgumentError, "wrap length must be positive (got #{len})" unless len > 0

  ret = []
  s = self
  while s.length > len
    # Look one character past the limit: whitespace sitting exactly at the
    # boundary is a valid break and lets the line use its full width,
    # instead of producing a short line followed by an empty one.
    cut = s[0 .. len].rindex(/\s/)
    if cut
      ret << s[0 ... cut]
      s = s[(cut + 1) .. -1]
    else
      ret << s[0 ... len]
      s = s[len .. -1]
    end
  end
  ret << s
end