class MARC::Reader

Public Class Methods

decode(marc, params={}) click to toggle source

A static method for turning raw MARC data in transmission format into a MARC::Record object.

# File lib/marc/reader.rb, line 73
# Decode a single record in MARC transmission format into a Record.
#
# marc   - String containing one raw record: the leader, then the directory,
#          then the field data.
# params - optional Hash. When params[:forgiving] is truthy the byte offsets
#          in the directory are ignored; the field data is split on
#          END_OF_FIELD and the pieces are paired with the directory entries
#          in order.
#
# Returns the Record that was built.
# Raises MARC::Exception if the directory cannot be read or, in the default
# strict mode, if a directory entry points past the end of the data.
def self.decode(marc, params={})
  record = Record.new
  record.leader = marc[0..LEADER_LENGTH-1]

  # where the field data starts
  base_address = record.leader[12..16].to_i

  # the directory sits between the leader and the field data
  directory = marc[LEADER_LENGTH..base_address-1]
  raise MARC::Exception.new("invalid directory in record") if directory.nil?

  # the number of fields in the record corresponds to
  # how many directory entries there are
  num_fields = directory.length / DIRECTORY_ENTRY_LENGTH

  # when operating in forgiving mode we just split on end of
  # field instead of using calculated byte offsets from the
  # directory
  if params[:forgiving]
    # the slice is nil when the base address lies beyond the data;
    # treat that as "no field data" rather than crashing
    all_fields = (marc[base_address..-1] || '').split(END_OF_FIELD)
  else
    mba = marc.bytes.to_a
  end

  num_fields.times do |field_num|
    # pull the directory entry for a field out (exactly one entry wide)
    entry = directory[field_num * DIRECTORY_ENTRY_LENGTH, DIRECTORY_ENTRY_LENGTH]

    # extract the tag
    tag = entry[0..2]

    if params[:forgiving]
      # use the next available chunk of field data
      field_data = all_fields.shift
      # the directory promises more fields than the data contains;
      # nothing further can be decoded
      break if field_data.nil?
    else
      # use the byte offsets in the directory to locate the field data
      length = entry[3..6].to_i
      offset = entry[7..11].to_i
      field_start = base_address + offset
      field_end = field_start + length - 1
      field_bytes = mba[field_start..field_end]
      if field_bytes.nil?
        raise MARC::Exception.new("invalid directory entry for field #{tag}")
      end
      # NOTE(review): pack yields a binary (ASCII-8BIT) string on Ruby 1.9+,
      # whereas the forgiving branch keeps marc's encoding -- confirm that
      # callers do not rely on one or the other.
      field_data = field_bytes.pack("c*")
    end

    # remove end of field
    field_data.delete!(END_OF_FIELD)

    # add a control field or data field
    if MARC::ControlField.control_tag?(tag)
      record.append(MARC::ControlField.new(tag, field_data))
    else
      field = MARC::DataField.new(tag)

      # get all subfields
      subfields = field_data.split(SUBFIELD_INDICATOR)

      # must have at least 2 elements (indicators, and 1 subfield)
      # TODO some sort of logging?
      next if subfields.length < 2

      # the first element holds the two indicator characters
      indicators = subfields.shift
      field.indicator1 = indicators[0, 1]
      field.indicator2 = indicators[1, 1]

      # each remaining element is a one-character code followed by its value
      subfields.each do |data|
        field.append(MARC::Subfield.new(data[0, 1], data[1..-1]))
      end

      # add the field to the record
      record.append(field)
    end
  end

  record
end
new(file) click to toggle source

The constructor, to which you may pass either a path:

reader = MARC::Reader.new('marc.dat')

or, if it's more convenient, a File object:

fh = File.new('marc.dat')
reader = MARC::Reader.new(fh)

or really any object that responds to read(n)

# marc is a string with a bunch of records in it
reader = MARC::Reader.new(StringIO.new(marc))

If your data have non-standard control fields in them (e.g., Aleph's 'FMT') you need to add them specifically to the MARC::ControlField.control_tags Set object

MARC::ControlField.control_tags << 'FMT'
# File lib/marc/reader.rb, line 26
def initialize(file)
  if file.is_a?(String)
    @handle = File.new(file)
  elsif file.respond_to?("read", 5)
    @handle = file
  else
    throw "must pass in path or file"
  end
end

Public Instance Methods

each() { |record| ... } click to toggle source

to support iteration:

for record in reader
  print record
end

and even searching:

record.find { |f| f['245'] =~ /Huckleberry/ }
# File lib/marc/reader.rb, line 44
# Read records from @handle one after another, decode each with
# MARC::Reader.decode and yield the result to the block.
#
# Every record is expected to begin with its total length written as five
# characters; that prefix tells us how many more bytes to read.
#
# Raises MARC::Exception if a length prefix is not a usable number or if the
# data ends immediately after a length prefix.
def each
  # while there is data left in the file
  while (rec_length_s = @handle.read(5))
    # the length counts the 5-byte prefix itself, so anything smaller
    # (including non-numeric text, which to_i turns into 0) is invalid
    rec_length_i = rec_length_s.to_i
    if rec_length_i < 5
      raise MARC::Exception.new("invalid record length: #{rec_length_s}")
    end

    # get the rest of the raw record using the record length
    remainder = @handle.read(rec_length_i - 5)
    if remainder.nil?
      raise MARC::Exception.new("unexpected end of data after record length: #{rec_length_s}")
    end
    raw = rec_length_s + remainder

    # Ruby 1.9 will try to set the encoding to ASCII-8BIT, which we don't want.
    # Not entirely sure what happens for MARC-8 encoded records, but, technically,
    # ruby-marc doesn't support MARC-8, anyway.
    raw.force_encoding('utf-8') if raw.respond_to?(:force_encoding)

    # create a record from the data and hand it to the caller
    yield MARC::Reader.decode(raw)
  end
end