class JMESPath::Lexer

@api private

Constants

NUMBERS
SIMPLE_TOKENS
STATE_AND
STATE_EQ
STATE_GT
STATE_IDENTIFIER
STATE_JSON_LITERAL
STATE_LBRACKET
STATE_LT
STATE_NOT
STATE_NUMBER
STATE_PIPE
STATE_QUOTED_STRING
STATE_SINGLE_CHAR
STATE_STRING_LITERAL
STATE_WHITESPACE
TRANSLATION_TABLE
T_AND
T_COLON
T_COMMA
T_COMPARATOR
T_CURRENT
T_DOT
T_EOF
T_EXPREF
T_FILTER
T_FLATTEN
T_IDENTIFIER
T_LBRACE
T_LBRACKET
T_LITERAL
T_LPAREN
T_NOT
T_NUMBER
T_OR
T_PIPE
T_QUOTED_IDENTIFIER
T_RBRACE
T_RBRACKET
T_RPAREN
T_STAR
T_UNKNOWN
VALID_IDENTIFIERS

Private Class Methods

requires_wrapping?() click to toggle source

Certain versions of Ruby and of the json_pure gem do not support loading scalar JSON values, such as numbers, booleans, strings, etc. These simple values must first be wrapped inside a JSON object before calling `JSON.load`.

# works in most JSON versions, raises in some versions
JSON.load("true")
JSON.load("123")
JSON.load("\"abc\"")

This is a known issue for:

  • Ruby 1.9.3 bundled v1.5.5 of json; Ruby 1.9.3 defaults to bundled version despite newer versions being available.

  • json_pure v2.0.0+

It is not possible to change the version of JSON loaded in the user's application. Adding an explicit dependency on the json gem causes issues in environments that cannot compile the gem. We previously had a direct dependency on `json_pure`, but this broke with the v2 update.

This method allows us to detect how `JSON.load` behaves, so we know whether scalar JSON values have to be wrapped before they can be parsed.

@api private

# File lib/jmespath/lexer.rb, line 323
# Probes `JSON.load` with a bare scalar document to find out whether the
# loaded JSON implementation can parse scalars directly.
#
# @return [Boolean] the value produced by `JSON.load('false')` (i.e. `false`)
#   when scalars load fine, `true` when the probe raises `JSON::ParserError`
def self.requires_wrapping?
  JSON.load('false')
rescue JSON::ParserError
  # the loaded JSON implementation rejects top-level scalars
  true
end

Public Instance Methods

tokenize(expression) click to toggle source

@param [String<JMESPath>] expression
@return [Array<Token>]

# File lib/jmespath/lexer.rb, line 163
# Converts a JMESPath expression into a list of tokens.
#
# Each character is looked up in TRANSLATION_TABLE and the resulting state
# decides how many characters are consumed and which token is produced.
# Characters without a table entry become T_UNKNOWN tokens; whitespace is
# skipped without producing a token. The list always ends with a T_EOF
# token positioned just past the last character.
#
# @param [String<JMESPath>] expression
# @return [Array<Token>]
def tokenize(expression)
  stream = CharacterStream.new(expression.chars.to_a)
  result = []

  while stream.current
    case TRANSLATION_TABLE[stream.current]
    when nil
      # no table entry for this character
      result << Token.new(T_UNKNOWN, stream.current, stream.position)
      stream.next
    when STATE_SINGLE_CHAR
      # one-character tokens such as ".", ",", "@", etc.
      result << Token.new(SIMPLE_TOKENS[stream.current], stream.current, stream.position)
      stream.next
    when STATE_IDENTIFIER
      first_index = stream.position
      letters = []
      # always take the first character, then keep going while the
      # following character is a valid identifier character
      loop do
        letters << stream.current
        stream.next
        break unless VALID_IDENTIFIERS.include?(stream.current)
      end
      result << Token.new(T_IDENTIFIER, letters.join, first_index)
    when STATE_WHITESPACE
      # whitespace produces no token
      stream.next
    when STATE_LBRACKET
      # "[" may open a flatten "[]", a filter "[?" or a plain bracket
      opened_at = stream.position
      case stream.next
      when ']'
        stream.next
        result << Token.new(T_FLATTEN, '[]', opened_at)
      when '?'
        stream.next
        result << Token.new(T_FILTER, '[?', opened_at)
      else
        # the character after "[" is left for the next iteration
        result << Token.new(T_LBRACKET, '[',  opened_at)
      end
    when STATE_STRING_LITERAL
      # raw string literal delimited by single quotes; unescape \' afterwards
      raw = inside(stream, "'", T_LITERAL)
      raw.value = raw.value.gsub("\\'", "'")
      result << raw
    when STATE_PIPE
      # "||" is OR, a lone "|" is a pipe
      result << match_or(stream, '|', '|', T_OR, T_PIPE)
    when STATE_JSON_LITERAL
      # backtick-delimited JSON literal; only decode it when it was closed
      literal = inside(stream, '`', T_LITERAL)
      if literal.type == T_LITERAL
        literal.value = literal.value.gsub('\\`', '`')
        literal = parse_json(literal)
      end
      result << literal
    when STATE_NUMBER
      first_index = stream.position
      digits = []
      # same take-one-then-continue shape as identifiers
      loop do
        digits << stream.current
        stream.next
        break unless NUMBERS.include?(stream.current)
      end
      result << Token.new(T_NUMBER, digits.join.to_i, first_index)
    when STATE_QUOTED_STRING
      # double-quoted identifier; re-add the quotes so it parses as a JSON string
      quoted = inside(stream, '"', T_QUOTED_IDENTIFIER)
      if quoted.type == T_QUOTED_IDENTIFIER
        quoted.value = "\"#{quoted.value}\""
        quoted = parse_json(quoted, true)
      end
      result << quoted
    when STATE_EQ
      # "==" is a comparator, a lone "=" is unknown
      result << match_or(stream, '=', '=', T_COMPARATOR, T_UNKNOWN)
    when STATE_AND
      # "&&" is AND, a lone "&" is an expression reference
      result << match_or(stream, '&', '&', T_AND, T_EXPREF)
    when STATE_NOT
      # "!=" is a comparator, a lone "!" is NOT
      result << match_or(stream, '!', '=', T_COMPARATOR, T_NOT)
    else
      # remaining states are "<" and ">", optionally followed by "="
      result << match_or(stream, stream.current, '=', T_COMPARATOR, T_COMPARATOR)
    end
  end

  result << Token.new(T_EOF, nil, stream.position)
end

Private Instance Methods

inside(chars, delim, type) click to toggle source
# File lib/jmespath/lexer.rb, line 278
# Reads a delimited run of characters. The stream is expected to be
# positioned on the opening delimiter; everything up to the matching
# closing delimiter is collected and the stream is left on the character
# after it.
#
# A backslash is kept in the collected text and the character following it
# is taken verbatim, so an escaped delimiter does not end the run.
#
# @param chars the character stream (responds to #position and #next)
# @param [String] delim the closing delimiter character
# @param type the token type to use when the run is properly closed
# @return [Token] a token of `type`, or a T_UNKNOWN token holding the text
#   read so far when the input ends before the closing delimiter
def inside(chars, delim, type)
  start = chars.position
  pieces = []
  char = chars.next

  until char == delim
    if char == '\\'
      # keep the backslash and step onto the escaped character
      pieces << char
      char = chars.next
    end

    # ran off the end of the input: unclosed delimiter
    return Token.new(T_UNKNOWN, pieces.join, start) if char.nil?

    pieces << char
    char = chars.next
  end

  # step past the closing delimiter
  chars.next
  Token.new(type, pieces.join, start)
end
match_or(chars, current, expected, type, or_type) click to toggle source
# File lib/jmespath/lexer.rb, line 269
# Produces either a two-character token or a one-character fallback token.
#
# The stream is expected to be positioned on `current`. If the following
# character equals `expected`, both characters are consumed and a token of
# `type` is returned; otherwise only `current` is consumed and a token of
# `or_type` is returned.
#
# The token position is the offset of `current`, i.e. the first character
# of the token, for both outcomes. This matches how the other multi-character
# tokens ("[]", "[?", quoted runs) report their position; deriving it from
# the stream position after advancing would point a two-character token at
# its second character.
#
# @param chars the character stream (responds to #position and #next)
# @param [String] current the character the stream is positioned on
# @param [String] expected the character that completes the two-char token
# @param type token type when `expected` follows
# @param or_type token type when it does not
# @return [Token]
def match_or(chars, current, expected, type, or_type)
  start = chars.position
  if chars.next == expected
    chars.next
    Token.new(type, current + expected, start)
  else
    Token.new(or_type, current, start)
  end
end
parse_json(token, quoted = false) click to toggle source
# File lib/jmespath/lexer.rb, line 332
# Replaces the token's raw text with the value obtained by decoding it as
# JSON. The text is wrapped in a `{"value": ...}` object before loading so
# that scalar values can be decoded as well.
#
# For unquoted tokens, if the first decode attempt raises, the text (with
# leading whitespace removed) is retried as the contents of a JSON string.
# When decoding ultimately fails with JSON::ParserError the token value is
# left untouched and its type is set to T_UNKNOWN.
#
# @param [Token] token token whose value holds the raw JSON text; mutated
# @param [Boolean] quoted true when the value is a quoted JSON string and
#   the string fallback must not be attempted
# @return [Token] the same token instance
def parse_json(token, quoted = false)
  raw = token.value
  token.value =
    begin
      JSON.load("{\"value\":#{raw}}")['value']
    rescue
      # quoted values get no second chance; hand the error to the outer rescue
      raise if quoted
      # otherwise treat the raw text as a bare (unquoted) string
      JSON.load(sprintf('{"value":"%s"}', raw.lstrip))['value']
    end
  token
rescue JSON::ParserError
  token.type = T_UNKNOWN
  token
end