class JMESPath::Lexer
@api private
Constants
- NUMBERS
- SIMPLE_TOKENS
- STATE_AND
- STATE_EQ
- STATE_GT
- STATE_IDENTIFIER
- STATE_JSON_LITERAL
- STATE_LBRACKET
- STATE_LT
- STATE_NOT
- STATE_NUMBER
- STATE_PIPE
- STATE_QUOTED_STRING
- STATE_SINGLE_CHAR
- STATE_STRING_LITERAL
- STATE_WHITESPACE
- TRANSLATION_TABLE
- T_AND
- T_COLON
- T_COMMA
- T_COMPARATOR
- T_CURRENT
- T_DOT
- T_EOF
- T_EXPREF
- T_FILTER
- T_FLATTEN
- T_IDENTIFIER
- T_LBRACE
- T_LBRACKET
- T_LITERAL
- T_LPAREN
- T_NOT
- T_NUMBER
- T_OR
- T_PIPE
- T_QUOTED_IDENTIFIER
- T_RBRACE
- T_RBRACKET
- T_RPAREN
- T_STAR
- T_UNKNOWN
- VALID_IDENTIFIERS
Private Class Methods
Certain versions of Ruby and of the json_pure gem do not support loading scalar JSON values, such as numbers, booleans, strings, etc. These simple values must first be wrapped inside a JSON object before calling `JSON.load`.
# works in most JSON versions, raises in some versions JSON.load("true") JSON.load("123") JSON.load("\"abc\"")
This is a known issue for:
-
Ruby 1.9.3 bundled v1.5.5 of json; Ruby 1.9.3 defaults to the bundled version despite newer versions being available.
-
json_pure v2.0.0+
It is not possible to change the version of JSON loaded in the user's application. Adding an explicit dependency on json gem causes issues in environments that cannot compile the gem. We previously had a direct dependency on `json_pure`, but this broke with the v2 update.
This method allows us to detect how `JSON.load` behaves, so we know whether we have to wrap scalar JSON values in order to parse them. @api private
# File lib/jmespath/lexer.rb, line 323
# Probes how `JSON.load` treats a bare scalar document.
#
# Hands the literal text "false" to `JSON.load`. When the call succeeds its
# result is returned unchanged; when it raises JSON::ParserError the method
# answers `true`, meaning scalar values have to be wrapped before parsing.
def self.requires_wrapping?
  JSON.load('false')
rescue JSON::ParserError
  true
end
Public Instance Methods
@param [String<JMESPath>] expression @return [Array<Hash>]
# File lib/jmespath/lexer.rb, line 163
# Walks the expression one character at a time and turns it into a list of
# Token objects. The first character of each token selects a lexer state via
# TRANSLATION_TABLE; characters without an entry become T_UNKNOWN tokens.
# The returned list always ends with a T_EOF token.
def tokenize(expression)
  stream = CharacterStream.new(expression.chars.to_a)
  result = []

  while stream.current
    case TRANSLATION_TABLE[stream.current]
    when nil
      # no state registered for this character
      result << Token.new(T_UNKNOWN, stream.current, stream.position)
      stream.next
    when STATE_SINGLE_CHAR
      # consume simple tokens like ".", ",", "@", etc.
      result << Token.new(
        SIMPLE_TOKENS[stream.current],
        stream.current,
        stream.position
      )
      stream.next
    when STATE_IDENTIFIER
      # the first character is always taken; the rest must be valid
      # identifier characters
      first = stream.position
      letters = [stream.current]
      stream.next
      while VALID_IDENTIFIERS.include?(stream.current)
        letters << stream.current
        stream.next
      end
      result << Token.new(T_IDENTIFIER, letters.join, first)
    when STATE_WHITESPACE
      # skip whitespace
      stream.next
    when STATE_LBRACKET
      # consume "[", "[?" and "[]"
      opened_at = stream.position
      case stream.next
      when ']'
        stream.next
        result << Token.new(T_FLATTEN, '[]', opened_at)
      when '?'
        stream.next
        result << Token.new(T_FILTER, '[?', opened_at)
      else
        result << Token.new(T_LBRACKET, '[', opened_at)
      end
    when STATE_STRING_LITERAL
      # consume raw string literals
      raw = inside(stream, "'", T_LITERAL)
      raw.value = raw.value.gsub("\\'", "'")
      result << raw
    when STATE_PIPE
      # consume pipe and OR
      result << match_or(stream, '|', '|', T_OR, T_PIPE)
    when STATE_JSON_LITERAL
      # consume JSON literals; an unterminated literal comes back with a
      # different type and is passed through untouched
      literal = inside(stream, '`', T_LITERAL)
      if literal.type == T_LITERAL
        literal.value = literal.value.gsub('\\`', '`')
        literal = parse_json(literal)
      end
      result << literal
    when STATE_NUMBER
      # the first character is always taken; the rest must be in NUMBERS
      first = stream.position
      digits = [stream.current]
      stream.next
      while NUMBERS.include?(stream.current)
        digits << stream.current
        stream.next
      end
      result << Token.new(T_NUMBER, digits.join.to_i, first)
    when STATE_QUOTED_STRING
      # consume quoted identifiers; the quotes are put back so the text can
      # be decoded as a JSON string
      quoted = inside(stream, '"', T_QUOTED_IDENTIFIER)
      if quoted.type == T_QUOTED_IDENTIFIER
        quoted.value = "\"#{quoted.value}\""
        quoted = parse_json(quoted, true)
      end
      result << quoted
    when STATE_EQ
      # consume equals
      result << match_or(stream, '=', '=', T_COMPARATOR, T_UNKNOWN)
    when STATE_AND
      result << match_or(stream, '&', '&', T_AND, T_EXPREF)
    when STATE_NOT
      # consume not equals
      result << match_or(stream, '!', '=', T_COMPARATOR, T_NOT)
    else
      # either '<' or '>'
      # consume less than and greater than
      result << match_or(stream, stream.current, '=', T_COMPARATOR, T_COMPARATOR)
    end
  end

  result << Token.new(T_EOF, nil, stream.position)
  result
end
Private Instance Methods
# File lib/jmespath/lexer.rb, line 278
# Collects the characters enclosed by +delim+. The character the stream is
# on when the method is called is skipped (tokenize calls this while sitting
# on the opening delimiter).
#
# A backslash is copied into the result together with the character after
# it, so an escaped delimiter does not end the scan. If the stream runs out
# before the closing delimiter, a T_UNKNOWN token holding the text read so
# far is returned instead of a token of the requested +type+.
def inside(chars, delim, type)
  start = chars.position
  collected = []
  char = chars.next

  until char == delim
    if char == '\\'
      # keep the backslash and move on to the escaped character
      collected << char
      char = chars.next
    end
    # unclosed delimiter
    return Token.new(T_UNKNOWN, collected.join, start) if char.nil?

    collected << char
    char = chars.next
  end

  chars.next # step past the closing delimiter
  Token.new(type, collected.join, start)
end
# File lib/jmespath/lexer.rb, line 269
# Builds an operator token that is either one or two characters long.
#
# The stream must be positioned on +current+. The stream is advanced once;
# if the character found there equals +expected+, it is consumed as well and
# a +type+ token with the two-character value is returned. Otherwise an
# +or_type+ token holding just +current+ is returned and the following
# character is left for the caller.
#
# @param chars the character stream (responds to #position and #next)
# @param [String] current the operator's first character
# @param [String] expected the character that completes the long form
# @param type token type for the two-character form
# @param or_type token type for the one-character form
# @return [Token]
def match_or(chars, current, expected, type, or_type)
  # Record where the operator begins *before* advancing, so that both the
  # one- and two-character forms report the index of their first character.
  start = chars.position
  if chars.next == expected
    chars.next
    Token.new(type, current + expected, start)
  else
    Token.new(or_type, current, start)
  end
end
# File lib/jmespath/lexer.rb, line 332
# Replaces +token.value+ (JSON text) with the Ruby value it denotes.
#
# The text is wrapped in a {"value": ...} object before parsing, so scalar
# documents are accepted regardless of the installed JSON version.
#
# When +quoted+ is false and the text is not valid JSON, it is retried as a
# bare string: leading whitespace is stripped and the text is surrounded by
# double quotes. If parsing still fails, the token's type is set to
# T_UNKNOWN and its value is left as it was.
#
# The text comes from the expression being lexed, so it is parsed with
# JSON.parse and additions disabled; unlike JSON.load this never builds
# objects from a "json_class" key.
#
# @param token object with #value, #value= and #type= (a Token)
# @param [Boolean] quoted true when the value is a quoted identifier, for
#   which no bare-string fallback is attempted
# @return the same token, updated in place
def parse_json(token, quoted = false)
  # max_nesting/allow_nan mirror what JSON.load accepted by default, so
  # only the object-instantiating behaviour is dropped.
  options = { max_nesting: false, allow_nan: true, create_additions: false }
  begin
    if quoted
      token.value = JSON.parse("{\"value\":#{token.value}}", options)['value']
    else
      begin
        token.value = JSON.parse("{\"value\":#{token.value}}", options)['value']
      rescue
        # not valid JSON on its own: treat the text as an unquoted string
        token.value = JSON.parse(sprintf('{"value":"%s"}', token.value.lstrip), options)['value']
      end
    end
  rescue JSON::ParserError
    token.type = T_UNKNOWN
  end
  token
end