001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.osm.search;
003
004import static org.openstreetmap.josm.tools.I18n.marktr;
005import static org.openstreetmap.josm.tools.I18n.tr;
006
007import java.io.IOException;
008import java.io.Reader;
009import java.util.Arrays;
010import java.util.List;
011import java.util.Objects;
012
013import org.openstreetmap.josm.tools.JosmRuntimeException;
014
015/**
016 * This class is used to parse a search string and split it into tokens.
017 * It provides methods to parse numbers and extract strings.
018 * @since 12656 (moved from actions.search package)
019 */
020public class PushbackTokenizer {
021
022    /**
023     * A range of long numbers. Immutable
024     */
025    public static class Range {
026        private final long start;
027        private final long end;
028
029        /**
030         * Create a new range
031         * @param start The start
032         * @param end The end (inclusive)
033         */
034        public Range(long start, long end) {
035            this.start = start;
036            this.end = end;
037        }
038
039        /**
040         * @return The start
041         */
042        public long getStart() {
043            return start;
044        }
045
046        /**
047         * @return The end (inclusive)
048         */
049        public long getEnd() {
050            return end;
051        }
052
053        @Override
054        public String toString() {
055            return "Range [start=" + start + ", end=" + end + ']';
056        }
057    }
058
059    private final Reader search;
060
061    private Token currentToken;
062    private String currentText;
063    private Long currentNumber;
064    private Long currentRange;
065    private int c;
066    private boolean isRange;
067
068    /**
069     * Creates a new {@link PushbackTokenizer}
070     * @param search The search string reader to read the tokens from
071     */
072    public PushbackTokenizer(Reader search) {
073        this.search = search;
074        getChar();
075    }
076
077    /**
078     * The token types that may be read
079     */
080    public enum Token {
081        /**
082         * Not token (-)
083         */
084        NOT(marktr("<not>")),
085        /**
086         * Or token (or) (|)
087         */
088        OR(marktr("<or>")),
089        /**
090         * Xor token (xor) (^)
091         */
092        XOR(marktr("<xor>")),
093        /**
094         * opening parentheses token (
095         */
096        LEFT_PARENT(marktr("<left parent>")),
097        /**
098         * closing parentheses token )
099         */
100        RIGHT_PARENT(marktr("<right parent>")),
101        /**
102         * Colon :
103         */
104        COLON(marktr("<colon>")),
105        /**
106         * The equals sign (=)
107         */
108        EQUALS(marktr("<equals>")),
109        /**
110         * A text
111         */
112        KEY(marktr("<key>")),
113        /**
114         * A question mark (?)
115         */
116        QUESTION_MARK(marktr("<question mark>")),
117        /**
118         * Marks the end of the input
119         */
120        EOF(marktr("<end-of-file>")),
121        /**
122         * Less than sign (&lt;)
123         */
124        LESS_THAN("<less-than>"),
125        /**
126         * Greater than sign (&gt;)
127         */
128        GREATER_THAN("<greater-than>");
129
130        Token(String name) {
131            this.name = name;
132        }
133
134        private final String name;
135
136        @Override
137        public String toString() {
138            return tr(name);
139        }
140    }
141
142    private void getChar() {
143        try {
144            c = search.read();
145        } catch (IOException e) {
146            throw new JosmRuntimeException(e.getMessage(), e);
147        }
148    }
149
150    private static final List<Character> SPECIAL_CHARS = Arrays.asList('"', ':', '(', ')', '|', '^', '=', '?', '<', '>');
151    private static final List<Character> SPECIAL_CHARS_QUOTED = Arrays.asList('"');
152
153    private String getString(boolean quoted) {
154        List<Character> sChars = quoted ? SPECIAL_CHARS_QUOTED : SPECIAL_CHARS;
155        StringBuilder s = new StringBuilder();
156        boolean escape = false;
157        while (c != -1 && (escape || (!sChars.contains((char) c) && (quoted || !Character.isWhitespace(c))))) {
158            if (c == '\\' && !escape) {
159                escape = true;
160            } else {
161                s.append((char) c);
162                escape = false;
163            }
164            getChar();
165        }
166        return s.toString();
167    }
168
169    private String getString() {
170        return getString(false);
171    }
172
173    /**
174     * The token returned is <code>null</code> or starts with an identifier character:
175     * - for an '-'. This will be the only character
176     * : for an key. The value is the next token
177     * | for "OR"
178     * ^ for "XOR"
179     * ' ' for anything else.
180     * @return The next token in the stream.
181     */
182    public Token nextToken() {
183        if (currentToken != null) {
184            Token result = currentToken;
185            currentToken = null;
186            return result;
187        }
188
189        while (Character.isWhitespace(c)) {
190            getChar();
191        }
192        switch (c) {
193        case -1:
194            getChar();
195            return Token.EOF;
196        case ':':
197            getChar();
198            return Token.COLON;
199        case '=':
200            getChar();
201            return Token.EQUALS;
202        case '<':
203            getChar();
204            return Token.LESS_THAN;
205        case '>':
206            getChar();
207            return Token.GREATER_THAN;
208        case '(':
209            getChar();
210            return Token.LEFT_PARENT;
211        case ')':
212            getChar();
213            return Token.RIGHT_PARENT;
214        case '|':
215            getChar();
216            return Token.OR;
217        case '^':
218            getChar();
219            return Token.XOR;
220        case '&':
221            getChar();
222            return nextToken();
223        case '?':
224            getChar();
225            return Token.QUESTION_MARK;
226        case '"':
227            getChar();
228            currentText = getString(true);
229            getChar();
230            return Token.KEY;
231        default:
232            String prefix = "";
233            if (c == '-') {
234                getChar();
235                if (!Character.isDigit(c))
236                    return Token.NOT;
237                prefix = "-";
238            }
239            currentText = prefix + getString();
240            if ("or".equalsIgnoreCase(currentText))
241                return Token.OR;
242            else if ("xor".equalsIgnoreCase(currentText))
243                return Token.XOR;
244            else if ("and".equalsIgnoreCase(currentText))
245                return nextToken();
246            // try parsing number
247            try {
248                currentNumber = Long.valueOf(currentText);
249            } catch (NumberFormatException e) {
250                currentNumber = null;
251            }
252            // if text contains "-", try parsing a range
253            int pos = currentText.indexOf('-', 1);
254            isRange = pos > 0;
255            if (isRange) {
256                try {
257                    currentNumber = Long.valueOf(currentText.substring(0, pos));
258                } catch (NumberFormatException e) {
259                    currentNumber = null;
260                }
261                try {
262                    currentRange = Long.valueOf(currentText.substring(pos + 1));
263                } catch (NumberFormatException e) {
264                    currentRange = null;
265                    }
266                } else {
267                    currentRange = null;
268                }
269            return Token.KEY;
270        }
271    }
272
273    /**
274     * Reads the next token if it is equal to the given, suggested token
275     * @param token The token the next one should be equal to
276     * @return <code>true</code> if it has been read
277     */
278    public boolean readIfEqual(Token token) {
279        Token nextTok = nextToken();
280        if (Objects.equals(nextTok, token))
281            return true;
282        currentToken = nextTok;
283        return false;
284    }
285
286    /**
287     * Reads the next token. If it is a text, return that text. If not, advance
288     * @return the text or <code>null</code> if the reader was advanced
289     */
290    public String readTextOrNumber() {
291        Token nextTok = nextToken();
292        if (nextTok == Token.KEY)
293            return currentText;
294        currentToken = nextTok;
295        return null;
296    }
297
298    /**
299     * Reads a number
300     * @param errorMessage The error if the number cannot be read
301     * @return The number that was found
302     * @throws SearchParseError if there is no number
303     */
304    public long readNumber(String errorMessage) throws SearchParseError {
305        if ((nextToken() == Token.KEY) && (currentNumber != null))
306            return currentNumber;
307        else
308            throw new SearchParseError(errorMessage);
309    }
310
311    /**
312     * Gets the last number that was read
313     * @return The last number
314     */
315    public long getReadNumber() {
316        return (currentNumber != null) ? currentNumber : 0;
317    }
318
319    /**
320     * Reads a range of numbers
321     * @param errorMessage The error if the input is malformed
322     * @return The range that was found
323     * @throws SearchParseError If the input is not as expected for a range
324     */
325    public Range readRange(String errorMessage) throws SearchParseError {
326        if (nextToken() != Token.KEY || (currentNumber == null && currentRange == null)) {
327            throw new SearchParseError(errorMessage);
328        } else if (!isRange && currentNumber != null) {
329            if (currentNumber >= 0) {
330                return new Range(currentNumber, currentNumber);
331            } else {
332                return new Range(0, Math.abs(currentNumber));
333            }
334        } else if (isRange && currentRange == null) {
335            return new Range(currentNumber, Long.MAX_VALUE);
336        } else if (currentNumber != null && currentRange != null) {
337            return new Range(currentNumber, currentRange);
338        } else {
339            throw new SearchParseError(errorMessage);
340        }
341    }
342
343    /**
344     * Gets the last text that was found
345     * @return The text
346     */
347    public String getText() {
348        return currentText;
349    }
350}