class POSIX_REGULAR_EXPRESSION_BUILDER

Features exported to POSIX_REGULAR_EXPRESSION_BUILDER

Parses POSIX regular expressions and builds their matchable form.

regular-expression ::= alternative
alternative ::= sequence [ '|' sequence ]...
sequence ::= term [ term ]...
term ::= factor [ repeat-spec ]
repeat-spec ::= '?' | '*' | '+' | '{' integer [',' [integer]] '}'
factor ::= group | union | '.' | '^' | '$' | escaped | text
group ::= '(' alternative ')'
union ::= '[' union ']'
union ::= '[' ['^'] union_term... ']'
union_term ::= union_factor ['-' union_factor]
union_factor ::= '[.' TEXT '.]' | '[:' CLASS ':]' | '[:<:]' | '[:>:]' | CHARACTER
escaped ::= '\' CHARACTER
text ::= A SEQUENCE OF NON-ESCAPED CHARACTERS NOT FOLLOWED BY ANY OF '*', '+', '?', '{'

Direct parents

conformant parents

BACKTRACKING_REGULAR_EXPRESSION_BUILDER

Known children

conformant children

PERL5_REGULAR_EXPRESSION_BUILDER

Summary

creation features

exported features

parsing

parsing

sub parts of union

make

behaviours

parsing

results

make

basic

error management

scanning

assertions

character classes

character class naming

and/or basics

Details

internal_parse

Main parse of a POSIX regular expression.

require

  • at_first_position: position = expression.lower

ensure

    parse_alternative

    Parses an alternative of sequences. alternative ::= sequence [ '|' sequence ]...

    require

    • has_no_error: not has_error
    • not_at_end: not end_of_input

    ensure

    • state_known: has_error or else end_of_input or else last_character = ')'

    parse_sequence

    Parses a sequence of terms. sequence ::= term [ term ]...

    require

    • has_no_error: not has_error
    • not_at_end: not end_of_input
    • end_excluded: last_character /= '|' and then last_character /= ')'

    ensure

    • state_known: has_error or else end_of_input or else last_character = '|' or else last_character = ')'

    parse_term

    Parses a term. term ::= factor [ repeat-spec ] repeat-spec ::= '?' | '*' | '+' | '{' integer [',' [integer]] '}'

    require

    • has_no_error: not has_error
    • not_at_end: not end_of_input
    • end_excluded: last_character /= '|' and then last_character /= ')'

    ensure

      parse_factor

      Parses a factor. factor ::= group | union | '.' | '^' | '$' | escaped | text

      require

      • has_no_error: not has_error
      • not_at_end: not end_of_input
      • end_excluded: last_character /= '|' and then last_character /= ')'

      ensure

        parse_group

        Parses a group. group ::= '(' alternative ')'

        require

        • has_no_error: not has_error
        • not_at_end: not end_of_input
        • begin_with_open_parenthesis: last_character = '('

        ensure

          parse_escaped

          Parses an escaped character. escaped ::= '\' CHARACTER

          require

          • has_no_error: not has_error
          • not_at_end: not end_of_input
          • begin_with_escape: last_character = '\'

          ensure

            parse_text

            Parses a text. text ::= A SEQUENCE OF NON-ESCAPED CHARACTERS NOT FOLLOWED BY ANY OF '*', '+', '?', '{'

            require

            • has_no_error: not has_error
            • not_at_end: not end_of_input
            • current_character_is_valid: valid_last_character and then not (once "([.^$\*+?{").has(last_character)

            ensure

              parse_union

              Parses a union. union ::= '[' ['^'] union_term... ']'

              require

              • has_no_error: not has_error
              • not_at_end: not end_of_input
              • begin_with_open_bracket: last_character = '['

              ensure

                parse_union_term

                Parses a union term. union_term ::= union_factor ['-' union_factor]

                require

                • has_no_error: not has_error
                • not_at_end: not end_of_input

                ensure

                  parse_union_factor

                  Parses a union factor. union_factor::= '[.' TEXT '.]' | '[:' CLASS ':]' | '[:<:]' | '[:>:]' | CHARACTER

                  require

                  • has_no_error: not has_error
                  • not_at_end: not end_of_input

                  read_embedded

                  Parses the text embedded in one of '[.' TEXT '.]', '[:' TEXT ':]' or '[=' TEXT '=]'. The parsed text is put in feature 'last_string'.

                  require

                  • has_no_error: not has_error
                  • not_at_end: not end_of_input
                  • previous_character_is_open_brace: valid_previous_character and then previous_character = '['
                  • current_character_is_valid: valid_last_character and then (once ".:=").has(last_character)

                  recorded_character: CHARACTER

                  Last union_factor's character recorded.

                  recorded_item: BACKTRACKING_NODE

                  Last union_factor's item (complex expression) recorded.

                  set_recorded_character (value: CHARACTER)

                  Records the union_factor's character 'value'.

                  ensure

                  • recorded_item = Void
                  • recorded_character = value

                  set_recorded_item (value: BACKTRACKING_NODE)

                  Records the union_factor's item (complex expression) 'value'.

                  require

                  • item_not_void: value /= Void

                  ensure

                  • recorded_item /= Void
                  • recorded_item = value

                  emit_recorded

                  Emits the last union_factor's recorded character or item, depending on its kind.

                  ensure

                    make

                    Initialise the attributes.

                    is_case_insensitive: BOOLEAN

                    Is the match case insensitive? Default is False

                    is_case_sensitive: BOOLEAN

                    Is the match case sensitive? Default is True

                    set_case_sensitive

                    Set the match as case sensitive.

                    ensure

                    • definition: is_case_insensitive = False and is_case_sensitive = True

                    set_case_insensitive

                    Set the match as case insensitive.

                    ensure

                    • definition: is_case_insensitive = True and is_case_sensitive = False

                    does_any_match_newline: BOOLEAN

                    Does the "any character" mark match a newline? Default is False

                    set_any_match_newline

                    The "any character" mark will match a newline.

                    ensure

                    • definition: does_any_match_newline = True

                    set_any_dont_match_newline

                    The "any character" mark will not match a newline.

                    ensure

                    • definition: does_any_match_newline = False

                    does_match_line_boundary: BOOLEAN

                    Do the begin/end marks match line boundaries? Default is False

                    does_match_text_boundary: BOOLEAN

                    Do the begin/end marks match text boundaries? Default is True

                    ensure

                    • definition: Result = not does_match_line_boundary

                    set_match_line_boundary

                    The begin/end marks will match line boundaries.

                    ensure

                    • definition: does_match_line_boundary = True and does_match_text_boundary = False

                    set_match_text_boundary

                    The begin/end marks will match text boundaries.

                    ensure

                    • definition: does_match_line_boundary = False and does_match_text_boundary = True

                    set_default_options

                    Set the default options

                    ensure

                    • is_case_sensitive
                    • not does_any_match_newline
                    • does_match_text_boundary

                    parse_expression (expr: STRING)

                    Set the expression to parse and parse it. When there is no error, the result is put in feature 'last_regular_expression'. If there is an error, a human-readable explanation is retrievable through the feature 'last_error'.

                    require

                    • expression_not_void: expr /= Void

                    ensure

                    • error_or_result: has_error xor has_result

                    parse

                    Parse the current expression. The result, if any, is obtained through 'last_regular_expression'.

                    require

                    • expression_not_void: expression /= Void

                    ensure

                    • error_or_result: has_error xor has_result

                    has_result: BOOLEAN

                    Did the last 'parse' or 'parse_expression' produce a result in 'last_regular_expression'?

                    ensure

                    • definition: Result = last_pattern.is_valid

                    last_pattern: BACKTRACKING_REGULAR_EXPRESSION_PATTERN

                    The last regular expression pattern built by 'parse' or 'parse_expression'

                    make

                    Initialise the attributes.

                    scanned_string: STRING

                    The expression currently being built.

                    set_scanned_string (string: STRING)

                    Set the 'scanned_string' with 'string'.

                    ensure

                    • has_no_error: not has_error
                    • definition: scanned_string = string
                    • at_the_begin: position = scanned_string.lower

                    has_error: BOOLEAN

                    True when an error was encountered

                    clear_error

                    Remove the error flag

                    ensure

                    • has_no_error: not has_error

                    last_error: STRING

                    Returns a string recorded for the error.

                    require

                    • has_error: has_error

                    ensure

                    • not_void: Result /= Void

                    set_error (message: STRING)

                    Set has_error and last_error. The explanatory error string 'last_error' is created as follows: "Error at position 'position': 'message'.".

                    require

                    • message_not_void: message /= Void
                    • has_no_error: not has_error

                    ensure

                    • has_error: has_error

                    position: INTEGER

                    The scanned position. It is the position of 'last_character'.

                    last_character: CHARACTER

                    The scanned character. The last character read from 'scanned_string'.

                    valid_last_character: BOOLEAN

                    True when 'last_character' is valid. Is like 'scanned_string.valid_index(position)'

                    valid_previous_character: BOOLEAN

                    True if the position-1 is a valid position.

                    require

                    • scanned_string /= Void

                    ensure

                    • definition: Result = scanned_string.valid_index(position - 1)

                    previous_character: CHARACTER

                    The character at position-1.

                    require

                    • valid_previous_character

                    ensure

                    • definition: Result = scanned_string.item(position - 1)

                    valid_next_character: BOOLEAN

                    True if the position+1 is a valid position.

                    require

                    • scanned_string /= Void

                    ensure

                    • definition: Result = scanned_string.valid_index(position + 1)

                    next_character: CHARACTER

                    The character at position+1.

                    require

                    • valid_next_character

                    ensure

                    • definition: Result = scanned_string.item(position + 1)

                    end_of_input: BOOLEAN

                    True when all the characters of 'scanned_string' are scanned.

                    ensure

                    • implies_last_character_not_valid: Result implies not valid_last_character

                    goto_position (pos: INTEGER)

                    Change the currently scanned position to 'pos'. Updates 'last_character' and 'valid_last_character' to reflect the new position value.

                    require

                    • has_no_error: not has_error
                    • scanned_string /= Void

                    ensure

                    • has_no_error: not has_error
                    • position_set: position = pos
                    • validity_updated: valid_last_character = scanned_string.valid_index(position)
                    • character_updated: valid_last_character implies last_character = scanned_string.item(position)

                    read_character

                    Reads the next character.

                    require

                    • has_no_error: not has_error
                    • not_at_end: not end_of_input

                    ensure

                    • next_position: position > old position
                    • has_no_error: not has_error

                    read_integer

                    Reads an integer value beginning at the currently scanned position. The value read is stored in 'last_integer'.

                    require

                    • has_no_error: not has_error
                    • not_at_end: not end_of_input
                    • begin_with_a_digit: last_character.is_decimal_digit

                    ensure

                    • has_no_error: not has_error
                    • digits_eaten: end_of_input or else not last_character.is_decimal_digit

                    saved_position: INTEGER

                    The saved position (only one is currently enough).

                    save_position

                    Saves the current scanning position.

                    require

                    • not_at_end: not end_of_input

                    ensure

                    • not_at_end: not end_of_input
                    • position_kept: position = old position
                    • saved_position_set: saved_position = position

                    restore_saved_position

                    Restore the scanning position to the last saved one.

                    ensure

                    • position_restored: position = old saved_position
                    • not_at_end: not end_of_input

                    last_string: STRING

                    A string buffer.

                    last_integer: INTEGER

                    An integer buffer.

                    the_any_character_item: REGULAR_EXPRESSION_ITEM_ANY
                    the_not_end_of_line_item: REGULAR_EXPRESSION_ITEM_NOT_END_OF_LINE
                    the_begin_of_line_item: REGULAR_EXPRESSION_ITEM_BEGIN_OF_LINE
                    the_end_of_line_item: REGULAR_EXPRESSION_ITEM_END_OF_LINE
                    the_begin_of_text_item: REGULAR_EXPRESSION_ITEM_BEGIN_OF_TEXT
                    the_real_end_of_text_item: REGULAR_EXPRESSION_ITEM_END_OF_TEXT
                    the_end_of_text_item: REGULAR_EXPRESSION_ITEM_END_OF_TEXT
                    the_begin_of_word_item: REGULAR_EXPRESSION_ITEM_BEGIN_OF_WORD
                    the_end_of_word_item: REGULAR_EXPRESSION_ITEM_END_OF_WORD
                    the_is_posix_alnum_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_ALNUM
                    the_is_posix_alpha_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_ALPHA
                    the_is_posix_ascii_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_ASCII
                    the_is_posix_blank_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_BLANK
                    the_is_posix_cntrl_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_CNTRL
                    the_is_posix_digit_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_DIGIT
                    the_is_posix_graph_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_GRAPH
                    the_is_posix_lower_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_LOWER
                    the_is_posix_print_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_PRINT
                    the_is_posix_punct_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_PUNCT
                    the_is_posix_space_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_SPACE
                    the_is_posix_upper_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_UPPER
                    the_is_posix_word_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_WORD
                    the_is_posix_xdigit_item: REGULAR_EXPRESSION_ITEM_IS_POSIX_XDIGIT
                    has_named_posix_item (name: STRING): BOOLEAN

                    True if 'name' is for a valid posix character class

                    require

                    • name_not_void: name /= Void

                    named_posix_item (name: STRING): REGULAR_EXPRESSION_ITEM

                    the item for the valid posix character class 'name'

                    require

                    • name_not_void: name /= Void
                    • good_name: has_named_posix_item(name)

                    ensure

                    • good_result: Result /= Void

                    has_named_perl_item (name: STRING): BOOLEAN

                    True if 'name' is for a valid perl character class

                    require

                    • name_not_void: name /= Void

                    named_perl_item (name: STRING): REGULAR_EXPRESSION_ITEM

                    the item for the valid perl character class 'name'

                    require

                    • name_not_void: name /= Void
                    • good_name: has_named_perl_item(name)

                    ensure

                    • good_result: Result /= Void

                    the_cut_node: BACKTRACKING_NODE_CUT
                    the_true_node: BACKTRACKING_NODE_TRUE
                    the_false_node: BACKTRACKING_NODE_FALSE
                    the_cut_and_false_node: BACKTRACKING_NODE_CUT_AND_FALSE

                    Class invariant