001/* RuleBasedCollator.java -- Concrete Collator Class
002   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.
003
004This file is part of GNU Classpath.
005
006GNU Classpath is free software; you can redistribute it and/or modify
007it under the terms of the GNU General Public License as published by
008the Free Software Foundation; either version 2, or (at your option)
009any later version.
010
011GNU Classpath is distributed in the hope that it will be useful, but
012WITHOUT ANY WARRANTY; without even the implied warranty of
013MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014General Public License for more details.
015
016You should have received a copy of the GNU General Public License
017along with GNU Classpath; see the file COPYING.  If not, write to the
018Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
01902110-1301 USA.
020
021Linking this library statically or dynamically with other modules is
022making a combined work based on this library.  Thus, the terms and
023conditions of the GNU General Public License cover the whole
024combination.
025
026As a special exception, the copyright holders of this library give you
027permission to link this library with independent modules to produce an
028executable, regardless of the license terms of these independent
029modules, and to copy and distribute the resulting executable under
030terms of your choice, provided that you also meet, for each linked
031independent module, the terms and conditions of the license of that
032module.  An independent module is a module which is not derived from
033or based on this library.  If you modify this library, you may extend
034this exception to your version of the library, but you are not
035obligated to do so.  If you do not wish to do so, delete this
036exception statement from your version. */
037
038
039package java.text;
040
041import gnu.classpath.NotImplementedException;
042
043import java.util.ArrayList;
044import java.util.HashMap;
045
046/* Written using "Java Class Libraries", 2nd edition, plus online
047 * API docs for JDK 1.2 from http://www.javasoft.com.
048 * Status: Believed complete and correct
049 */
050
051/**
052 * This class is a concrete subclass of <code>Collator</code> suitable
053 * for string collation in a wide variety of languages.  An instance of
054 * this class is normally returned by the <code>getInstance</code> method
055 * of <code>Collator</code> with rules predefined for the requested
056 * locale.  However, an instance of this class can be created manually
057 * with any desired rules.
058 * <p>
059 * Rules take the form of a <code>String</code> with the following syntax
060 * <ul>
061 * <li> Modifier: '@'</li>
062 * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
063 * <li> Reset: '&amp;' : &lt;text&gt;</li>
064 * </ul>
065 * The modifier character indicates that accents sort backward as is the
066 * case with French.  The modifier applies to all rules <b>after</b>
 * the modifier but before the next primary sequence. If placed at the end
 * of the sequence, it applies to all unknown accented characters.
069 * The relational operators specify how the text
070 * argument relates to the previous term.  The relation characters have
071 * the following meanings:
072 * <ul>
073 * <li>'&lt;' - The text argument is greater than the prior term at the primary
074 * difference level.</li>
075 * <li>';' - The text argument is greater than the prior term at the secondary
076 * difference level.</li>
077 * <li>',' - The text argument is greater than the prior term at the tertiary
078 * difference level.</li>
079 * <li>'=' - The text argument is equal to the prior term</li>
080 * </ul>
081 * <p>
082 * As for the text argument itself, this is any sequence of Unicode
083 * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
084 * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are
085 * desired, they must be enclosed in single quotes.  If any whitespace is
086 * encountered, it is ignored.  (For example, "a b" is equal to "ab").
087 * <p>
088 * The reset operation inserts the following rule at the point where the
089 * text argument to it exists in the previously declared rule string.  This
090 * makes it easy to add new rules to an existing string by simply including
091 * them in a reset sequence at the end.  Note that the text argument, or
092 * at least the first character of it, must be present somewhere in the
093 * previously declared rules in order to be inserted properly.  If this
094 * is not satisfied, a <code>ParseException</code> will be thrown.
095 * <p>
096 * This system of configuring <code>RuleBasedCollator</code> is needlessly
097 * complex and the people at Taligent who developed it (along with the folks
098 * at Sun who accepted it into the Java standard library) deserve a slow
099 * and agonizing death.
100 * <p>
 * Here are a couple of examples of rule strings:
102 * <p>
103 * "&lt; a &lt; b &lt; c" - This string says that a is greater than b which is
104 * greater than c, with all differences being primary differences.
105 * <p>
106 * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
107 * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
108 * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
109 * under a tertiary strength comparison.
110 * <p>
111 * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the
112 * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
113 * the rule "&lt; b" is to be inserted after the text argument "a" in the
114 * previous rule string segment.
115 * <p>
116 * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
117 * anywhere in the previous rule string segment so the rule following the
118 * reset rule cannot be inserted.
119 * <p>
120 * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
121 * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
122 * <p>
123 * For a description of the various comparison strength types, see the
124 * documentation for the <code>Collator</code> class.
125 * <p>
126 * As an additional complication to this already overly complex rule scheme,
127 * if any characters precede the first rule, these characters are considered
128 * ignorable.  They will be treated as if they did not exist during
129 * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
130 * character such that the strings "high-tech" and "hightech" would
131 * be considered identical.
132 * <p>
133 * A <code>ParseException</code> will be thrown for any of the following
134 * conditions:
135 * <ul>
136 * <li>Unquoted punctuation characters in a text argument.</li>
137 * <li>A relational or reset operator not followed by a text argument</li>
138 * <li>A reset operator where the text argument is not present in
139 * the previous rule string section.</li>
140 * </ul>
141 *
142 * @author Aaron M. Renn (arenn@urbanophile.com)
143 * @author Tom Tromey (tromey@cygnus.com)
144 * @author Guilhem Lavaux (guilhem@kaffe.org)
145 */
146public class RuleBasedCollator extends Collator
147{
148  /**
149   * This class describes what rank has a character (or a sequence of characters)
150   * in the lexicographic order. Each element in a rule has a collation element.
151   */
152  static final class CollationElement
153  {
154    final String key;
155    final int primary;
156    final short secondary;
157    final short tertiary;
158    final short equality;
159    final boolean ignore;
160    final String expansion;
161
162    CollationElement(String key, int primary, short secondary, short tertiary,
163                     short equality, String expansion, boolean ignore)
164    {
165      this.key = key;
166      this.primary = primary;
167      this.secondary = secondary;
168      this.tertiary = tertiary;
169      this.equality = equality;
170      this.ignore = ignore;
171      this.expansion = expansion;
172    }
173
174    int getValue()
175    {
176      return (primary << 16) + (secondary << 8) + tertiary;
177    }
178  }
179
180  /**
181   * Basic collation instruction (internal format) to build the series of
182   * collation elements. It contains an instruction which specifies the new
183   * state of the generator. The sequence of instruction should not contain
184   * RESET (it is used by
185   * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
186   * as a temporary state while merging two sets of instructions.
187   */
188  private static final class CollationSorter
189  {
190    static final int GREATERP = 0;
191    static final int GREATERS = 1;
192    static final int GREATERT = 2;
193    static final int EQUAL = 3;
194    static final int RESET = 4;
195    static final int INVERSE_SECONDARY = 5;
196
197    final int comparisonType;
198    final String textElement;
199    final int hashText;
200    final int offset;
201    final boolean ignore;
202
203    String expansionOrdering;
204
205    private CollationSorter(final int comparisonType, final String textElement,
206                            final int offset, final boolean ignore)
207    {
208      this.comparisonType = comparisonType;
209      this.textElement = textElement;
210      this.offset = offset;
211      this.ignore = ignore;
212      hashText = textElement.hashCode();
213    }
214  }
215
  /**
   * This is the original rule string, as passed to the constructor.
   */
  private String rules;

  /**
   * This is the table of collation element values, filled in by
   * <code>buildCollationVector</code>.
   */
  private CollationElement[] ce_table;

  /**
   * Quick-prefix finder: maps the key of every entry of
   * <code>ce_table</code> to that entry. Filled in by
   * <code>buildPrefixAccess</code>.
   */
  HashMap<String,CollationElement> prefix_tree;

  /**
   * One more than the last primary sequence number entered into
   * <code>ce_table</code>. It is used as the base when computing the
   * ordering value of an unspecified character.
   */
  private int last_primary_value;

  /**
   * One more than the last tertiary sequence number entered into
   * <code>ce_table</code> while the primary sequence number was still 0.
   * It is used as the base when computing the ordering value of an
   * unspecified accented character.
   */
  private int last_tertiary_value;

  /**
   * This variable is true if accents need to be sorted
   * in the other direction. It is set by <code>buildCollationVector</code>
   * and consulted when default elements are built for characters that
   * have no rule.
   */
  private boolean inverseAccentComparison;

  /**
   * This collation element is special to unknown sequences.
   * The JDK uses it to mark and sort the characters which have
   * no collation rules. Its key is empty, its primary order is 32767
   * and all its other orders are 0.
   */
  static final CollationElement SPECIAL_UNKNOWN_SEQ =
    new CollationElement("", (short) 32767, (short) 0, (short) 0,
                         (short) 0, null, false);
260
261  /**
262   * This method initializes a new instance of <code>RuleBasedCollator</code>
263   * with the specified collation rules.  Note that an application normally
264   * obtains an instance of <code>RuleBasedCollator</code> by calling the
265   * <code>getInstance</code> method of <code>Collator</code>.  That method
266   * automatically loads the proper set of rules for the desired locale.
267   *
268   * @param rules The collation rule string.
269   *
270   * @exception ParseException If the rule string contains syntax errors.
271   */
272  public RuleBasedCollator(String rules) throws ParseException
273  {
274    if (rules.equals(""))
275      throw new ParseException("empty rule set", 0);
276
277    this.rules = rules;
278
279    buildCollationVector(parseString(rules));
280    buildPrefixAccess();
281  }
282
283  /**
284   * This method returns the number of common characters at the beginning
285   * of the string of the two parameters.
286   *
287   * @param prefix A string considered as a prefix to test against
288   * the other string.
289   * @param s A string to test the prefix against.
290   * @return The number of common characters.
291   */
292  static int findPrefixLength(String prefix, String s)
293  {
294    int index;
295    int len = prefix.length();
296
297    for (index = 0; index < len && index < s.length(); ++index)
298      {
299        if (prefix.charAt(index) != s.charAt(index))
300          return index;
301      }
302
303
304    return index;
305  }
306
307  /**
308   * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
309   * checks whether it is possible to find an anchor point for the rules to be merged and
310   * then insert them at that precise point.
311   *
312   * @param offset Offset in the string containing rules of the beginning of the rules
313   * being merged in.
314   * @param starter Text of the rules being merged.
315   * @param main Repository of all already parsed rules.
316   * @param patch Rules to be merged into the repository.
317   * @throws ParseException if it is impossible to find an anchor point for the new rules.
318   */
319  private void mergeRules(int offset, String starter, ArrayList<CollationSorter> main,
320                          ArrayList<CollationSorter> patch)
321    throws ParseException
322  {
323    int insertion_point = -1;
324    int max_length = 0;
325
326    /* We must check that no rules conflict with another already present. If it
327     * is the case delete the old rule.
328     */
329
330    /* For the moment good old O(N^2) algorithm.
331     */
332    for (int i = 0; i < patch.size(); i++)
333      {
334        int j = 0;
335
336        while (j < main.size())
337          {
338            CollationSorter rule1 = patch.get(i);
339            CollationSorter rule2 = main.get(j);
340
341            if (rule1.textElement.equals(rule2.textElement))
342              main.remove(j);
343            else
344              j++;
345          }
346      }
347
348    // Find the insertion point... O(N)
349    for (int i = 0; i < main.size(); i++)
350      {
351        CollationSorter sorter = main.get(i);
352        int length = findPrefixLength(starter, sorter.textElement);
353
354        if (length > max_length)
355          {
356            max_length = length;
357            insertion_point = i+1;
358          }
359      }
360
361    if (insertion_point < 0)
362      throw new ParseException("no insertion point found for " + starter, offset);
363
364    if (max_length < starter.length())
365      {
366        /*
367         * We need to expand the first entry. It must be sorted
368         * like if it was the reference key itself (like the spec
369         * said. So the first entry is special: the element is
370         * replaced by the specified text element for the sorting.
371         * This text replace the old one for comparisons. However
372         * to preserve the behaviour we replace the first key (corresponding
373         * to the found prefix) by a new code rightly ordered in the
374         * sequence. The rest of the subsequence must be appended
375         * to the end of the sequence.
376         */
377        CollationSorter sorter = patch.get(0);
378
379        sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
380
381        main.add(insertion_point, sorter);
382
383        /*
384         * This is a new set of rules. Append to the list.
385         */
386        patch.remove(0);
387        insertion_point++;
388      }
389
390    // Now insert all elements of patch at the insertion point.
391    for (int i = 0; i < patch.size(); i++)
392      main.add(i+insertion_point, patch.get(i));
393  }
394
  /**
   * This method parses a string and builds a set of sorting instructions. The parsing
   * may only be partial in the case the rules are to be merged some time later.
   * <p>
   * The string is scanned character by character. Operator characters
   * ('&lt;', ';', ',', '=', '@', '&amp;') delimit text elements; every time a new
   * operator is met, the text accumulated since the previous operator is
   * emitted as an instruction of the previous operator's type. A reset
   * ('&amp;') makes the outermost call parse the following rules recursively
   * and merge them into <code>v</code> with <code>mergeRules</code>.
   *
   * @param stop_on_reset If this parameter is true then the parser stops when it
   * encounters a reset instruction. In the other case, it tries to parse the subrules
   * and merge them into the same repository.
   * @param v Output vector for the set of instructions.
   * @param base_offset Offset of <code>rules</code> inside the complete rule
   * string; it is added to positions reported in exceptions. A value of 0
   * additionally makes the parser start in "ignorable" mode.
   * @param rules Rules to be parsed.
   * @return -1 if the loop index reached the end of the string, otherwise
   * the index in <code>rules</code> at which the parser stopped.
   * @throws ParseException if something went wrong during the parsing. To get details
   * decode the message.
   */
  private int subParseString(boolean stop_on_reset, ArrayList<CollationSorter> v,
                             int base_offset, String rules)
    throws ParseException
  {
    // Instructions emitted before the first primary ('<') operator of the
    // outermost call are flagged as ignorable.
    boolean ignoreChars = (base_offset == 0);
    // Type of the operator whose text element is being accumulated, or -1.
    int operator = -1;
    // Text element accumulated since the last operator.
    StringBuilder sb = new StringBuilder();
    // True right after a quote character has been processed.
    boolean doubleQuote = false;
    // True while inside a quoted section.
    boolean eatingChars = false;
    // nextIsModifier is raised by '@'; isModifier is its value as of the
    // previous significant character, i.e. "the previous operator was '@'".
    boolean nextIsModifier = false;
    boolean isModifier = false;
    int i;

main_parse_loop:
    for (i = 0; i < rules.length(); i++)
      {
        char c = rules.charAt(i);
        int type = -1;

        // Whitespace outside quotes is not significant.
        if (!eatingChars &&
            ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
              continue;

        isModifier = nextIsModifier;
        nextIsModifier = false;

        // Inside quotes every character but the quote itself is taken
        // literally.
        if (eatingChars && c != '\'')
          {
            doubleQuote = false;
            sb.append(c);
            continue;
          }
        // A quote that directly follows another quote while still in quoted
        // mode is appended as a literal quote character.
        if (doubleQuote && eatingChars)
          {
            sb.append(c);
            doubleQuote = false;
            continue;
          }

        switch (c)
          {
          case '!':
            throw new ParseException
              ("Modifier '!' is not yet supported by Classpath", i + base_offset);
          case '<':
            type = CollationSorter.GREATERP;
            break;
          case ';':
            type = CollationSorter.GREATERS;
            break;
          case ',':
            type = CollationSorter.GREATERT;
            break;
          case '=':
            type = CollationSorter.EQUAL;
            break;
          case '\'':
            // Enter or leave quoted mode.
            eatingChars = !eatingChars;
            doubleQuote = true;
            break;
          case '@':
            if (ignoreChars)
              throw new ParseException
                ("comparison list has not yet been started. You may only use"
                 + "(<,;=&)", i + base_offset);
            // Inverse the order of secondaries from now on.
            nextIsModifier = true;
            type = CollationSorter.INVERSE_SECONDARY;
            break;
          case '&':
            type = CollationSorter.RESET;
            // A nested parse hands the reset back to its caller; the pending
            // instruction, if any, is emitted after the loop.
            if (stop_on_reset)
              break main_parse_loop;
            break;
          default:
            // Ordinary text must belong to some operator.
            if (operator < 0)
              throw new ParseException
                ("operator missing at " + (i + base_offset), i + base_offset);
            if (! eatingChars
                && ((c >= 0x21 && c <= 0x2F)
                    || (c >= 0x3A && c <= 0x40)
                    || (c >= 0x5B && c <= 0x60)
                    || (c >= 0x7B && c <= 0x7E)))
              throw new ParseException
                ("unquoted punctuation character '" + c + "'", i + base_offset);

            //type = ignoreChars ? CollationSorter.IGNORE : -1;
            sb.append(c);
            break;
          }

        // Not an operator: keep accumulating.
        if (type  < 0)
          continue;

        // Very first operator: nothing to emit yet.
        if (operator < 0)
          {
            operator = type;
            continue;
          }

        // Only the '@' modifier may have an empty text element.
        if (sb.length() == 0 && !isModifier)
          throw new ParseException
            ("text element empty at " + (i+base_offset), i+base_offset);

        if (operator == CollationSorter.RESET)
          {
            /* Reposition in the sorting list at the position
             * indicated by the text element.
             */
            String subrules = rules.substring(i);
            ArrayList<CollationSorter> sorted_rules = new ArrayList<CollationSorter>();
            int idx;

            // Parse the subrules but do not iterate through all
            // sublist. This is the privilege of the first call.
            idx = subParseString(true, sorted_rules, base_offset+i, subrules);

            // Merge new parsed rules into the list.
            mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
            sb.setLength(0);

            // Reset state to none.
            operator = -1;
            type = -1;
            // We have found a new subrule at 'idx' but it has not been parsed.
            if (idx >= 0)
              {
                // idx is relative to subrules, which starts at i. The loop
                // increment brings i to the character where the nested
                // parse stopped.
                i += idx-1;
                continue main_parse_loop;
              }
            else
                // No more rules.
                break main_parse_loop;
          }

        // Emit the instruction for the operator that just ended.
        String textElement = sb.toString();
        // The first primary operator ends the ignorable section.
        if (operator == CollationSorter.GREATERP)
          ignoreChars = false;
        CollationSorter sorter = new CollationSorter(operator, textElement,
                                                     base_offset + rules.length(),
                                                     ignoreChars);
        sb.setLength(0);

        v.add(sorter);
        operator = type;
      }

    // Emit the instruction that was still pending when the loop ended.
    if (operator >= 0)
      {
        int pos = rules.length() + base_offset;

        if ((sb.length() != 0 && nextIsModifier)
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
          throw new ParseException("text element empty at " + pos, pos);

        if (operator == CollationSorter.GREATERP)
          ignoreChars = false;

        // NOTE(review): pos already includes base_offset, so base_offset is
        // added twice in the offset passed below -- confirm whether this is
        // intended.
        CollationSorter sorter = new CollationSorter(operator, sb.toString(),
                                                     base_offset+pos, ignoreChars);
        v.add(sorter);
      }

    if (i == rules.length())
      return -1;
    else
      return i;
  }
578
579  /**
580   * This method creates a copy of this object.
581   *
582   * @return A copy of this object.
583   */
584  public Object clone()
585  {
586    return super.clone();
587  }
588
589  /**
590   * This method completely parses a string 'rules' containing sorting rules.
591   *
592   * @param rules String containing the rules to be parsed.
593   * @return A set of sorting instructions stored in a Vector.
594   * @throws ParseException if something turned wrong during the parsing. To get details
595   * decode the message.
596   */
597  private ArrayList<CollationSorter> parseString(String rules)
598    throws ParseException
599  {
600    ArrayList<CollationSorter> v = new ArrayList<CollationSorter>();
601
602    // result of the first subParseString is not absolute (may be -1 or a
603    // positive integer). But we do not care.
604    subParseString(false, v, 0, rules);
605
606    return v;
607  }
608
609  /**
610   * This method uses the sorting instructions built by {@link #parseString}
611   * to build collation elements which can be directly used to sort strings.
612   *
613   * @param parsedElements Parsed instructions stored in a ArrayList.
614   * @throws ParseException if the order of the instructions are not valid.
615   */
616  private void buildCollationVector(ArrayList<CollationSorter> parsedElements)
617    throws ParseException
618  {
619    int primary_seq = 0;
620    int last_tertiary_seq = 0;
621    short secondary_seq = 0;
622    short tertiary_seq = 0;
623    short equality_seq = 0;
624    boolean inverseComparisons = false;
625    final boolean DECREASING = false;
626    final boolean INCREASING = true;
627    boolean secondaryType = INCREASING;
628    ArrayList<CollationElement> v = new ArrayList<CollationElement>();
629
630    // elts is completely sorted.
631element_loop:
632    for (int i = 0; i < parsedElements.size(); i++)
633      {
634        CollationSorter elt = parsedElements.get(i);
635
636        switch (elt.comparisonType)
637          {
638          case CollationSorter.GREATERP:
639            primary_seq++;
640            if (inverseComparisons)
641              {
642                secondary_seq = Short.MAX_VALUE;
643                secondaryType = DECREASING;
644              }
645            else
646              {
647                secondary_seq = 0;
648                secondaryType = INCREASING;
649              }
650            tertiary_seq = 0;
651            equality_seq = 0;
652            inverseComparisons = false;
653            break;
654          case CollationSorter.GREATERS:
655            if (secondaryType == DECREASING)
656              secondary_seq--;
657            else
658              secondary_seq++;
659            tertiary_seq = 0;
660            equality_seq = 0;
661            break;
662          case CollationSorter.INVERSE_SECONDARY:
663            inverseComparisons = true;
664            continue element_loop;
665          case CollationSorter.GREATERT:
666            tertiary_seq++;
667            if (primary_seq == 0)
668              last_tertiary_seq = tertiary_seq;
669            equality_seq = 0;
670            break;
671          case CollationSorter.EQUAL:
672            equality_seq++;
673            break;
674          case CollationSorter.RESET:
675            throw new ParseException
676              ("Invalid reached state 'RESET'. Internal error", elt.offset);
677          default:
678            throw new ParseException
679              ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
680          }
681
682        v.add(new CollationElement(elt.textElement, primary_seq,
683                                   secondary_seq, tertiary_seq,
684                                   equality_seq, elt.expansionOrdering, elt.ignore));
685      }
686
687    this.inverseAccentComparison = inverseComparisons;
688
689    ce_table = v.toArray(new CollationElement[v.size()]);
690
691    last_primary_value = primary_seq+1;
692    last_tertiary_value = last_tertiary_seq+1;
693  }
694
695  /**
696   * Build a tree where all keys are the texts of collation elements and data is
697   * the collation element itself. The tree is used when extracting all prefix
698   * for a given text.
699   */
700  private void buildPrefixAccess()
701  {
702    prefix_tree = new HashMap<String,CollationElement>();
703
704    for (int i = 0; i < ce_table.length; i++)
705      {
706        CollationElement e = ce_table[i];
707
708        prefix_tree.put(e.key, e);
709      }
710  }
711
  /**
   * This method returns an integer which indicates whether the first
   * specified <code>String</code> is less than, greater than, or equal to
   * the second.  The value depends not only on the collation rules in
   * effect, but also the strength setting of this object.
   * <p>
   * Both strings are walked in parallel, one collation element at a time.
   * Ignorable elements are skipped on either side, and the first pair of
   * elements that differs at a level covered by the current strength
   * decides the result.
   *
   * @param source The first <code>String</code> to compare.
   * @param target A second <code>String</code> to compare to the first.
   *
   * @return A negative integer if source &lt; target, a positive integer
   * if source &gt; target, or 0 if source == target.
   */
  public int compare(String source, String target)
  {
    CollationElementIterator cs, ct;
    // Current element of source and target; null once a side is exhausted.
    CollationElement ord1block = null;
    CollationElement ord2block = null;
    // When false, the corresponding side keeps its current element for the
    // next iteration instead of fetching a new one.
    boolean advance_block_1 = true;
    boolean advance_block_2 = true;

    cs = getCollationElementIterator(source);
    ct = getCollationElementIterator(target);

    for(;;)
      {
        int ord1;
        int ord2;

        /*
         * We have to check whether the characters are ignorable.
         * If it is the case then forget them.
         */
        if (advance_block_1)
          {
            ord1block = cs.nextBlock();
            if (ord1block != null && ord1block.ignore)
              continue;
          }

        if (advance_block_2)
          {
            ord2block = ct.nextBlock();
            if (ord2block != null && ord2block.ignore)
              {
                // Skip the ignorable target element but keep the source
                // element that has already been fetched.
                advance_block_1 = false;
                continue;
              }
         }
        else
          advance_block_2 = true;

        // A hold on the source side lasts for one fetch only.
        if (!advance_block_1)
          advance_block_1 = true;

        if (ord1block != null)
          ord1 = ord1block.getValue();
        else
          {
            // Source exhausted: equal if target is exhausted too, else
            // the shorter source sorts first.
            if (ord2block == null)
              return 0;
            return -1;
          }

        // Target exhausted while source still has elements.
        if (ord2block == null)
          return 1;

        ord2 = ord2block.getValue();

        // We know chars are totally equal, so skip
        if (ord1 == ord2)
          {
            // At IDENTICAL strength the underlying text must match as well.
            if (getStrength() == IDENTICAL)
              if (!ord1block.key.equals(ord2block.key))
                return ord1block.key.compareTo(ord2block.key);
            continue;
          }

        // Check for primary strength differences
        int prim1 = CollationElementIterator.primaryOrder(ord1);
        int prim2 = CollationElementIterator.primaryOrder(ord2);

        // Below TERTIARY strength, an element whose primary order is 0 is
        // dropped and the element of the other side is kept for the next
        // round.
        if (prim1 == 0 && getStrength() < TERTIARY)
          {
            advance_block_2 = false;
            continue;
          }
        else if (prim2 == 0 && getStrength() < TERTIARY)
          {
            advance_block_1 = false;
            continue;
          }

        if (prim1 < prim2)
          return -1;
        else if (prim1 > prim2)
          return 1;
        else if (getStrength() == PRIMARY)
          continue;

        // Check for secondary strength differences
        int sec1 = CollationElementIterator.secondaryOrder(ord1);
        int sec2 = CollationElementIterator.secondaryOrder(ord2);

        if (sec1 < sec2)
          return -1;
        else if (sec1 > sec2)
          return 1;
        else if (getStrength() == SECONDARY)
          continue;

        // Check for tertiary differences
        int tert1 = CollationElementIterator.tertiaryOrder(ord1);
        int tert2 = CollationElementIterator.tertiaryOrder(ord2);

        if (tert1 < tert2)
          return -1;
        else if (tert1 > tert2)
          return 1;
        else if (getStrength() == TERTIARY)
          continue;

        // Reached only when the packed values differ although all three
        // extracted orders are equal and the strength is above TERTIARY.
        // Apparently JDK does this (at least for my test case).
        return ord1block.key.compareTo(ord2block.key);
      }
  }
837
838  /**
839   * This method tests this object for equality against the specified
840   * object.  This will be true if and only if the specified object is
841   * another reference to this object.
842   *
843   * @param obj The <code>Object</code> to compare against this object.
844   *
845   * @return <code>true</code> if the specified object is equal to this object,
846   * <code>false</code> otherwise.
847   */
848  public boolean equals(Object obj)
849  {
850    if (obj == this)
851      return true;
852    else
853      return false;
854  }
855
856  /**
857   * This method builds a default collation element without invoking
858   * the database created from the rules passed to the constructor.
859   *
860   * @param c Character which needs a collation element.
861   * @return A valid brand new CollationElement instance.
862   */
863  CollationElement getDefaultElement(char c)
864  {
865    int v;
866
867    // Preliminary support for generic accent sorting inversion (I don't know if all
868    // characters in the range should be sorted backward). This is the place
869    // to fix this if needed.
870    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
871      v = 0x0361 - ((int) c - 0x02B9);
872    else
873      v = (short) c;
874    return new CollationElement("" + c, last_primary_value + v,
875                                (short) 0, (short) 0, (short) 0, null, false);
876  }
877
878  /**
879   * This method builds a default collation element for an accented character
880   * without invoking the database created from the rules passed to the constructor.
881   *
882   * @param c Character which needs a collation element.
883   * @return A valid brand new CollationElement instance.
884   */
885  CollationElement getDefaultAccentedElement(char c)
886  {
887    int v;
888
889    // Preliminary support for generic accent sorting inversion (I don't know if all
890    // characters in the range should be sorted backward). This is the place
891    // to fix this if needed.
892    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
893      v = 0x0361 - ((int) c - 0x02B9);
894    else
895      v = (short) c;
896    return new CollationElement("" + c, (short) 0,
897                                (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
898  }
899
900  /**
901   * This method returns an instance for <code>CollationElementIterator</code>
902   * for the specified <code>String</code> under the collation rules for this
903   * object.
904   *
905   * @param source The <code>String</code> to return the
906   * <code>CollationElementIterator</code> instance for.
907   *
908   * @return A <code>CollationElementIterator</code> for the specified
909   * <code>String</code>.
910   */
911  public CollationElementIterator getCollationElementIterator(String source)
912  {
913    return new CollationElementIterator(this, source);
914  }
915
916  /**
917   * This method returns an instance of <code>CollationElementIterator</code>
918   * for the <code>String</code> represented by the specified
919   * <code>CharacterIterator</code>.
920   *
921   * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
922   *
923   * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
924   */
925  public CollationElementIterator getCollationElementIterator(CharacterIterator source)
926  {
927    return new CollationElementIterator(this, source);
928  }
929
930  /**
931   * This method returns an instance of <code>CollationKey</code> for the
932   * specified <code>String</code>.  The object returned will have a
933   * more efficient mechanism for its comparison function that could
934   * provide speed benefits if multiple comparisons are performed, such
935   * as during a sort.
936   *
937   * @param source The <code>String</code> to create a <code>CollationKey</code> for.
938   *
939   * @return A <code>CollationKey</code> for the specified <code>String</code>.
940   */
941  public CollationKey getCollationKey(String source)
942  {
943    CollationElementIterator cei = getCollationElementIterator(source);
944    ArrayList<Integer> vect = new ArrayList<Integer>();
945
946    int ord = cei.next();
947    cei.reset(); //set to start of string
948
949    while (ord != CollationElementIterator.NULLORDER)
950      {
951        // If the primary order is null, it means this is an ignorable
952        // character.
953        if (CollationElementIterator.primaryOrder(ord) == 0)
954          {
955            ord = cei.next();
956            continue;
957          }
958        switch (getStrength())
959          {
960            case PRIMARY:
961              ord = CollationElementIterator.primaryOrder(ord);
962              break;
963
964            case SECONDARY:
965              ord = CollationElementIterator.primaryOrder(ord) << 8;
966              ord |= CollationElementIterator.secondaryOrder(ord);
967
968            default:
969               break;
970          }
971
972        vect.add(Integer.valueOf(ord));
973        ord = cei.next(); //increment to next key
974      }
975
976    Integer[] objarr = vect.toArray(new Integer[vect.size()]);
977    byte[] key = new byte[objarr.length * 4];
978
979    for (int i = 0; i < objarr.length; i++)
980      {
981        int j = objarr[i].intValue();
982        key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
983        key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
984        key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
985        key [i * 4 + 3] = (byte) (j & 0x000000FF);
986      }
987
988    return new CollationKey(this, source, key);
989  }
990
991  /**
992   * This method returns a <code>String</code> containing the collation rules
993   * for this object.
994   *
995   * @return The collation rules for this object.
996   */
997  public String getRules()
998  {
999    return rules;
1000  }
1001
1002  /**
1003   * This method returns a hash value for this object.
1004   *
1005   * @return A hash value for this object.
1006   */
1007  public int hashCode()
1008  {
1009    return System.identityHashCode(this);
1010  }
1011}