001/*
002 * Copyright 2007-2018 Ping Identity Corporation
003 * All Rights Reserved.
004 */
005/*
006 * Copyright (C) 2008-2018 Ping Identity Corporation
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021package com.unboundid.ldap.matchingrules;
022
023
024
025import com.unboundid.asn1.ASN1OctetString;
026import com.unboundid.util.StaticUtils;
027import com.unboundid.util.ThreadSafety;
028import com.unboundid.util.ThreadSafetyLevel;
029
030
031
032/**
033 * This class provides an implementation of a matching rule that uses
034 * case-insensitive matching that also treats multiple consecutive (non-escaped)
035 * spaces as a single space.
036 */
037@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
038public final class CaseIgnoreStringMatchingRule
039       extends AcceptAllSimpleMatchingRule
040{
041  /**
042   * The singleton instance that will be returned from the {@code getInstance}
043   * method.
044   */
045  private static final CaseIgnoreStringMatchingRule INSTANCE =
046       new CaseIgnoreStringMatchingRule();
047
048
049
050  /**
051   * The name for the caseIgnoreMatch equality matching rule.
052   */
053  public static final String EQUALITY_RULE_NAME = "caseIgnoreMatch";
054
055
056
057  /**
058   * The name for the caseIgnoreMatch equality matching rule, formatted in all
059   * lowercase characters.
060   */
061  static final String LOWER_EQUALITY_RULE_NAME =
062       StaticUtils.toLowerCase(EQUALITY_RULE_NAME);
063
064
065
066  /**
067   * The OID for the caseIgnoreMatch equality matching rule.
068   */
069  public static final String EQUALITY_RULE_OID = "2.5.13.2";
070
071
072
073  /**
074   * The name for the caseIgnoreOrderingMatch ordering matching rule.
075   */
076  public static final String ORDERING_RULE_NAME = "caseIgnoreOrderingMatch";
077
078
079
080  /**
081   * The name for the caseIgnoreOrderingMatch ordering matching rule, formatted
082   * in all lowercase characters.
083   */
084  static final String LOWER_ORDERING_RULE_NAME =
085       StaticUtils.toLowerCase(ORDERING_RULE_NAME);
086
087
088
089  /**
090   * The OID for the caseIgnoreOrderingMatch ordering matching rule.
091   */
092  public static final String ORDERING_RULE_OID = "2.5.13.3";
093
094
095
096  /**
097   * The name for the caseIgnoreSubstringsMatch substring matching rule.
098   */
099  public static final String SUBSTRING_RULE_NAME = "caseIgnoreSubstringsMatch";
100
101
102
103  /**
104   * The name for the caseIgnoreSubstringsMatch substring matching rule,
105   * formatted in all lowercase characters.
106   */
107  static final String LOWER_SUBSTRING_RULE_NAME =
108       StaticUtils.toLowerCase(SUBSTRING_RULE_NAME);
109
110
111
112  /**
113   * The OID for the caseIgnoreSubstringsMatch substring matching rule.
114   */
115  public static final String SUBSTRING_RULE_OID = "2.5.13.4";
116
117
118
119  /**
120   * The serial version UID for this serializable class.
121   */
122  private static final long serialVersionUID = -1293370922676445525L;
123
124
125
126  /**
127   * Creates a new instance of this case ignore string matching rule.
128   */
129  public CaseIgnoreStringMatchingRule()
130  {
131    // No implementation is required.
132  }
133
134
135
136  /**
137   * Retrieves a singleton instance of this matching rule.
138   *
139   * @return  A singleton instance of this matching rule.
140   */
141  public static CaseIgnoreStringMatchingRule getInstance()
142  {
143    return INSTANCE;
144  }
145
146
147
148  /**
149   * {@inheritDoc}
150   */
151  @Override()
152  public String getEqualityMatchingRuleName()
153  {
154    return EQUALITY_RULE_NAME;
155  }
156
157
158
159  /**
160   * {@inheritDoc}
161   */
162  @Override()
163  public String getEqualityMatchingRuleOID()
164  {
165    return EQUALITY_RULE_OID;
166  }
167
168
169
170  /**
171   * {@inheritDoc}
172   */
173  @Override()
174  public String getOrderingMatchingRuleName()
175  {
176    return ORDERING_RULE_NAME;
177  }
178
179
180
181  /**
182   * {@inheritDoc}
183   */
184  @Override()
185  public String getOrderingMatchingRuleOID()
186  {
187    return ORDERING_RULE_OID;
188  }
189
190
191
192  /**
193   * {@inheritDoc}
194   */
195  @Override()
196  public String getSubstringMatchingRuleName()
197  {
198    return SUBSTRING_RULE_NAME;
199  }
200
201
202
203  /**
204   * {@inheritDoc}
205   */
206  @Override()
207  public String getSubstringMatchingRuleOID()
208  {
209    return SUBSTRING_RULE_OID;
210  }
211
212
213
214  /**
215   * {@inheritDoc}
216   */
217  @Override()
218  public boolean valuesMatch(final ASN1OctetString value1,
219                             final ASN1OctetString value2)
220  {
221    // Try to use a quick, no-copy determination if possible.  If this fails,
222    // then we'll fall back on a more thorough, but more costly, approach.
223    final byte[] value1Bytes = value1.getValue();
224    final byte[] value2Bytes = value2.getValue();
225    if (value1Bytes.length == value2Bytes.length)
226    {
227      for (int i=0; i< value1Bytes.length; i++)
228      {
229        final byte b1 = value1Bytes[i];
230        final byte b2 = value2Bytes[i];
231
232        if (((b1 & 0x7F) != (b1 & 0xFF)) ||
233            ((b2 & 0x7F) != (b2 & 0xFF)))
234        {
235          return normalize(value1).equals(normalize(value2));
236        }
237        else if (b1 != b2)
238        {
239          if ((b1 == ' ') || (b2 == ' '))
240          {
241            return normalize(value1).equals(normalize(value2));
242          }
243          else if (Character.isUpperCase((char) b1))
244          {
245            final char c = Character.toLowerCase((char) b1);
246            if (c != ((char) b2))
247            {
248              return false;
249            }
250          }
251          else if (Character.isUpperCase((char) b2))
252          {
253            final char c = Character.toLowerCase((char) b2);
254            if (c != ((char) b1))
255            {
256              return false;
257            }
258          }
259          else
260          {
261            return false;
262          }
263        }
264      }
265
266      // If we've gotten to this point, then the values must be equal.
267      return true;
268    }
269    else
270    {
271      return normalizeInternal(value1, false, (byte) 0x00).equals(
272                  normalizeInternal(value2, false, (byte) 0x00));
273    }
274  }
275
276
277
278  /**
279   * {@inheritDoc}
280   */
281  @Override()
282  public ASN1OctetString normalize(final ASN1OctetString value)
283  {
284    return normalizeInternal(value, false, (byte) 0x00);
285  }
286
287
288
289  /**
290   * {@inheritDoc}
291   */
292  @Override()
293  public ASN1OctetString normalizeSubstring(final ASN1OctetString value,
294                                            final byte substringType)
295  {
296    return normalizeInternal(value, true, substringType);
297  }
298
299
300
301  /**
302   * Normalizes the provided value for use in either an equality or substring
303   * matching operation.
304   *
305   * @param  value          The value to be normalized.
306   * @param  isSubstring    Indicates whether the value should be normalized as
307   *                        part of a substring assertion rather than an
308   *                        equality assertion.
309   * @param  substringType  The substring type for the element, if it is to be
310   *                        part of a substring assertion.
311   *
312   * @return  The appropriately normalized form of the provided value.
313   */
314  private static ASN1OctetString normalizeInternal(final ASN1OctetString value,
315                                                   final boolean isSubstring,
316                                                   final byte substringType)
317  {
318    final byte[] valueBytes = value.getValue();
319    if (valueBytes.length == 0)
320    {
321      return value;
322    }
323
324    final boolean trimInitial;
325    final boolean trimFinal;
326    if (isSubstring)
327    {
328      switch (substringType)
329      {
330        case SUBSTRING_TYPE_SUBINITIAL:
331          trimInitial = true;
332          trimFinal   = false;
333          break;
334
335        case SUBSTRING_TYPE_SUBFINAL:
336          trimInitial = false;
337          trimFinal   = true;
338          break;
339
340        default:
341          trimInitial = false;
342          trimFinal   = false;
343          break;
344      }
345    }
346    else
347    {
348      trimInitial = true;
349      trimFinal   = true;
350    }
351
352    // Count the number of duplicate spaces in the value, and determine whether
353    // there are any non-space characters.  Also, see if there are any non-ASCII
354    // characters.
355    boolean containsNonSpace = false;
356    boolean lastWasSpace = trimInitial;
357    int numDuplicates = 0;
358    for (final byte b : valueBytes)
359    {
360      if ((b & 0x7F) != (b & 0xFF))
361      {
362        return normalizeNonASCII(value, trimInitial, trimFinal);
363      }
364
365      if (b == ' ')
366      {
367        if (lastWasSpace)
368        {
369          numDuplicates++;
370        }
371        else
372        {
373          lastWasSpace = true;
374        }
375      }
376      else
377      {
378        containsNonSpace = true;
379        lastWasSpace = false;
380      }
381    }
382
383    if (! containsNonSpace)
384    {
385      return new ASN1OctetString(" ");
386    }
387
388    if (lastWasSpace && trimFinal)
389    {
390      numDuplicates++;
391    }
392
393
394    // Create a new byte array to hold the normalized value.
395    lastWasSpace = trimInitial;
396    int targetPos = 0;
397    final byte[] normalizedBytes = new byte[valueBytes.length - numDuplicates];
398    for (int i=0; i < valueBytes.length; i++)
399    {
400      switch (valueBytes[i])
401      {
402        case ' ':
403          if (lastWasSpace || (trimFinal && (i == (valueBytes.length - 1))))
404          {
405            // No action is required.
406          }
407          else
408          {
409            // This condition is needed to handle the special case in which
410            // there are multiple spaces at the end of the value.
411            if (targetPos < normalizedBytes.length)
412            {
413              normalizedBytes[targetPos++] = ' ';
414              lastWasSpace = true;
415            }
416          }
417
418          break;
419        case 'A':
420          normalizedBytes[targetPos++] = 'a';
421          lastWasSpace = false;
422          break;
423        case 'B':
424          normalizedBytes[targetPos++] = 'b';
425          lastWasSpace = false;
426          break;
427        case 'C':
428          normalizedBytes[targetPos++] = 'c';
429          lastWasSpace = false;
430          break;
431        case 'D':
432          normalizedBytes[targetPos++] = 'd';
433          lastWasSpace = false;
434          break;
435        case 'E':
436          normalizedBytes[targetPos++] = 'e';
437          lastWasSpace = false;
438          break;
439        case 'F':
440          normalizedBytes[targetPos++] = 'f';
441          lastWasSpace = false;
442          break;
443        case 'G':
444          normalizedBytes[targetPos++] = 'g';
445          lastWasSpace = false;
446          break;
447        case 'H':
448          normalizedBytes[targetPos++] = 'h';
449          lastWasSpace = false;
450          break;
451        case 'I':
452          normalizedBytes[targetPos++] = 'i';
453          lastWasSpace = false;
454          break;
455        case 'J':
456          normalizedBytes[targetPos++] = 'j';
457          lastWasSpace = false;
458          break;
459        case 'K':
460          normalizedBytes[targetPos++] = 'k';
461          lastWasSpace = false;
462          break;
463        case 'L':
464          normalizedBytes[targetPos++] = 'l';
465          lastWasSpace = false;
466          break;
467        case 'M':
468          normalizedBytes[targetPos++] = 'm';
469          lastWasSpace = false;
470          break;
471        case 'N':
472          normalizedBytes[targetPos++] = 'n';
473          lastWasSpace = false;
474          break;
475        case 'O':
476          normalizedBytes[targetPos++] = 'o';
477          lastWasSpace = false;
478          break;
479        case 'P':
480          normalizedBytes[targetPos++] = 'p';
481          lastWasSpace = false;
482          break;
483        case 'Q':
484          normalizedBytes[targetPos++] = 'q';
485          lastWasSpace = false;
486          break;
487        case 'R':
488          normalizedBytes[targetPos++] = 'r';
489          lastWasSpace = false;
490          break;
491        case 'S':
492          normalizedBytes[targetPos++] = 's';
493          lastWasSpace = false;
494          break;
495        case 'T':
496          normalizedBytes[targetPos++] = 't';
497          lastWasSpace = false;
498          break;
499        case 'U':
500          normalizedBytes[targetPos++] = 'u';
501          lastWasSpace = false;
502          break;
503        case 'V':
504          normalizedBytes[targetPos++] = 'v';
505          lastWasSpace = false;
506          break;
507        case 'W':
508          normalizedBytes[targetPos++] = 'w';
509          lastWasSpace = false;
510          break;
511        case 'X':
512          normalizedBytes[targetPos++] = 'x';
513          lastWasSpace = false;
514          break;
515        case 'Y':
516          normalizedBytes[targetPos++] = 'y';
517          lastWasSpace = false;
518          break;
519        case 'Z':
520          normalizedBytes[targetPos++] = 'z';
521          lastWasSpace = false;
522          break;
523        default:
524          normalizedBytes[targetPos++] = valueBytes[i];
525          lastWasSpace = false;
526          break;
527      }
528    }
529
530
531    return new ASN1OctetString(normalizedBytes);
532  }
533
534
535
536  /**
537   * Normalizes the provided value a string representation, properly handling
538   * any non-ASCII characters.
539   *
540   * @param  value        The value to be normalized.
541   * @param  trimInitial  Indicates whether to trim off all leading spaces at
542   *                      the beginning of the value.
543   * @param  trimFinal    Indicates whether to trim off all trailing spaces at
544   *                      the end of the value.
545   *
546   * @return  The normalized form of the value.
547   */
548  private static ASN1OctetString normalizeNonASCII(final ASN1OctetString value,
549                                                   final boolean trimInitial,
550                                                   final boolean trimFinal)
551  {
552    final StringBuilder buffer = new StringBuilder(value.stringValue());
553
554    int pos = 0;
555    boolean lastWasSpace = trimInitial;
556    while (pos < buffer.length())
557    {
558      final char c = buffer.charAt(pos++);
559      if (c == ' ')
560      {
561        if (lastWasSpace || (trimFinal && (pos >= buffer.length())))
562        {
563          buffer.deleteCharAt(--pos);
564        }
565        else
566        {
567          lastWasSpace = true;
568        }
569      }
570      else
571      {
572        if (Character.isHighSurrogate(c))
573        {
574          if (pos < buffer.length())
575          {
576            final char c2 = buffer.charAt(pos++);
577            if (Character.isLowSurrogate(c2))
578            {
579              final int codePoint = Character.toCodePoint(c, c2);
580              if (Character.isUpperCase(codePoint))
581              {
582                final int lowerCaseCodePoint = Character.toLowerCase(codePoint);
583                buffer.setCharAt((pos-2),
584                     Character.highSurrogate(lowerCaseCodePoint));
585                buffer.setCharAt((pos-1),
586                     Character.lowSurrogate(lowerCaseCodePoint));
587              }
588            }
589          }
590        }
591        else if (Character.isUpperCase(c))
592        {
593          buffer.setCharAt((pos-1), Character.toLowerCase(c));
594        }
595
596        lastWasSpace = false;
597      }
598    }
599
600    // It is possible that there could be an extra space at the end.  If that's
601    // the case, then remove it.
602    if (trimFinal && (buffer.length() > 0) &&
603        (buffer.charAt(buffer.length() - 1) == ' '))
604    {
605      buffer.deleteCharAt(buffer.length() - 1);
606    }
607
608    return new ASN1OctetString(buffer.toString());
609  }
610}