/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package java.text;
import java.util.Comparator;
import java.util.Locale;
import libcore.icu.ICU;
import libcore.icu.RuleBasedCollatorICU;
/**
* Performs locale-sensitive string comparison. A concrete subclass,
* {@link RuleBasedCollator}, allows customization of the collation ordering by
* the use of rule sets.
*
* PRIMARY strength: Typically, this is used to denote differences between
* base characters (for example, "a" &lt; "b"). It is the strongest difference.
* For example, dictionaries are divided into different sections by base
* character.
*
* SECONDARY strength: Accents in the characters are considered secondary
* differences (for example, "as" &lt; "às" &lt; "at"). Other differences
* between letters can also be considered secondary differences, depending on
* the language. A secondary difference is ignored when there is a primary
* difference anywhere in the strings.
*
* TERTIARY strength: Upper and lower case differences in characters are
* distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
* "aò"). In addition, a variant of a letter differs from the base form
* at the tertiary strength (such as "A" and "Ⓐ"). Another example is the
* difference between large and small Kana. A tertiary difference is ignored
* when there is a primary or secondary difference anywhere in the strings.
*
* IDENTICAL strength: When all other strengths are equal, the IDENTICAL
* strength is used as a tiebreaker. The Unicode code point values of the NFD
* form of each string are compared only if no difference was found at the
* other strengths. For example, Hebrew cantillation marks are only
* distinguished at this strength. This strength should be used sparingly, as
* it is extremely rare for two strings to differ only in their code point
* values. Using this strength substantially decreases the performance of both
* comparison and collation key generation APIs. This strength also increases
* the size of the collation key.
*
*
* This {@code Collator} deals only with two decomposition modes, the canonical
* decomposition mode and one that does not use any decomposition. The
* compatibility decomposition mode
* {@code java.text.Collator.FULL_DECOMPOSITION} is not supported here. If the
* canonical decomposition mode is set, {@code Collator} handles un-normalized
* text properly, producing the same results as if the text were normalized in
* NFD. If canonical decomposition is turned off, it is the user's
* responsibility to ensure that all text is already in the appropriate form
* before performing a comparison or before getting a {@link CollationKey}.
*
* Examples:
* <pre>
* // Get the Collator for US English and set its strength to PRIMARY
* Collator usCollator = Collator.getInstance(Locale.US);
* usCollator.setStrength(Collator.PRIMARY);
* if (usCollator.compare("abc", "ABC") == 0) {
*     System.out.println("Strings are equivalent");
* }
* </pre>
*
* The following example shows how to compare two strings using the collator for
* the default locale.
* <pre>
* // Compare two strings in the default locale
* Collator myCollator = Collator.getInstance();
* myCollator.setDecomposition(Collator.NO_DECOMPOSITION);
* if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) {
*     System.out.println("\u00e0\u0325 is not equal to a\u0325\u0300 without decomposition");
*     myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
*     if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) {
*         System.out.println("Error: \u00e0\u0325 should be equal to a\u0325\u0300 with decomposition");
*     } else {
*         System.out.println("\u00e0\u0325 is equal to a\u0325\u0300 with decomposition");
*     }
* } else {
*     System.out.println("Error: \u00e0\u0325 should not be equal to a\u0325\u0300 without decomposition");
* }
* </pre>
*
* @see RuleBasedCollator
* @see CollationKey
*/
public abstract class Collator implements Comparator