/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package java.text; import java.util.Locale; import libcore.icu.ICU; import libcore.icu.NativeBreakIterator; /** * Locates boundaries in text. This class defines a protocol for objects that * break up a piece of natural-language text according to a set of criteria. * Instances or subclasses of {@code BreakIterator} can be provided, for * example, to break a piece of text into words, sentences, or logical * characters according to the conventions of some language or group of * languages. We provide four built-in types of {@code BreakIterator}: *
* {@code BreakIterator} accesses the text it analyzes through a * {@link CharacterIterator}, which makes it possible to use {@code * BreakIterator} to analyze text in any text-storage vehicle that provides a * {@code CharacterIterator} interface. *
* Note: Some types of {@code BreakIterator} can take a long time to * create, and instances of {@code BreakIterator} are not currently cached by * the system. For optimal performance, keep instances of {@code BreakIterator} * around as long as it makes sense. For example, when word-wrapping a document, * don't create and destroy a new {@code BreakIterator} for each line. Create * one break iterator for the whole document (or whatever stretch of text you're * wrapping) and use it to do the whole job of wrapping the text. *
* Examples: *
* Creating and using text boundaries: *
* <pre>
 * public static void main(String args[]) {
 *     if (args.length == 1) {
 *         String stringToExamine = args[0];
 *         //print each word in order
 *         BreakIterator boundary = BreakIterator.getWordInstance();
 *         boundary.setText(stringToExamine);
 *         printEachForward(boundary, stringToExamine);
 *         //print each sentence in reverse order
 *         boundary = BreakIterator.getSentenceInstance(Locale.US);
 *         boundary.setText(stringToExamine);
 *         printEachBackward(boundary, stringToExamine);
 *         printFirst(boundary, stringToExamine);
 *         printLast(boundary, stringToExamine);
 *     }
 * }
 * </pre>
 *
* Print each element in order: *
* <pre>
 * public static void printEachForward(BreakIterator boundary, String source) {
 *     int start = boundary.first();
 *     for (int end = boundary.next(); end != BreakIterator.DONE;
 *             start = end, end = boundary.next()) {
 *         System.out.println(source.substring(start, end));
 *     }
 * }
 * </pre>
 *
* Print each element in reverse order: *
* <pre>
 * public static void printEachBackward(BreakIterator boundary, String source) {
 *     int end = boundary.last();
 *     for (int start = boundary.previous(); start != BreakIterator.DONE;
 *             end = start, start = boundary.previous()) {
 *         System.out.println(source.substring(start, end));
 *     }
 * }
 * </pre>
 *
* Print the first element: *
* <pre>
 * public static void printFirst(BreakIterator boundary, String source) {
 *     int start = boundary.first();
 *     int end = boundary.next();
 *     System.out.println(source.substring(start, end));
 * }
 * </pre>
 *
* Print the last element: *
* <pre>
 * public static void printLast(BreakIterator boundary, String source) {
 *     int end = boundary.last();
 *     int start = boundary.previous();
 *     System.out.println(source.substring(start, end));
 * }
 * </pre>
 *
* Print the element at a specified position: *
* <pre>
 * public static void printAt(BreakIterator boundary, int pos, String source) {
 *     int end = boundary.following(pos);
 *     int start = boundary.previous();
 *     System.out.println(source.substring(start, end));
 * }
 * </pre>
 *
* Find the next word: *
* <pre>
 * public static int nextWordStartAfter(int pos, String text) {
 *     BreakIterator wb = BreakIterator.getWordInstance();
 *     wb.setText(text);
 *     int last = wb.following(pos);
 *     int current = wb.next();
 *     while (current != BreakIterator.DONE) {
 *         for (int p = last; p &lt; current; p++) {
 *             if (Character.isLetter(text.charAt(p)))
 *                 return last;
 *         }
 *         last = current;
 *         current = wb.next();
 *     }
 *     return BreakIterator.DONE;
 * }
 * </pre>
 *
* The iterator returned by {@code BreakIterator.getWordInstance()} is unique in
 * that the break positions it returns don't represent both the start and end of
 * the thing being iterated over. That is, a sentence-break iterator returns
 * breaks that each represent the end of one sentence and the beginning of the
 * next. With the word-break iterator, the characters between two boundaries
 * might be a word, or they might be the punctuation or whitespace between two
 * words. The above code uses a simple heuristic to determine which boundary is
 * the beginning of a word: If the characters between this boundary and the next
 * boundary include at least one letter (this can be an alphabetical letter, a
 * CJK ideograph, a Hangul syllable, a Kana character, etc.), then the text
 * between this boundary and the next is a word; otherwise, it's the material
 * between words.
 *
 * @see CharacterIterator
 */
public abstract class BreakIterator implements Cloneable {

    /**
     * This constant is returned by iterate methods like {@code previous()} or
     * {@code next()} if they have returned all valid boundaries.
     */
    public static final int DONE = -1;

    /*
     * The wrapped ICU implementation. It is only assigned by the
     * package-private wrapping constructor, so it stays null for subclasses
     * that are created through the protected no-argument constructor. Every
     * concrete method below therefore has to cope with a null value.
     */
    NativeBreakIterator wrapped;

    /**
     * Default constructor, for use by subclasses.
     */
    protected BreakIterator() {
    }

    /*
     * wrapping constructor
     */
    BreakIterator(NativeBreakIterator iterator) {
        wrapped = iterator;
    }

    /**
     * Returns an array of locales for which custom {@code BreakIterator} instances
     * are available.
     * <p>Note that Android does not support user-supplied locale service providers.
     */
    public static Locale[] getAvailableLocales() {
        return ICU.getAvailableBreakIteratorLocales();
    }

    /**
     * Returns a new instance of {@code BreakIterator} to iterate over
     * characters using the user's default locale.
     * See "Be wary of the default locale".
     *
     * @return a new instance of {@code BreakIterator} using the default locale.
     */
    public static BreakIterator getCharacterInstance() {
        return getCharacterInstance(Locale.getDefault());
    }

    /**
     * Returns a new instance of {@code BreakIterator} to iterate over
     * characters using the given locale.
     *
     * @param where
     *            the given locale.
     * @return a new instance of {@code BreakIterator} using the given locale.
     * @throws NullPointerException if {@code where} is {@code null}.
     */
    public static BreakIterator getCharacterInstance(Locale where) {
        return new RuleBasedBreakIterator(NativeBreakIterator.getCharacterInstance(where));
    }

    /**
     * Returns a new instance of {@code BreakIterator} to iterate over
     * line breaks using the user's default locale.
     * See "Be wary of the default locale".
     *
     * @return a new instance of {@code BreakIterator} using the default locale.
     */
    public static BreakIterator getLineInstance() {
        return getLineInstance(Locale.getDefault());
    }

    /**
     * Returns a new instance of {@code BreakIterator} to iterate over
     * line breaks using the given locale.
     *
     * @param where
     *            the given locale.
     * @return a new instance of {@code BreakIterator} using the given locale.
     * @throws NullPointerException if {@code where} is {@code null}.
     */
    public static BreakIterator getLineInstance(Locale where) {
        return new RuleBasedBreakIterator(NativeBreakIterator.getLineInstance(where));
    }

    /**
     * Returns a new instance of {@code BreakIterator} to iterate over
     * sentence-breaks using the default locale.
     * See "Be wary of the default locale".
     *
     * @return a new instance of {@code BreakIterator} using the default locale.
     */
    public static BreakIterator getSentenceInstance() {
        return getSentenceInstance(Locale.getDefault());
    }

    /**
     * Returns a new instance of {@code BreakIterator} to iterate over
     * sentence-breaks using the given locale.
     *
     * @param where
     *            the given locale.
     * @return a new instance of {@code BreakIterator} using the given locale.
     * @throws NullPointerException if {@code where} is {@code null}.
     */
    public static BreakIterator getSentenceInstance(Locale where) {
        return new RuleBasedBreakIterator(NativeBreakIterator.getSentenceInstance(where));
    }

    /**
     * Returns a new instance of {@code BreakIterator} to iterate over
     * word-breaks using the default locale.
     * See "Be wary of the default locale".
     *
     * @return a new instance of {@code BreakIterator} using the default locale.
     */
    public static BreakIterator getWordInstance() {
        return getWordInstance(Locale.getDefault());
    }

    /**
     * Returns a new instance of {@code BreakIterator} to iterate over
     * word-breaks using the given locale.
     *
     * @param where
     *            the given locale.
     * @return a new instance of {@code BreakIterator} using the given locale.
     * @throws NullPointerException if {@code where} is {@code null}.
     */
    public static BreakIterator getWordInstance(Locale where) {
        return new RuleBasedBreakIterator(NativeBreakIterator.getWordInstance(where));
    }

    /**
     * Indicates whether the given offset is a boundary position. If this method
     * returns true, the current iteration position is set to the given
     * position; if the function returns false, the current iteration position
     * is set as though {@link #following(int)} had been called.
     * <p>
     * Iterators that do not wrap a native implementation (subclasses created
     * through the protected constructor) get a fallback that scans forward
     * from {@link #first()} using {@link #next()}. The fallback is linear in
     * the number of boundaries and does not validate {@code offset}; subclasses
     * are encouraged to override it.
     *
     * @param offset
     *            the given offset to check.
     * @return {@code true} if the given offset is a boundary position; {@code
     *         false} otherwise.
     */
    public boolean isBoundary(int offset) {
        if (wrapped != null) {
            return wrapped.isBoundary(offset);
        }
        // No native delegate: walk forward to the first boundary that is not
        // before the offset. That leaves the iterator either on the offset
        // itself or on the boundary following it, as the contract requires.
        int boundary = first();
        while (boundary != DONE && boundary < offset) {
            boundary = next();
        }
        return boundary == offset;
    }

    /**
     * Returns the position of last boundary preceding the given offset, and
     * sets the current position to the returned value, or {@code DONE} if the
     * given offset specifies the starting position.
     * <p>
     * Iterators that do not wrap a native implementation (subclasses created
     * through the protected constructor) get a fallback that scans backward
     * from {@link #last()} using {@link #previous()}. The fallback is linear
     * in the number of boundaries and does not validate {@code offset};
     * subclasses are encouraged to override it.
     *
     * @param offset
     *            the given start position to be searched for.
     * @return the position of the last boundary preceding the given offset.
     * @throws IllegalArgumentException
     *             if the offset is invalid.
     */
    public int preceding(int offset) {
        if (wrapped != null) {
            return wrapped.preceding(offset);
        }
        // No native delegate: walk backward until a boundary strictly before
        // the offset is found, or until previous() reports DONE.
        int boundary = last();
        while (boundary != DONE && boundary >= offset) {
            boundary = previous();
        }
        return boundary;
    }

    /**
     * Sets the new text string to be analyzed, the current position will be
     * reset to the beginning of this new string, and the old string will be
     * lost.
     * <p>
     * Iterators that do not wrap a native implementation forward the string
     * to {@link #setText(CharacterIterator)} wrapped in a
     * {@link StringCharacterIterator}.
     *
     * @param newText
     *            the new text string to be analyzed.
     */
    public void setText(String newText) {
        if (wrapped != null) {
            wrapped.setText(newText);
        } else {
            // Subclasses without a native delegate only have to implement the
            // CharacterIterator overload; route the string through it.
            setText(new StringCharacterIterator(newText));
        }
    }

    /**
     * Returns this iterator's current position.
     *
     * @return this iterator's current position.
     */
    public abstract int current();

    /**
     * Sets this iterator's current position to the first boundary and returns
     * that position.
     *
     * @return the position of the first boundary.
     */
    public abstract int first();

    /**
     * Sets this iterator's current position to the first boundary following
     * the given offset and returns that position. Returns {@code DONE} if
     * there is no boundary after the given offset.
     *
     * @param offset
     *            the given position to be searched for.
     * @return the position of the first boundary following the given offset.
     * @throws IllegalArgumentException
     *             if the offset is invalid.
     */
    public abstract int following(int offset);

    /**
     * Returns a {@code CharacterIterator} which represents the text being
     * analyzed. Please note that the returned value is probably the internal
     * iterator used by this object. If the invoker wants to modify the status
     * of the returned iterator, it is recommended to first create a clone of
     * the iterator returned.
     *
     * @return a {@code CharacterIterator} which represents the text being
     *         analyzed.
     */
    public abstract CharacterIterator getText();

    /**
     * Sets this iterator's current position to the last boundary and returns
     * that position.
     *
     * @return the position of last boundary.
     */
    public abstract int last();

    /**
     * Sets this iterator's current position to the next boundary after the
     * current position, and returns this position. Returns {@code DONE} if no
     * boundary was found after the current position.
     *
     * @return the position of the next boundary, or {@code DONE}.
     */
    public abstract int next();

    /**
     * Moves this iterator by {@code n} boundaries relative to the current
     * boundary and returns the resulting position. Following the
     * {@code java.text.BreakIterator} specification, a positive {@code n}
     * moves towards later boundaries, a negative {@code n} moves towards
     * earlier boundaries and {@code 0} leaves the iterator where it is.
     * Returns {@code DONE} if the first or last boundary is reached before
     * {@code n} boundaries have been passed.
     *
     * @param n
     *            the number of boundaries to move by.
     * @return the position of the boundary that was reached, or {@code DONE}.
     */
    public abstract int next(int n);

    /**
     * Sets this iterator's current position to the previous boundary before the
     * current position and returns that position. Returns {@code DONE} if
     * no boundary was found before the current position.
     *
     * @return the position of the previous boundary, or {@code DONE}.
     */
    public abstract int previous();

    /**
     * Sets the new text to be analyzed by the given {@code CharacterIterator}.
     * The position will be reset to the beginning of the new text, and other
     * status information of this iterator will be kept.
     *
     * @param newText
     *            the {@code CharacterIterator} referring to the text to be
     *            analyzed.
     */
    public abstract void setText(CharacterIterator newText);

    /**
     * Returns a copy of this iterator. When this iterator wraps a native
     * implementation, the copy receives its own clone of that implementation;
     * otherwise only the shallow {@code Object.clone()} copy is made and
     * subclasses are responsible for copying their own state.
     *
     * @return a copy of this iterator.
     */
    @Override
    public Object clone() {
        try {
            BreakIterator cloned = (BreakIterator) super.clone();
            // wrapped is null for subclasses built with the protected
            // constructor; cloning it unconditionally would throw.
            if (wrapped != null) {
                cloned.wrapped = (NativeBreakIterator) wrapped.clone();
            }
            return cloned;
        } catch (CloneNotSupportedException e) {
            // Cannot happen: this class implements Cloneable.
            throw new AssertionError(e);
        }
    }
}