/* * Copyright (C) 2014 The Android Open Source Project * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package java.net; import android.icu.text.IDNA; /** * Provides methods to convert internationalized domain names (IDNs) between * a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation. * Internationalized domain names can use characters from the entire range of * Unicode, while traditional domain names are restricted to ASCII characters. * ACE is an encoding of Unicode strings that uses only ASCII characters and * can be used with software (such as the Domain Name System) that only * understands traditional domain names. * *

Internationalized domain names are defined in RFC 3490. * RFC 3490 defines two operations: ToASCII and ToUnicode. These 2 operations employ * Nameprep algorithm, which is a * profile of Stringprep, and * Punycode algorithm to convert * domain name string back and forth. * *

The behavior of aforementioned conversion process can be adjusted by various flags: *

If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted * can contain code points that are unassigned in Unicode 3.2, which is the * Unicode version on which IDN conversion is based. If the flag is not used, * the presence of such unassigned code points is treated as an error. *
If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against RFC 1122 and RFC 1123. * It is an error if they don't meet the requirements. *

* These flags can be logically OR'ed together. * *

The security consideration is important with respect to internationalization * domain name support. For example, English domain names may be homographed * - maliciously misspelled by substitution of non-Latin letters. * Unicode Technical Report #36 * discusses security issues of IDN support as well as possible solutions. * Applications are responsible for taking adequate security measures when using * international domain names. * * @author Edward Wang * @since 1.6 * */ public final class IDN { /** * Flag to allow processing of unassigned code points */ public static final int ALLOW_UNASSIGNED = 0x01; /** * Flag to turn on the check against STD-3 ASCII rules */ public static final int USE_STD3_ASCII_RULES = 0x02; private IDN() { } /** * Translates a string from Unicode to ASCII Compatible Encoding (ACE), * as defined by the ToASCII operation of RFC 3490. * *

ToASCII operation can fail. ToASCII fails if any step of it fails. * If ToASCII operation fails, an IllegalArgumentException will be thrown. * In this case, the input string should not be used in an internationalized domain name. * *

A label is an individual part of a domain name. The original ToASCII operation, * as defined in RFC 3490, only operates on a single label. This method can handle * both label and entire domain name, by assuming that labels in a domain name are * always separated by dots. The following characters are recognized as dots: * \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop), * and \uFF61 (halfwidth ideographic full stop). if dots are * used as label separators, this method also changes all of them to \u002E (full stop) * in output translated string. * * @param input the string to be processed * @param flag process flag; can be 0 or any logical OR of possible flags * * @return the translated {@code String} * * @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification */ public static String toASCII(String input, int flag) { try { return IDNA.convertIDNToASCII(input, flag).toString(); } catch (android.icu.text.StringPrepParseException e) { throw new IllegalArgumentException("Invalid input to toASCII: " + input, e); } } /** * Translates a string from Unicode to ASCII Compatible Encoding (ACE), * as defined by the ToASCII operation of RFC 3490. * *

This convenience method works as if by invoking the * two-argument counterpart as follows: *

* {@link #toASCII(String, int) toASCII}(input, 0); *

* * @param input the string to be processed * * @return the translated {@code String} * * @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification */ public static String toASCII(String input) { return toASCII(input, 0); } /** * Translates a string from ASCII Compatible Encoding (ACE) to Unicode, * as defined by the ToUnicode operation of RFC 3490. * *

ToUnicode never fails. In case of any error, the input string is returned unmodified. * *

A label is an individual part of a domain name. The original ToUnicode operation, * as defined in RFC 3490, only operates on a single label. This method can handle * both label and entire domain name, by assuming that labels in a domain name are * always separated by dots. The following characters are recognized as dots: * \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop), * and \uFF61 (halfwidth ideographic full stop). * * @param input the string to be processed * @param flag process flag; can be 0 or any logical OR of possible flags * * @return the translated {@code String} */ public static String toUnicode(String input, int flag) { try { // ICU only translates separators to ASCII for toASCII. // Java expects the translation for toUnicode too. return convertFullStop(IDNA.convertIDNToUnicode(input, flag)).toString(); } catch (android.icu.text.StringPrepParseException e) { // The RI documentation explicitly states that if the conversion was unsuccessful // the original string is returned. return input; } } private static boolean isLabelSeperator(char c) { return (c == '\u3002' || c == '\uff0e' || c == '\uff61'); } private static StringBuffer convertFullStop(StringBuffer input) { for (int i = 0; i < input.length(); i++) { if (isLabelSeperator(input.charAt(i))) { input.setCharAt(i, '.'); } } return input; } /** * Translates a string from ASCII Compatible Encoding (ACE) to Unicode, * as defined by the ToUnicode operation of RFC 3490. * *

This convenience method works as if by invoking the * two-argument counterpart as follows: *

* {@link #toUnicode(String, int) toUnicode}(input, 0); *

* * @param input the string to be processed * * @return the translated {@code String} */ public static String toUnicode(String input) { return toUnicode(input, 0); } }