/*
* Copyright (C) 2007 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.net;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
/**
*
* Sanitizes the Query portion of a URL. Simple example:
*
* UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
* sanitizer.setAllowUnregisteredParamaters(true);
* sanitizer.parseUrl("http://example.com/?name=Joe+User");
* String name = sanitizer.getValue("name");
* // name now contains "Joe_User"
*
*
* Register ValueSanitizers to customize the way individual
* parameters are sanitized:
*
* UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
* sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
* sanitizer.parseUrl("http://example.com/?name=Joe+User");
* String name = sanitizer.getValue("name");
* // name now contains "Joe User". (The string is first decoded, which
* // converts the '+' to a ' '. Then the string is sanitized; the
* // space-legal sanitizer registered for "name" leaves the ' ' alone.
* // With the default unregistered parameter sanitizer the ' ' would be
* // converted to an '_', because that sanitizer does not allow any
* // special characters, and ' ' is a special character.)
*
*
* There are several ways to create ValueSanitizers. In order of increasing
* sophistication:
*
* Note: can't use {@link String#trim} because {@link String#trim} has a * different definition of whitespace than we want. * @param value the string to trim * @return the trimmed string */ private String trimWhitespace(String value) { int start = 0; int last = value.length() - 1; int end = last; while (start <= end && isWhitespace(value.charAt(start))) { start++; } while (end >= start && isWhitespace(value.charAt(end))) { end--; } if (start == 0 && end == last) { return value; } return value.substring(start, end + 1); } /** * Check if c is whitespace. * @param c character to test * @return true if c is a whitespace character */ private boolean isWhitespace(char c) { switch(c) { case ' ': case '\t': case '\f': case '\n': case '\r': case 11: /* VT */ return true; default: return false; } } /** * Check whether an individual character is legal. Uses the * flag bit-set passed into the constructor. * @param c * @return true if c is a legal character */ private boolean characterIsLegal(char c) { switch(c) { case ' ' : return (mFlags & SPACE_OK) != 0; case '\t': case '\f': case '\n': case '\r': case 11: /* VT */ return (mFlags & OTHER_WHITESPACE_OK) != 0; case '\"': return (mFlags & DQUOTE_OK) != 0; case '\'': return (mFlags & SQUOTE_OK) != 0; case '<' : return (mFlags & LT_OK) != 0; case '>' : return (mFlags & GT_OK) != 0; case '&' : return (mFlags & AMP_OK) != 0; case '%' : return (mFlags & PCT_OK) != 0; case '\0': return (mFlags & NUL_OK) != 0; default : return (c >= 32 && c < 127) || ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0)); } } } /** * Get the current value sanitizer used when processing * unregistered parameter values. *
* Note: The default unregistered parameter value sanitizer is * one that doesn't allow any special characters, similar to what * is returned by calling createAllIllegal. * * @return the current ValueSanitizer used to sanitize unregistered * parameter values. */ public ValueSanitizer getUnregisteredParameterValueSanitizer() { return mUnregisteredParameterValueSanitizer; } /** * Set the value sanitizer used when processing unregistered * parameter values. * @param sanitizer set the ValueSanitizer used to sanitize unregistered * parameter values. */ public void setUnregisteredParameterValueSanitizer( ValueSanitizer sanitizer) { mUnregisteredParameterValueSanitizer = sanitizer; } // Private fields for singleton sanitizers: private static final ValueSanitizer sAllIllegal = new IllegalCharacterValueSanitizer( IllegalCharacterValueSanitizer.ALL_ILLEGAL); private static final ValueSanitizer sAllButNulLegal = new IllegalCharacterValueSanitizer( IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL); private static final ValueSanitizer sAllButWhitespaceLegal = new IllegalCharacterValueSanitizer( IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL); private static final ValueSanitizer sURLLegal = new IllegalCharacterValueSanitizer( IllegalCharacterValueSanitizer.URL_LEGAL); private static final ValueSanitizer sUrlAndSpaceLegal = new IllegalCharacterValueSanitizer( IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL); private static final ValueSanitizer sAmpLegal = new IllegalCharacterValueSanitizer( IllegalCharacterValueSanitizer.AMP_LEGAL); private static final ValueSanitizer sAmpAndSpaceLegal = new IllegalCharacterValueSanitizer( IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL); private static final ValueSanitizer sSpaceLegal = new IllegalCharacterValueSanitizer( IllegalCharacterValueSanitizer.SPACE_LEGAL); private static final ValueSanitizer sAllButNulAndAngleBracketsLegal = new IllegalCharacterValueSanitizer( 
IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL); /** * Return a value sanitizer that does not allow any special characters, * and also does not allow script URLs. * @return a value sanitizer */ public static final ValueSanitizer getAllIllegal() { return sAllIllegal; } /** * Return a value sanitizer that allows everything except Nul ('\0') * characters. Script URLs are allowed. * @return a value sanitizer */ public static final ValueSanitizer getAllButNulLegal() { return sAllButNulLegal; } /** * Return a value sanitizer that allows everything except Nul ('\0') * characters, space (' '), and other whitespace characters. * Script URLs are allowed. * @return a value sanitizer */ public static final ValueSanitizer getAllButWhitespaceLegal() { return sAllButWhitespaceLegal; } /** * Return a value sanitizer that allows all the characters used by * encoded URLs. Does not allow script URLs. * @return a value sanitizer */ public static final ValueSanitizer getUrlLegal() { return sURLLegal; } /** * Return a value sanitizer that allows all the characters used by * encoded URLs and allows spaces, which are not technically legal * in encoded URLs, but commonly appear anyway. * Does not allow script URLs. * @return a value sanitizer */ public static final ValueSanitizer getUrlAndSpaceLegal() { return sUrlAndSpaceLegal; } /** * Return a value sanitizer that does not allow any special characters * except ampersand ('&'). Does not allow script URLs. * @return a value sanitizer */ public static final ValueSanitizer getAmpLegal() { return sAmpLegal; } /** * Return a value sanitizer that does not allow any special characters * except ampersand ('&') and space (' '). Does not allow script URLs. * @return a value sanitizer */ public static final ValueSanitizer getAmpAndSpaceLegal() { return sAmpAndSpaceLegal; } /** * Return a value sanitizer that does not allow any special characters * except space (' '). Does not allow script URLs. 
* @return a value sanitizer
*/
public static final ValueSanitizer getSpaceLegal() {
    return sSpaceLegal;
}
/**
* Shared sanitizer that accepts any special characters
* except angle brackets ('<' and '>') and Nul ('\0').
* Allows script URLs.
* @return a value sanitizer
*/
public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
    return sAllButNulAndAngleBracketsLegal;
}
/**
* Constructs a UrlQuerySanitizer.
*
* Defaults: *
* Because the URL is parsed before the constructor returns, there isn't * a chance to configure the sanitizer to change the parsing behavior. *
*
* UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
* String name = sanitizer.getValue("name");
*
*
* Defaults: *
* Note: Do not modify this set. Treat it as a read-only set.
* @return all the parameters found in the current query.
*/
public Set Note: Do not modify this list. Treat it as a read-only list.
* Registering a non-null value sanitizer for a particular parameter
* makes that parameter a registered parameter.
* @param parameter an unencoded parameter name
* @param valueSanitizer the value sanitizer to use for a particular
* parameter. May be null in order to unregister that parameter.
* @see #getAllowUnregisteredParamaters()
*/
/**
* Register a value sanitizer for a single parameter, or unregister the
* parameter when no sanitizer is supplied.
*
* Registering a non-null value sanitizer for a particular parameter
* makes that parameter a registered parameter.
* @param parameter an unencoded parameter name
* @param valueSanitizer the value sanitizer to use for the parameter.
* May be null in order to unregister that parameter.
* @see #getAllowUnregisteredParamaters()
*/
public void registerParameter(String parameter,
        ValueSanitizer valueSanitizer) {
    if (valueSanitizer == null) {
        // Unregister and stop here: falling through to put() would
        // re-insert the key with a null value right after removing it.
        mSanitizers.remove(parameter);
        return;
    }
    mSanitizers.put(parameter, valueSanitizer);
}
/**
* Register one value sanitizer for every parameter name in an array.
* A null sanitizer unregisters each of the listed parameters instead of
* storing a null mapping for it.
* @param parameters An array of unencoded parameter names.
* @param valueSanitizer the value sanitizer to use for all of the listed
* parameters, or null to unregister them.
* @see #registerParameter
*/
public void registerParameters(String[] parameters,
        ValueSanitizer valueSanitizer) {
    for (String parameter : parameters) {
        if (valueSanitizer == null) {
            // Same unregister semantics as documented for registerParameter.
            mSanitizers.remove(parameter);
        } else {
            mSanitizers.put(parameter, valueSanitizer);
        }
    }
}
/**
* Choose whether parameters without a registered value sanitizer are
* accepted. When they are not accepted, no effective sanitizer is found
* for them and they are dropped while a query is sanitized.
*
* Defaults to false.
* @param allowUnregisteredParamaters true to allow unregistered parameters.
* @see #getAllowUnregisteredParamaters()
*/
public void setAllowUnregisteredParamaters(boolean allowUnregisteredParamaters) {
    this.mAllowUnregisteredParamaters = allowUnregisteredParamaters;
}
/**
* Report whether parameters without a registered value sanitizer are
* accepted. If they are not, they are dropped when a query is parsed.
* @return true if unregistered parameters are allowed.
* @see #setAllowUnregisteredParamaters(boolean)
*/
public boolean getAllowUnregisteredParamaters() {
    return this.mAllowUnregisteredParamaters;
}
/**
* Choose which occurrence wins when a parameter appears more than once.
* True keeps the value of the first occurrence; false lets each later
* occurrence replace the earlier one, so the last occurrence wins.
*
* The preferred occurrence is the one whose value is recorded for the
* parameter name when sanitized entries are added.
*
* Defaults to false.
* @param preferFirstRepeatedParameter True if the first repeated
* parameter is preferred.
* @see #getPreferFirstRepeatedParameter()
*/
public void setPreferFirstRepeatedParameter(boolean preferFirstRepeatedParameter) {
    this.mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
}
/**
* Report whether the first occurrence of a repeated parameter is the
* preferred one.
* @return true if the first occurrence of a repeated parameter is
* preferred.
* @see #setPreferFirstRepeatedParameter(boolean)
*/
public boolean getPreferFirstRepeatedParameter() {
    return this.mPreferFirstRepeatedParameter;
}
/**
* Process one escaped parameter-value pair. This implementation
* unescapes the parameter name and looks up its effective value
* sanitizer. If there is none the pair is silently dropped; otherwise
* the value is unescaped, sanitized, and handed to addSanitizedEntry
* together with the unescaped parameter name.
* @param parameter an escaped parameter
* @param value an unsanitized escaped value
*/
protected void parseEntry(String parameter, String value) {
    final String name = unescape(parameter);
    final ValueSanitizer sanitizer = getEffectiveValueSanitizer(name);
    if (sanitizer != null) {
        // The value is only unescaped once the parameter is known to be kept.
        addSanitizedEntry(name, sanitizer.sanitize(unescape(value)));
    }
}
/**
* Store a sanitized parameter-value pair. The pair is always appended to
* the entries list; the per-parameter value is updated unless the first
* occurrence is preferred and the parameter already has a value.
* Override if you want to do additional filtering or validation.
* @param parameter an unescaped parameter
* @param value a sanitized unescaped value
*/
protected void addSanitizedEntry(String parameter, String value) {
    ParameterValuePair pair = new ParameterValuePair(parameter, value);
    mEntriesList.add(pair);
    // Keep the earlier value only when first-wins is on and one exists.
    boolean keepExisting =
            mPreferFirstRepeatedParameter && mEntries.containsKey(parameter);
    if (!keepExisting) {
        mEntries.put(parameter, value);
    }
}
/**
* Look up the value sanitizer registered for a parameter.
* @param parameter the unescaped parameter
* @return the currently registered value sanitizer for this parameter,
* or null if none is registered.
* @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
*/
public ValueSanitizer getValueSanitizer(String parameter) {
    ValueSanitizer registered = mSanitizers.get(parameter);
    return registered;
}
/**
* Determine the value sanitizer that actually applies to a parameter.
* A registered sanitizer always wins. Without one, the sanitizer for
* unregistered parameters is used if unregistered parameters are
* allowed; otherwise there is no effective sanitizer.
* @param parameter an unescaped parameter
* @return the effective value sanitizer for a parameter, or null if the
* parameter is unregistered and unregistered parameters are not allowed.
*/
public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
    ValueSanitizer registered = getValueSanitizer(parameter);
    if (registered != null) {
        return registered;
    }
    return mAllowUnregisteredParamaters
            ? getUnregisteredParameterValueSanitizer()
            : null;
}
/**
* Unescape an escaped string.
*
* Every '+' becomes a space, and every '%' followed by two hexadecimal
* digits becomes the single character whose code is the value of those
* two digits. A '%' that is not followed by two hexadecimal digits is
* copied through unchanged.
*
* @param string the escaped string
* @return the unescaped string. The same instance is returned when the
* input contains neither '%' nor '+'.
*/
public String unescape(String string) {
    // Find the first character that needs decoding. Both kinds of escape
    // must be considered: looking for '+' only when there is no '%' would
    // leave a '+' that precedes the first '%' undecoded.
    int firstPercent = string.indexOf('%');
    int firstPlus = string.indexOf('+');
    int firstEscape;
    if (firstPercent < 0) {
        firstEscape = firstPlus;
    } else if (firstPlus < 0) {
        firstEscape = firstPercent;
    } else {
        firstEscape = Math.min(firstPercent, firstPlus);
    }
    // Early exit if no escaped characters.
    if (firstEscape < 0) {
        return string;
    }
    int length = string.length();
    StringBuilder stringBuilder = new StringBuilder(length);
    // Everything before the first escape is copied verbatim.
    stringBuilder.append(string, 0, firstEscape);
    for (int i = firstEscape; i < length; i++) {
        char c = string.charAt(i);
        if (c == '+') {
            c = ' ';
        } else if (c == '%' && i + 2 < length) {
            char c1 = string.charAt(i + 1);
            char c2 = string.charAt(i + 2);
            if (isHexDigit(c1) && isHexDigit(c2)) {
                c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
                // Skip the two hex digits that were just consumed.
                i += 2;
            }
        }
        stringBuilder.append(c);
    }
    return stringBuilder.toString();
}
/**
* Tell whether a character is a hexadecimal digit, i.e. whether
* decodeHexDigit yields a non-negative value for it. Both upper case and
* lower case hex digits are allowed.
* @param c the character to test
* @return true if c is a hex digit.
*/
protected boolean isHexDigit(char c) {
    int digitValue = decodeHexDigit(c);
    return digitValue >= 0;
}
/**
* Map a character representing a hexadecimal digit to its integer value.
* Both upper case and lower case hex digits are allowed. Any character
* that is not a hexadecimal digit yields -1.
* @param c the hexadecimal digit.
* @return the integer value of the hexadecimal digit, or -1.
*/
protected int decodeHexDigit(char c) {
    if ('0' <= c && c <= '9') {
        return c - '0';
    }
    if ('A' <= c && c <= 'F') {
        return 10 + (c - 'A');
    }
    if ('a' <= c && c <= 'f') {
        return 10 + (c - 'a');
    }
    return -1;
}
/**
* Discard every recorded entry, emptying both the entries list and the
* per-parameter values. Intended to be called to get ready to parse a
* new query string.
*/
protected void clear() {
    mEntriesList.clear();
    mEntries.clear();
}
}