/*
* Copyright (C) 2007 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.text;
import com.android.internal.util.ArrayUtils;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import android.app.ActivityThread;
import android.app.Application;
import android.content.res.ColorStateList;
import android.content.res.Resources;
import android.graphics.Color;
import android.graphics.Typeface;
import android.graphics.drawable.Drawable;
import android.text.style.AbsoluteSizeSpan;
import android.text.style.AlignmentSpan;
import android.text.style.BackgroundColorSpan;
import android.text.style.BulletSpan;
import android.text.style.CharacterStyle;
import android.text.style.ForegroundColorSpan;
import android.text.style.ImageSpan;
import android.text.style.ParagraphStyle;
import android.text.style.QuoteSpan;
import android.text.style.RelativeSizeSpan;
import android.text.style.StrikethroughSpan;
import android.text.style.StyleSpan;
import android.text.style.SubscriptSpan;
import android.text.style.SuperscriptSpan;
import android.text.style.TextAppearanceSpan;
import android.text.style.TypefaceSpan;
import android.text.style.URLSpan;
import android.text.style.UnderlineSpan;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This class processes HTML strings into displayable styled text.
* Not all HTML tags are supported.
*/
public class Html {
/**
* Retrieves images for HTML {@code <img>} tags.
*/
public static interface ImageGetter {
/**
* This method is called when the HTML parser encounters an
* {@code <img>} tag. The {@code source} argument is the
* string from the "src" attribute; the return value should be
* a Drawable representation of the image or {@code null}
* for a generic replacement image. Make sure you call
* setBounds() on your Drawable if it doesn't already have
* its bounds set.
*
* @param source the value of the tag's "src" attribute
* @return a Drawable for the image, or {@code null} to request the generic replacement image
*/
public Drawable getDrawable(String source);
}
/**
* Is notified when HTML tags are encountered that the parser does
* not know how to interpret.
*/
public static interface TagHandler {
/**
* This method will be called when the HTML parser encounters
* a tag that it does not know how to interpret.
*
* @param opening presumably {@code true} when the tag is being opened and {@code false}
*        when it is being closed (inferred from the name; the caller is not shown here)
* @param tag the name of the tag that was not recognized
* @param output the editable text being produced, which the handler may modify
* @param xmlReader the XML reader driving the parse
*/
public void handleTag(boolean opening, String tag,
Editable output, XMLReader xmlReader);
}
/**
* Option for {@link #toHtml(Spanned, int)}: Wrap consecutive lines of text delimited by '\n'
* inside {@code <p>} elements. {@link BulletSpan}s are ignored.
*/
public static final int TO_HTML_PARAGRAPH_LINES_CONSECUTIVE = 0x00000000;
/**
* Option for {@link #toHtml(Spanned, int)}: Wrap each line of text delimited by '\n' inside a
* {@code <p>} or a {@code <li>} element. This allows {@link ParagraphStyle}s attached to be
* encoded as CSS styles within the corresponding {@code <p>} or {@code <li>} element.
*/
public static final int TO_HTML_PARAGRAPH_LINES_INDIVIDUAL = 0x00000001;
/**
* Flag indicating that texts inside {@code <p>} elements will be separated from other texts
* with one newline character by default.
*/
public static final int FROM_HTML_SEPARATOR_LINE_BREAK_PARAGRAPH = 0x00000001;
/**
* Flag indicating that texts inside {@code <h1>}~{@code <h6>} elements will be separated from
* other texts with one newline character by default.
*/
public static final int FROM_HTML_SEPARATOR_LINE_BREAK_HEADING = 0x00000002;
/**
* Flag indicating that texts inside {@code <li>} elements will be separated from other texts
* with one newline character by default.
*/
public static final int FROM_HTML_SEPARATOR_LINE_BREAK_LIST_ITEM = 0x00000004;
/**
* Flag indicating that texts inside {@code <ul>} elements will be separated from other texts
* with one newline character by default.
*/
public static final int FROM_HTML_SEPARATOR_LINE_BREAK_LIST = 0x00000008;
/**
* Flag indicating that texts inside {@code <div>} elements will be separated from other texts
* with one newline character by default.
*/
public static final int FROM_HTML_SEPARATOR_LINE_BREAK_DIV = 0x00000010;
/**
* Flag indicating that texts inside {@code <blockquote>} elements will be separated from other
* texts with one newline character by default.
*/
public static final int FROM_HTML_SEPARATOR_LINE_BREAK_BLOCKQUOTE = 0x00000020;
/**
* Flag indicating that CSS color values should be used instead of those defined in
* {@link Color}.
*/
public static final int FROM_HTML_OPTION_USE_CSS_COLORS = 0x00000100;
/**
* Flags for {@link #fromHtml(String, int, ImageGetter, TagHandler)}: Separate block-level
* elements with blank lines (two newline characters) in between. This is the legacy behavior
* prior to N. The value is zero, i.e. none of the {@code FROM_HTML_SEPARATOR_LINE_BREAK_*}
* bits is set.
*/
public static final int FROM_HTML_MODE_LEGACY = 0x00000000;
/**
* Flags for {@link #fromHtml(String, int, ImageGetter, TagHandler)}: Separate block-level
* elements with line breaks (single newline character) in between. This inverts the
* {@link Spanned} to HTML string conversion done with the option
* {@link #TO_HTML_PARAGRAPH_LINES_INDIVIDUAL}. The value is the union of all six
* {@code FROM_HTML_SEPARATOR_LINE_BREAK_*} flags.
*/
public static final int FROM_HTML_MODE_COMPACT =
FROM_HTML_SEPARATOR_LINE_BREAK_PARAGRAPH
| FROM_HTML_SEPARATOR_LINE_BREAK_HEADING
| FROM_HTML_SEPARATOR_LINE_BREAK_LIST_ITEM
| FROM_HTML_SEPARATOR_LINE_BREAK_LIST
| FROM_HTML_SEPARATOR_LINE_BREAK_DIV
| FROM_HTML_SEPARATOR_LINE_BREAK_BLOCKQUOTE;
/**
* The bit which indicates if lines delimited by '\n' will be grouped into {@code <p>}
* elements. This is the mask applied to the {@code option} argument of
* {@link #toHtml(Spanned, int)}; it has the same value as
* {@link #TO_HTML_PARAGRAPH_LINES_INDIVIDUAL}.
*/
private static final int TO_HTML_PARAGRAPH_FLAG = 0x00000001;
/** Private, empty constructor: this class is not meant to be instantiated. */
private Html() { }
/**
 * Converts the provided HTML string into displayable styled text using the legacy flags
 * {@link #FROM_HTML_MODE_LEGACY}. No image getter and no tag handler are used.
 *
 * @param source the HTML string to convert
 * @return the styled text produced from {@code source}
 * @deprecated use {@link #fromHtml(String, int)} instead.
 */
@Deprecated
public static Spanned fromHtml(String source) {
    // The two-argument overload forwards with a null ImageGetter and a null TagHandler.
    return fromHtml(source, FROM_HTML_MODE_LEGACY);
}
/**
* Returns displayable styled text from the provided HTML string. Any <img> tags in the
* HTML will display as a generic replacement image which your program can then go through and
* replace with real images.
*
*
This uses TagSoup to handle real HTML, including all of the brokenness found in the wild. */ public static Spanned fromHtml(String source, int flags) { return fromHtml(source, flags, null, null); } /** * Lazy initialization holder for HTML parser. This class will * a) be preloaded by the zygote, or b) not loaded until absolutely * necessary. */ private static class HtmlParser { private static final HTMLSchema schema = new HTMLSchema(); } /** * Returns displayable styled text from the provided HTML string with the legacy flags * {@link #FROM_HTML_MODE_LEGACY}. * * @deprecated use {@link #fromHtml(String, int, ImageGetter, TagHandler)} instead. */ @Deprecated public static Spanned fromHtml(String source, ImageGetter imageGetter, TagHandler tagHandler) { return fromHtml(source, FROM_HTML_MODE_LEGACY, imageGetter, tagHandler); } /** * Returns displayable styled text from the provided HTML string. Any <img> tags in the * HTML will use the specified ImageGetter to request a representation of the image (use null * if you don't want this) and the specified TagHandler to handle unknown tags (specify null if * you don't want this). * *
This uses TagSoup to handle real HTML, including all of the brokenness found in the wild. */ public static Spanned fromHtml(String source, int flags, ImageGetter imageGetter, TagHandler tagHandler) { Parser parser = new Parser(); try { parser.setProperty(Parser.schemaProperty, HtmlParser.schema); } catch (org.xml.sax.SAXNotRecognizedException e) { // Should not happen. throw new RuntimeException(e); } catch (org.xml.sax.SAXNotSupportedException e) { // Should not happen. throw new RuntimeException(e); } HtmlToSpannedConverter converter = new HtmlToSpannedConverter(source, imageGetter, tagHandler, parser, flags); return converter.convert(); } /** * @deprecated use {@link #toHtml(Spanned, int)} instead. */ @Deprecated public static String toHtml(Spanned text) { return toHtml(text, TO_HTML_PARAGRAPH_LINES_CONSECUTIVE); } /** * Returns an HTML representation of the provided Spanned text. A best effort is * made to add HTML tags corresponding to spans. Also note that HTML metacharacters * (such as "<" and "&") within the input text are escaped. * * @param text input text to convert * @param option one of {@link #TO_HTML_PARAGRAPH_LINES_CONSECUTIVE} or * {@link #TO_HTML_PARAGRAPH_LINES_INDIVIDUAL} * @return string containing input converted to HTML */ public static String toHtml(Spanned text, int option) { StringBuilder out = new StringBuilder(); withinHtml(out, text, option); return out.toString(); } /** * Returns an HTML escaped representation of the given plain text. 
*/ public static String escapeHtml(CharSequence text) { StringBuilder out = new StringBuilder(); withinStyle(out, text, 0, text.length()); return out.toString(); } private static void withinHtml(StringBuilder out, Spanned text, int option) { if ((option & TO_HTML_PARAGRAPH_FLAG) == TO_HTML_PARAGRAPH_LINES_CONSECUTIVE) { encodeTextAlignmentByDiv(out, text, option); return; } withinDiv(out, text, 0, text.length(), option); } private static void encodeTextAlignmentByDiv(StringBuilder out, Spanned text, int option) { int len = text.length(); int next; for (int i = 0; i < len; i = next) { next = text.nextSpanTransition(i, len, ParagraphStyle.class); ParagraphStyle[] style = text.getSpans(i, next, ParagraphStyle.class); String elements = " "; boolean needDiv = false; for(int j = 0; j < style.length; j++) { if (style[j] instanceof AlignmentSpan) { Layout.Alignment align = ((AlignmentSpan) style[j]).getAlignment(); needDiv = true; if (align == Layout.Alignment.ALIGN_CENTER) { elements = "align=\"center\" " + elements; } else if (align == Layout.Alignment.ALIGN_OPPOSITE) { elements = "align=\"right\" " + elements; } else { elements = "align=\"left\" " + elements; } } } if (needDiv) { out.append("
"); } withinBlockquote(out, text, i, next, option); for (QuoteSpan quote : quotes) { out.append("\n"); } } } private static String getTextDirection(Spanned text, int start, int end) { final int len = end - start; final byte[] levels = ArrayUtils.newUnpaddedByteArray(len); final char[] buffer = TextUtils.obtain(len); TextUtils.getChars(text, start, end, buffer, 0); int paraDir = AndroidBidi.bidi(Layout.DIR_REQUEST_DEFAULT_LTR, buffer, levels, len, false /* no info */); switch(paraDir) { case Layout.DIR_RIGHT_TO_LEFT: return " dir=\"rtl\""; case Layout.DIR_LEFT_TO_RIGHT: default: return " dir=\"ltr\""; } } private static String getTextStyles(Spanned text, int start, int end, boolean forceNoVerticalMargin, boolean includeTextAlign) { String margin = null; String textAlign = null; if (forceNoVerticalMargin) { margin = "margin-top:0; margin-bottom:0;"; } if (includeTextAlign) { final AlignmentSpan[] alignmentSpans = text.getSpans(start, end, AlignmentSpan.class); // Only use the last AlignmentSpan with flag SPAN_PARAGRAPH for (int i = alignmentSpans.length - 1; i >= 0; i--) { AlignmentSpan s = alignmentSpans[i]; if ((text.getSpanFlags(s) & Spanned.SPAN_PARAGRAPH) == Spanned.SPAN_PARAGRAPH) { final Layout.Alignment alignment = s.getAlignment(); if (alignment == Layout.Alignment.ALIGN_NORMAL) { textAlign = "text-align:start;"; } else if (alignment == Layout.Alignment.ALIGN_CENTER) { textAlign = "text-align:center;"; } else if (alignment == Layout.Alignment.ALIGN_OPPOSITE) { textAlign = "text-align:end;"; } break; } } } if (margin == null && textAlign == null) { return ""; } final StringBuilder style = new StringBuilder(" style=\""); if (margin != null && textAlign != null) { style.append(margin).append(" ").append(textAlign); } else if (margin != null) { style.append(margin); } else if (textAlign != null) { style.append(textAlign); } return style.append("\"").toString(); } private static void withinBlockquote(StringBuilder out, Spanned text, int start, int end, int 
option) { if ((option & TO_HTML_PARAGRAPH_FLAG) == TO_HTML_PARAGRAPH_LINES_CONSECUTIVE) { withinBlockquoteConsecutive(out, text, start, end); } else { withinBlockquoteIndividual(out, text, start, end); } } private static void withinBlockquoteIndividual(StringBuilder out, Spanned text, int start, int end) { boolean isInList = false; int next; for (int i = start; i <= end; i = next) { next = TextUtils.indexOf(text, '\n', i, end); if (next < 0) { next = end; } if (next == i) { if (isInList) { // Current paragraph is no longer a list item; close the previously opened list isInList = false; out.append("\n"); } out.append("
");
int next;
for (int i = start; i < end; i = next) {
next = TextUtils.indexOf(text, '\n', i, end);
if (next < 0) {
next = end;
}
int nl = 0;
while (next < end && text.charAt(next) == '\n') {
nl++;
next++;
}
withinParagraph(out, text, i, next - nl);
if (nl == 1) {
out.append("
\n");
} else {
for (int j = 2; j < nl; j++) {
out.append("
");
}
if (next != end) {
/* Paragraph should be closed and reopened */
out.append("
"); } } } out.append("
\n"); } private static void withinParagraph(StringBuilder out, Spanned text, int start, int end) { int next; for (int i = start; i < end; i = next) { next = text.nextSpanTransition(i, end, CharacterStyle.class); CharacterStyle[] style = text.getSpans(i, next, CharacterStyle.class); for (int j = 0; j < style.length; j++) { if (style[j] instanceof StyleSpan) { int s = ((StyleSpan) style[j]).getStyle(); if ((s & Typeface.BOLD) != 0) { out.append(""); } if ((s & Typeface.ITALIC) != 0) { out.append(""); } } if (style[j] instanceof TypefaceSpan) { String s = ((TypefaceSpan) style[j]).getFamily(); if ("monospace".equals(s)) { out.append(""); } } if (style[j] instanceof SuperscriptSpan) { out.append(""); } if (style[j] instanceof SubscriptSpan) { out.append(""); } if (style[j] instanceof UnderlineSpan) { out.append(""); } if (style[j] instanceof StrikethroughSpan) { out.append(""); } if (style[j] instanceof URLSpan) { out.append(""); } if (style[j] instanceof ImageSpan) { out.append(""); // Don't output the dummy character underlying the image. 
i = next; } if (style[j] instanceof AbsoluteSizeSpan) { AbsoluteSizeSpan s = ((AbsoluteSizeSpan) style[j]); float sizeDip = s.getSize(); if (!s.getDip()) { Application application = ActivityThread.currentApplication(); sizeDip /= application.getResources().getDisplayMetrics().density; } // px in CSS is the equivalance of dip in Android out.append(String.format("", sizeDip)); } if (style[j] instanceof RelativeSizeSpan) { float sizeEm = ((RelativeSizeSpan) style[j]).getSizeChange(); out.append(String.format("", sizeEm)); } if (style[j] instanceof ForegroundColorSpan) { int color = ((ForegroundColorSpan) style[j]).getForegroundColor(); out.append(String.format("", 0xFFFFFF & color)); } if (style[j] instanceof BackgroundColorSpan) { int color = ((BackgroundColorSpan) style[j]).getBackgroundColor(); out.append(String.format("", 0xFFFFFF & color)); } } withinStyle(out, text, i, next); for (int j = style.length - 1; j >= 0; j--) { if (style[j] instanceof BackgroundColorSpan) { out.append(""); } if (style[j] instanceof ForegroundColorSpan) { out.append(""); } if (style[j] instanceof RelativeSizeSpan) { out.append(""); } if (style[j] instanceof AbsoluteSizeSpan) { out.append(""); } if (style[j] instanceof URLSpan) { out.append(""); } if (style[j] instanceof StrikethroughSpan) { out.append(""); } if (style[j] instanceof UnderlineSpan) { out.append(""); } if (style[j] instanceof SubscriptSpan) { out.append(""); } if (style[j] instanceof SuperscriptSpan) { out.append(""); } if (style[j] instanceof TypefaceSpan) { String s = ((TypefaceSpan) style[j]).getFamily(); if (s.equals("monospace")) { out.append(""); } } if (style[j] instanceof StyleSpan) { int s = ((StyleSpan) style[j]).getStyle(); if ((s & Typeface.BOLD) != 0) { out.append(""); } if ((s & Typeface.ITALIC) != 0) { out.append(""); } } } } } private static void withinStyle(StringBuilder out, CharSequence text, int start, int end) { for (int i = start; i < end; i++) { char c = text.charAt(i); if (c == '<') { 
out.append("<"); } else if (c == '>') { out.append(">"); } else if (c == '&') { out.append("&"); } else if (c >= 0xD800 && c <= 0xDFFF) { if (c < 0xDC00 && i + 1 < end) { char d = text.charAt(i + 1); if (d >= 0xDC00 && d <= 0xDFFF) { i++; int codepoint = 0x010000 | (int) c - 0xD800 << 10 | (int) d - 0xDC00; out.append("").append(codepoint).append(";"); } } } else if (c > 0x7E || c < ' ') { out.append("").append((int) c).append(";"); } else if (c == ' ') { while (i + 1 < end && text.charAt(i + 1) == ' ') { out.append(" "); i++; } out.append(' '); } else { out.append(c); } } } } class HtmlToSpannedConverter implements ContentHandler { private static final float[] HEADING_SIZES = { 1.5f, 1.4f, 1.3f, 1.2f, 1.1f, 1f, }; private String mSource; private XMLReader mReader; private SpannableStringBuilder mSpannableStringBuilder; private Html.ImageGetter mImageGetter; private Html.TagHandler mTagHandler; private int mFlags; private static Pattern sTextAlignPattern; private static Pattern sForegroundColorPattern; private static Pattern sBackgroundColorPattern; private static Pattern sTextDecorationPattern; /** * Name-value mapping of HTML/CSS colors which have different values in {@link Color}. */ private static final Map