/*
* Copyright (C) 2007 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.text;
import android.graphics.Color;
import com.android.internal.util.ArrayUtils;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import android.content.res.ColorStateList;
import android.content.res.Resources;
import android.graphics.Typeface;
import android.graphics.drawable.Drawable;
import android.text.style.AbsoluteSizeSpan;
import android.text.style.AlignmentSpan;
import android.text.style.CharacterStyle;
import android.text.style.ForegroundColorSpan;
import android.text.style.ImageSpan;
import android.text.style.ParagraphStyle;
import android.text.style.QuoteSpan;
import android.text.style.RelativeSizeSpan;
import android.text.style.StrikethroughSpan;
import android.text.style.StyleSpan;
import android.text.style.SubscriptSpan;
import android.text.style.SuperscriptSpan;
import android.text.style.TextAppearanceSpan;
import android.text.style.TypefaceSpan;
import android.text.style.URLSpan;
import android.text.style.UnderlineSpan;
import java.io.IOException;
import java.io.StringReader;
/**
* This class processes HTML strings into displayable styled text.
* Not all HTML tags are supported.
*/
public class Html {
/**
* Retrieves images for HTML <img> tags.
*/
public static interface ImageGetter {
/**
* This methos is called when the HTML parser encounters an
* <img> tag. The source
argument is the
* string from the "src" attribute; the return value should be
* a Drawable representation of the image or null
* for a generic replacement image. Make sure you call
* setBounds() on your Drawable if it doesn't already have
* its bounds set.
*/
public Drawable getDrawable(String source);
}
/**
* Is notified when HTML tags are encountered that the parser does
* not know how to interpret.
*/
public static interface TagHandler {
/**
* This method will be called whenn the HTML parser encounters
* a tag that it does not know how to interpret.
*/
public void handleTag(boolean opening, String tag,
Editable output, XMLReader xmlReader);
}
private Html() { }
/**
* Returns displayable styled text from the provided HTML string.
* Any <img> tags in the HTML will display as a generic
* replacement image which your program can then go through and
* replace with real images.
*
*
This uses TagSoup to handle real HTML, including all of the brokenness found in the wild.
*/
public static Spanned fromHtml(String source) {
return fromHtml(source, null, null);
}
/**
* Lazy initialization holder for HTML parser. This class will
* a) be preloaded by the zygote, or b) not loaded until absolutely
* necessary.
*/
private static class HtmlParser {
private static final HTMLSchema schema = new HTMLSchema();
}
/**
* Returns displayable styled text from the provided HTML string.
* Any <img> tags in the HTML will use the specified ImageGetter
* to request a representation of the image (use null if you don't
* want this) and the specified TagHandler to handle unknown tags
* (specify null if you don't want this).
*
*
This uses TagSoup to handle real HTML, including all of the brokenness found in the wild.
*/
public static Spanned fromHtml(String source, ImageGetter imageGetter,
TagHandler tagHandler) {
Parser parser = new Parser();
try {
parser.setProperty(Parser.schemaProperty, HtmlParser.schema);
} catch (org.xml.sax.SAXNotRecognizedException e) {
// Should not happen.
throw new RuntimeException(e);
} catch (org.xml.sax.SAXNotSupportedException e) {
// Should not happen.
throw new RuntimeException(e);
}
HtmlToSpannedConverter converter =
new HtmlToSpannedConverter(source, imageGetter, tagHandler,
parser);
return converter.convert();
}
/**
* Returns an HTML representation of the provided Spanned text.
*/
public static String toHtml(Spanned text) {
StringBuilder out = new StringBuilder();
withinHtml(out, text);
return out.toString();
}
/**
* Returns an HTML escaped representation of the given plain text.
*/
public static String escapeHtml(CharSequence text) {
StringBuilder out = new StringBuilder();
withinStyle(out, text, 0, text.length());
return out.toString();
}
private static void withinHtml(StringBuilder out, Spanned text) {
int len = text.length();
int next;
for (int i = 0; i < text.length(); i = next) {
next = text.nextSpanTransition(i, len, ParagraphStyle.class);
ParagraphStyle[] style = text.getSpans(i, next, ParagraphStyle.class);
String elements = " ";
boolean needDiv = false;
for(int j = 0; j < style.length; j++) {
if (style[j] instanceof AlignmentSpan) {
Layout.Alignment align =
((AlignmentSpan) style[j]).getAlignment();
needDiv = true;
if (align == Layout.Alignment.ALIGN_CENTER) {
elements = "align=\"center\" " + elements;
} else if (align == Layout.Alignment.ALIGN_OPPOSITE) {
elements = "align=\"right\" " + elements;
} else {
elements = "align=\"left\" " + elements;
}
}
}
if (needDiv) {
out.append("
");
}
withinDiv(out, text, i, next);
if (needDiv) {
out.append("
");
}
}
}
private static void withinDiv(StringBuilder out, Spanned text,
int start, int end) {
int next;
for (int i = start; i < end; i = next) {
next = text.nextSpanTransition(i, end, QuoteSpan.class);
QuoteSpan[] quotes = text.getSpans(i, next, QuoteSpan.class);
for (QuoteSpan quote : quotes) {
out.append("");
}
withinBlockquote(out, text, i, next);
for (QuoteSpan quote : quotes) {
out.append("
\n");
}
}
}
private static String getOpenParaTagWithDirection(Spanned text, int start, int end) {
final int len = end - start;
final byte[] levels = ArrayUtils.newUnpaddedByteArray(len);
final char[] buffer = TextUtils.obtain(len);
TextUtils.getChars(text, start, end, buffer, 0);
int paraDir = AndroidBidi.bidi(Layout.DIR_REQUEST_DEFAULT_LTR, buffer, levels, len,
false /* no info */);
switch(paraDir) {
case Layout.DIR_RIGHT_TO_LEFT:
return "";
case Layout.DIR_LEFT_TO_RIGHT:
default:
return "
";
}
}
private static void withinBlockquote(StringBuilder out, Spanned text,
int start, int end) {
out.append(getOpenParaTagWithDirection(text, start, end));
int next;
for (int i = start; i < end; i = next) {
next = TextUtils.indexOf(text, '\n', i, end);
if (next < 0) {
next = end;
}
int nl = 0;
while (next < end && text.charAt(next) == '\n') {
nl++;
next++;
}
withinParagraph(out, text, i, next - nl, nl, next == end);
}
out.append("
\n");
}
private static void withinParagraph(StringBuilder out, Spanned text,
int start, int end, int nl,
boolean last) {
int next;
for (int i = start; i < end; i = next) {
next = text.nextSpanTransition(i, end, CharacterStyle.class);
CharacterStyle[] style = text.getSpans(i, next,
CharacterStyle.class);
for (int j = 0; j < style.length; j++) {
if (style[j] instanceof StyleSpan) {
int s = ((StyleSpan) style[j]).getStyle();
if ((s & Typeface.BOLD) != 0) {
out.append("");
}
if ((s & Typeface.ITALIC) != 0) {
out.append("");
}
}
if (style[j] instanceof TypefaceSpan) {
String s = ((TypefaceSpan) style[j]).getFamily();
if (s.equals("monospace")) {
out.append("");
}
}
if (style[j] instanceof SuperscriptSpan) {
out.append("");
}
if (style[j] instanceof SubscriptSpan) {
out.append("");
}
if (style[j] instanceof UnderlineSpan) {
out.append("");
}
if (style[j] instanceof StrikethroughSpan) {
out.append("");
}
if (style[j] instanceof URLSpan) {
out.append("");
}
if (style[j] instanceof ImageSpan) {
out.append("");
// Don't output the dummy character underlying the image.
i = next;
}
if (style[j] instanceof AbsoluteSizeSpan) {
out.append("");
}
if (style[j] instanceof ForegroundColorSpan) {
out.append("");
}
}
withinStyle(out, text, i, next);
for (int j = style.length - 1; j >= 0; j--) {
if (style[j] instanceof ForegroundColorSpan) {
out.append("");
}
if (style[j] instanceof AbsoluteSizeSpan) {
out.append("");
}
if (style[j] instanceof URLSpan) {
out.append("");
}
if (style[j] instanceof StrikethroughSpan) {
out.append("");
}
if (style[j] instanceof UnderlineSpan) {
out.append("");
}
if (style[j] instanceof SubscriptSpan) {
out.append("");
}
if (style[j] instanceof SuperscriptSpan) {
out.append("");
}
if (style[j] instanceof TypefaceSpan) {
String s = ((TypefaceSpan) style[j]).getFamily();
if (s.equals("monospace")) {
out.append("");
}
}
if (style[j] instanceof StyleSpan) {
int s = ((StyleSpan) style[j]).getStyle();
if ((s & Typeface.BOLD) != 0) {
out.append("");
}
if ((s & Typeface.ITALIC) != 0) {
out.append("");
}
}
}
}
String p = last ? "" : "\n" + getOpenParaTagWithDirection(text, start, end);
if (nl == 1) {
out.append("
\n");
} else if (nl == 2) {
out.append(p);
} else {
for (int i = 2; i < nl; i++) {
out.append("
");
}
out.append(p);
}
}
private static void withinStyle(StringBuilder out, CharSequence text,
int start, int end) {
for (int i = start; i < end; i++) {
char c = text.charAt(i);
if (c == '<') {
out.append("<");
} else if (c == '>') {
out.append(">");
} else if (c == '&') {
out.append("&");
} else if (c >= 0xD800 && c <= 0xDFFF) {
if (c < 0xDC00 && i + 1 < end) {
char d = text.charAt(i + 1);
if (d >= 0xDC00 && d <= 0xDFFF) {
i++;
int codepoint = 0x010000 | (int) c - 0xD800 << 10 | (int) d - 0xDC00;
out.append("").append(codepoint).append(";");
}
}
} else if (c > 0x7E || c < ' ') {
out.append("").append((int) c).append(";");
} else if (c == ' ') {
while (i + 1 < end && text.charAt(i + 1) == ' ') {
out.append(" ");
i++;
}
out.append(' ');
} else {
out.append(c);
}
}
}
}
class HtmlToSpannedConverter implements ContentHandler {
private static final float[] HEADER_SIZES = {
1.5f, 1.4f, 1.3f, 1.2f, 1.1f, 1f,
};
private String mSource;
private XMLReader mReader;
private SpannableStringBuilder mSpannableStringBuilder;
private Html.ImageGetter mImageGetter;
private Html.TagHandler mTagHandler;
public HtmlToSpannedConverter(
String source, Html.ImageGetter imageGetter, Html.TagHandler tagHandler,
Parser parser) {
mSource = source;
mSpannableStringBuilder = new SpannableStringBuilder();
mImageGetter = imageGetter;
mTagHandler = tagHandler;
mReader = parser;
}
public Spanned convert() {
mReader.setContentHandler(this);
try {
mReader.parse(new InputSource(new StringReader(mSource)));
} catch (IOException e) {
// We are reading from a string. There should not be IO problems.
throw new RuntimeException(e);
} catch (SAXException e) {
// TagSoup doesn't throw parse exceptions.
throw new RuntimeException(e);
}
// Fix flags and range for paragraph-type markup.
Object[] obj = mSpannableStringBuilder.getSpans(0, mSpannableStringBuilder.length(), ParagraphStyle.class);
for (int i = 0; i < obj.length; i++) {
int start = mSpannableStringBuilder.getSpanStart(obj[i]);
int end = mSpannableStringBuilder.getSpanEnd(obj[i]);
// If the last line of the range is blank, back off by one.
if (end - 2 >= 0) {
if (mSpannableStringBuilder.charAt(end - 1) == '\n' &&
mSpannableStringBuilder.charAt(end - 2) == '\n') {
end--;
}
}
if (end == start) {
mSpannableStringBuilder.removeSpan(obj[i]);
} else {
mSpannableStringBuilder.setSpan(obj[i], start, end, Spannable.SPAN_PARAGRAPH);
}
}
return mSpannableStringBuilder;
}
private void handleStartTag(String tag, Attributes attributes) {
if (tag.equalsIgnoreCase("br")) {
// We don't need to handle this. TagSoup will ensure that there's a for each
// so we can safely emite the linebreaks when we handle the close tag.
} else if (tag.equalsIgnoreCase("p")) {
handleP(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("div")) {
handleP(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("strong")) {
start(mSpannableStringBuilder, new Bold());
} else if (tag.equalsIgnoreCase("b")) {
start(mSpannableStringBuilder, new Bold());
} else if (tag.equalsIgnoreCase("em")) {
start(mSpannableStringBuilder, new Italic());
} else if (tag.equalsIgnoreCase("cite")) {
start(mSpannableStringBuilder, new Italic());
} else if (tag.equalsIgnoreCase("dfn")) {
start(mSpannableStringBuilder, new Italic());
} else if (tag.equalsIgnoreCase("i")) {
start(mSpannableStringBuilder, new Italic());
} else if (tag.equalsIgnoreCase("big")) {
start(mSpannableStringBuilder, new Big());
} else if (tag.equalsIgnoreCase("small")) {
start(mSpannableStringBuilder, new Small());
} else if (tag.equalsIgnoreCase("font")) {
startFont(mSpannableStringBuilder, attributes);
} else if (tag.equalsIgnoreCase("blockquote")) {
handleP(mSpannableStringBuilder);
start(mSpannableStringBuilder, new Blockquote());
} else if (tag.equalsIgnoreCase("tt")) {
start(mSpannableStringBuilder, new Monospace());
} else if (tag.equalsIgnoreCase("a")) {
startA(mSpannableStringBuilder, attributes);
} else if (tag.equalsIgnoreCase("u")) {
start(mSpannableStringBuilder, new Underline());
} else if (tag.equalsIgnoreCase("sup")) {
start(mSpannableStringBuilder, new Super());
} else if (tag.equalsIgnoreCase("sub")) {
start(mSpannableStringBuilder, new Sub());
} else if (tag.length() == 2 &&
Character.toLowerCase(tag.charAt(0)) == 'h' &&
tag.charAt(1) >= '1' && tag.charAt(1) <= '6') {
handleP(mSpannableStringBuilder);
start(mSpannableStringBuilder, new Header(tag.charAt(1) - '1'));
} else if (tag.equalsIgnoreCase("img")) {
startImg(mSpannableStringBuilder, attributes, mImageGetter);
} else if (mTagHandler != null) {
mTagHandler.handleTag(true, tag, mSpannableStringBuilder, mReader);
}
}
private void handleEndTag(String tag) {
if (tag.equalsIgnoreCase("br")) {
handleBr(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("p")) {
handleP(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("div")) {
handleP(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("strong")) {
end(mSpannableStringBuilder, Bold.class, new StyleSpan(Typeface.BOLD));
} else if (tag.equalsIgnoreCase("b")) {
end(mSpannableStringBuilder, Bold.class, new StyleSpan(Typeface.BOLD));
} else if (tag.equalsIgnoreCase("em")) {
end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
} else if (tag.equalsIgnoreCase("cite")) {
end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
} else if (tag.equalsIgnoreCase("dfn")) {
end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
} else if (tag.equalsIgnoreCase("i")) {
end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
} else if (tag.equalsIgnoreCase("big")) {
end(mSpannableStringBuilder, Big.class, new RelativeSizeSpan(1.25f));
} else if (tag.equalsIgnoreCase("small")) {
end(mSpannableStringBuilder, Small.class, new RelativeSizeSpan(0.8f));
} else if (tag.equalsIgnoreCase("font")) {
endFont(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("blockquote")) {
handleP(mSpannableStringBuilder);
end(mSpannableStringBuilder, Blockquote.class, new QuoteSpan());
} else if (tag.equalsIgnoreCase("tt")) {
end(mSpannableStringBuilder, Monospace.class,
new TypefaceSpan("monospace"));
} else if (tag.equalsIgnoreCase("a")) {
endA(mSpannableStringBuilder);
} else if (tag.equalsIgnoreCase("u")) {
end(mSpannableStringBuilder, Underline.class, new UnderlineSpan());
} else if (tag.equalsIgnoreCase("sup")) {
end(mSpannableStringBuilder, Super.class, new SuperscriptSpan());
} else if (tag.equalsIgnoreCase("sub")) {
end(mSpannableStringBuilder, Sub.class, new SubscriptSpan());
} else if (tag.length() == 2 &&
Character.toLowerCase(tag.charAt(0)) == 'h' &&
tag.charAt(1) >= '1' && tag.charAt(1) <= '6') {
handleP(mSpannableStringBuilder);
endHeader(mSpannableStringBuilder);
} else if (mTagHandler != null) {
mTagHandler.handleTag(false, tag, mSpannableStringBuilder, mReader);
}
}
private static void handleP(SpannableStringBuilder text) {
int len = text.length();
if (len >= 1 && text.charAt(len - 1) == '\n') {
if (len >= 2 && text.charAt(len - 2) == '\n') {
return;
}
text.append("\n");
return;
}
if (len != 0) {
text.append("\n\n");
}
}
private static void handleBr(SpannableStringBuilder text) {
text.append("\n");
}
private static Object getLast(Spanned text, Class kind) {
/*
* This knows that the last returned object from getSpans()
* will be the most recently added.
*/
Object[] objs = text.getSpans(0, text.length(), kind);
if (objs.length == 0) {
return null;
} else {
return objs[objs.length - 1];
}
}
private static void start(SpannableStringBuilder text, Object mark) {
int len = text.length();
text.setSpan(mark, len, len, Spannable.SPAN_MARK_MARK);
}
private static void end(SpannableStringBuilder text, Class kind,
Object repl) {
int len = text.length();
Object obj = getLast(text, kind);
int where = text.getSpanStart(obj);
text.removeSpan(obj);
if (where != len) {
text.setSpan(repl, where, len, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
}
private static void startImg(SpannableStringBuilder text,
Attributes attributes, Html.ImageGetter img) {
String src = attributes.getValue("", "src");
Drawable d = null;
if (img != null) {
d = img.getDrawable(src);
}
if (d == null) {
d = Resources.getSystem().
getDrawable(com.android.internal.R.drawable.unknown_image);
d.setBounds(0, 0, d.getIntrinsicWidth(), d.getIntrinsicHeight());
}
int len = text.length();
text.append("\uFFFC");
text.setSpan(new ImageSpan(d, src), len, text.length(),
Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
private static void startFont(SpannableStringBuilder text,
Attributes attributes) {
String color = attributes.getValue("", "color");
String face = attributes.getValue("", "face");
int len = text.length();
text.setSpan(new Font(color, face), len, len, Spannable.SPAN_MARK_MARK);
}
private static void endFont(SpannableStringBuilder text) {
int len = text.length();
Object obj = getLast(text, Font.class);
int where = text.getSpanStart(obj);
text.removeSpan(obj);
if (where != len) {
Font f = (Font) obj;
if (!TextUtils.isEmpty(f.mColor)) {
if (f.mColor.startsWith("@")) {
Resources res = Resources.getSystem();
String name = f.mColor.substring(1);
int colorRes = res.getIdentifier(name, "color", "android");
if (colorRes != 0) {
ColorStateList colors = res.getColorStateList(colorRes);
text.setSpan(new TextAppearanceSpan(null, 0, 0, colors, null),
where, len,
Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
} else {
int c = Color.getHtmlColor(f.mColor);
if (c != -1) {
text.setSpan(new ForegroundColorSpan(c | 0xFF000000),
where, len,
Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
}
}
if (f.mFace != null) {
text.setSpan(new TypefaceSpan(f.mFace), where, len,
Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
}
}
private static void startA(SpannableStringBuilder text, Attributes attributes) {
String href = attributes.getValue("", "href");
int len = text.length();
text.setSpan(new Href(href), len, len, Spannable.SPAN_MARK_MARK);
}
private static void endA(SpannableStringBuilder text) {
int len = text.length();
Object obj = getLast(text, Href.class);
int where = text.getSpanStart(obj);
text.removeSpan(obj);
if (where != len) {
Href h = (Href) obj;
if (h.mHref != null) {
text.setSpan(new URLSpan(h.mHref), where, len,
Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
}
}
private static void endHeader(SpannableStringBuilder text) {
int len = text.length();
Object obj = getLast(text, Header.class);
int where = text.getSpanStart(obj);
text.removeSpan(obj);
// Back off not to change only the text, not the blank line.
while (len > where && text.charAt(len - 1) == '\n') {
len--;
}
if (where != len) {
Header h = (Header) obj;
text.setSpan(new RelativeSizeSpan(HEADER_SIZES[h.mLevel]),
where, len, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
text.setSpan(new StyleSpan(Typeface.BOLD),
where, len, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
}
}
public void setDocumentLocator(Locator locator) {
}
public void startDocument() throws SAXException {
}
public void endDocument() throws SAXException {
}
public void startPrefixMapping(String prefix, String uri) throws SAXException {
}
public void endPrefixMapping(String prefix) throws SAXException {
}
public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException {
handleStartTag(localName, attributes);
}
public void endElement(String uri, String localName, String qName) throws SAXException {
handleEndTag(localName);
}
public void characters(char ch[], int start, int length) throws SAXException {
StringBuilder sb = new StringBuilder();
/*
* Ignore whitespace that immediately follows other whitespace;
* newlines count as spaces.
*/
for (int i = 0; i < length; i++) {
char c = ch[i + start];
if (c == ' ' || c == '\n') {
char pred;
int len = sb.length();
if (len == 0) {
len = mSpannableStringBuilder.length();
if (len == 0) {
pred = '\n';
} else {
pred = mSpannableStringBuilder.charAt(len - 1);
}
} else {
pred = sb.charAt(len - 1);
}
if (pred != ' ' && pred != '\n') {
sb.append(' ');
}
} else {
sb.append(c);
}
}
mSpannableStringBuilder.append(sb);
}
public void ignorableWhitespace(char ch[], int start, int length) throws SAXException {
}
public void processingInstruction(String target, String data) throws SAXException {
}
public void skippedEntity(String name) throws SAXException {
}
private static class Bold { }
private static class Italic { }
private static class Underline { }
private static class Big { }
private static class Small { }
private static class Monospace { }
private static class Blockquote { }
private static class Super { }
private static class Sub { }
private static class Font {
public String mColor;
public String mFace;
public Font(String color, String face) {
mColor = color;
mFace = face;
}
}
private static class Href {
public String mHref;
public Href(String href) {
mHref = href;
}
}
private static class Header {
private int mLevel;
public Header(int level) {
mLevel = level;
}
}
}