Element to string in HTMLDocument
Posted
by kalpesh
on Stack Overflow
See other posts from Stack Overflow
or by kalpesh
Published on 2010-05-21T14:11:27Z
Indexed on
2010/05/21
15:30 UTC
Read the original article
Hit count: 290
I have an Element object that belongs to an HTMLDocument, and I want to get the string value of this element.
I want this result:
Christina Toth, Pharm. D.=======================
Please see the code below.
/**
 * Parses the HTML returned by {@code Nullsoft.getInputStream()} into an
 * {@link HTMLDocument}, walks every element of the document and, for each
 * DIV, H2 or H3 element, prints the tag name followed by the concatenated
 * text of its direct child elements.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if reading, parsing or text extraction fails
 */
public static void main(String args[]) throws Exception {
    InputStream is = Nullsoft.getInputStream();
    // Decode explicitly as UTF-8 rather than with the platform default charset.
    // NOTE(review): assumes the stream carries UTF-8 bytes - confirm against
    // Nullsoft.getInputStream().
    BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    try {
        HTMLEditorKit.Parser parser = new ParserDelegator();
        HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);
        parser.parse(br, callback, true);
    } finally {
        // Release the reader (and the wrapped stream) even if parsing fails.
        br.close();
    }

    // Walk the parsed document and report the text of the tags of interest.
    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null) {
        AttributeSet attributes = element.getAttributes();
        Object name = attributes.getAttribute(StyleConstants.NameAttribute);
        boolean wanted = (name instanceof HTML.Tag)
                && ((name == HTML.Tag.DIV) || (name == HTML.Tag.H2) || (name == HTML.Tag.H3));
        if (!wanted) {
            continue;
        }

        // The buffer is local to this iteration, so the unsynchronised
        // StringBuilder is sufficient.
        StringBuilder text = new StringBuilder();
        int count = element.getElementCount();
        for (int i = 0; i < count; i++) {
            Element child = element.getElement(i);
            // Append the document text spanned by this child element.
            int startOffset = child.getStartOffset();
            int length = child.getEndOffset() - startOffset;
            text.append(htmlDoc.getText(startOffset, length));
        }
        System.out.println(name + ": " + text.toString());
    }
    // Explicit exit retained from the original program.
    System.exit(0);
}
/**
 * Builds an in-memory stream over a small sample HTML page, encoded as UTF-8.
 * The page contains a single DIV whose text is "Christina Toth, Pharm. D.".
 *
 * @return a new stream positioned at the start of the sample HTML; never {@code null}
 */
public static InputStream getInputStream() {
    String text = "<html>\n" +
            "<head>\n" +
            "<title>pg_0001</title>\n" +
            "\n" +
            "<style type=\"text/css\">\n" +
            ".ft3{font-style:normal;font-weight:bold;font-size:11px;font-family:Helvetica;color:#000000;}\n" +
            "</style>\n" +
            "</head>\n" +
            "<body vlink=\"#FFFFFF\" link=\"#FFFFFF\" bgcolor=\"#ffffff\">\n" +
            "\n" +
            "\n" +
            "<div style=\"position:absolute;top:597;left:252\"><nobr><span class=\"ft3\">Christina Toth, Pharm. D.</span></nobr></div>\n" +
            "\n" +
            "\n" +
            "</body>\n" +
            "</html>";
    // Using the Charset constant avoids the checked UnsupportedEncodingException,
    // so there is no failure path that could hand a null stream to the caller.
    return new ByteArrayInputStream(text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
}
© Stack Overflow or respective owner