本文共 1346 字,大约阅读时间需要 4 分钟。
/**
 * Demonstrates HtmlPage usage by walking the nodes of a small in-memory document.
 *
 * <p>The sample HTML is fed to a {@code Parser}, visited with an {@code HtmlPage},
 * and then three things are written to standard output: the page title, the HTML
 * of every node returned by {@code getBody()}, and the plain text of every table
 * cell (one output line per table row, cells separated by a space).
 *
 * <p>A {@code ParserException} raised along the way is caught and its stack trace
 * printed; it is not rethrown.
 */
public void testHtmlPage() {
    // The id attributes use plain ASCII single quotes. Typographic quotes are not
    // attribute delimiters in HTML and would end up inside the attribute value.
    // NOTE(review): the <tr> elements are left unclosed exactly as in the original sample.
    String inputHTML = "<html>" + "<head>" +
            "<title>Welcome to the HTMLParser website</title>" +
            "</head><body>Welcome to HTMLParser" +
            "<table id='table1' >" +
            "<tr><td>1-11</td><td>1-12</td><td>1-13</td>" +
            "<tr><td>1-21</td><td>1-22</td><td>1-23</td>" +
            "<tr><td>1-31</td><td>1-32</td><td>1-33</td></table>" +
            "<table id='table2' >" +
            "<tr><td>2-11</td><td>2-12</td><td>2-13</td>" +
            "<tr><td>2-21</td><td>2-22</td><td>2-23</td>" +
            "<tr><td>2-31</td><td>2-32</td><td>2-33</td></table>" +
            "</body></html>";
    try {
        Parser parser = new Parser();
        parser.setInputHTML(inputHTML);

        // The HtmlPage is passed to the parser as the visitor for all nodes.
        HtmlPage htmlPage = new HtmlPage(parser);
        parser.visitAllNodesWith(htmlPage);

        System.out.println("Title:" + htmlPage.getTitle());

        // Dump each node of the body list as HTML.
        NodeList bodyNodes = htmlPage.getBody();
        for (NodeIterator iterator = bodyNodes.elements(); iterator.hasMoreNodes();) {
            Node node = iterator.nextNode();
            System.out.println(node.toHtml());
        }

        // Print every table row on its own line, cells separated by a space.
        for (TableTag table : htmlPage.getTables()) {
            for (TableRow row : table.getRows()) {
                for (TableColumn column : row.getColumns()) {
                    System.out.print(column.toPlainTextString() + " ");
                }
                System.out.println();
            }
        }
    } catch (ParserException e) {
        e.printStackTrace();
    }
}
转载地址:https://blog.csdn.net/thamsyangsw/article/details/4424630 如侵犯您的版权,请留言回复原文章的地址,我们会为您删除此文章;给您带来不便,敬请谅解!