数据"/>
Jsoup解析html中的指定数据
- <div class="content">
- //eg1: parse a Baidu Music page and print the href of every link in its "content" element
- Document doc = Jsoup.connect(".html").get();
- Element singerListDiv = doc.getElementsByAttributeValue("class", "content").first(); // first element whose class attribute is "content"
- Elements links = singerListDiv.getElementsByTag("a");
- for (Element link: links) {
- String linkHref = link.attr("href");
- String linkText = link.text().trim(); // read here but not printed
- System.out.println(linkHref);
- }
- //eg2: parse a perpetual-calendar page and print the last cell of every table row
- Document doc = Jsoup.connect(".asp?dt=2012-03-03").get();
- Element infoTable = doc.getElementsByAttributeValue("class", "table002").first(); // first element whose class attribute is "table002"
- Elements tableLineInfos = infoTable.select("tr");
- for (Element lineInfo : tableLineInfos) {
- String lineInfoContent = lineInfo.select("td").last().text().trim(); // text of the last <td> in this row
- System.out.println("jsoup is :" + lineInfoContent);
- }
- //eg5: find HTML elements in a local file with CSS-style selectors
- File input = new File("/tmp/input.html");
- Document doc = Jsoup.parse(input, "UTF-8", "/"); // arguments: file, charset name, base URI
- Elements links = doc.select("a[href]"); // every <a> that has an href attribute
- Elements pngs = doc.select("img[src$=.png]"); // every <img> whose src ends with .png
- Element masthead = doc.select("div.masthead").first();// first <div> with class "masthead"
- Elements resultLinks = doc.select("h3.r > a"); // <a> elements that are direct children of an <h3> with class "r"
- package com.mike.activity;
- import java.io.File;
- import java.io.IOException;
- import org.jsoup.Connection;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- import android.app.Activity;
- import android.os.Bundle;
- import android.util.Log;
- import android.view.View;
- import android.widget.TextView;
- public class JsoupDemoActivity extends Activity {
- /** Called when the activity is first created. */
- private StringBuilder sb ;
- @Override
- public void onCreate(Bundle savedInstanceState) {
- super.onCreate(savedInstanceState);
- setContentView(R.layout.main);
- sb = new StringBuilder();
- TextView textView = (TextView) findViewById(R.id.textView1);
- try {
- // //eg1:解析百度音乐
- // Document doc = Jsoup.connect(".html").get();
- // Element singerListDiv = doc.getElementsByAttributeValue("class", "content").first();
- // Elements links = singerListDiv.getElementsByTag("a");
- //
- // for (Element link: links) {
- // String linkHref = link.attr("href");
- // String linkText = link.text().trim();
- // System.out.println(linkHref);
- // }
- //
- //
- // //eg2:解析万年历
- // Document doc = Jsoup.connect(".asp?dt=2012-03-03").get();
- // Element infoTable = doc.getElementsByAttributeValue("class", "table002").first();
- // Elements tableLineInfos = infoTable.select("tr");
- // for (Element lineInfo : tableLineInfos) {
- // String lineInfoContent = lineInfo.select("td").last().text().trim();
- // System.out.println("jsoup is :" + lineInfoContent);
- // }
- //eg3:解析指定段落的内容----注意此代码中的语法:<div class="artHead">
- // Document doc = Jsoup.connect("").get();
- // Elements divs = doc.select("div.artHead");
- // for (Element div: divs) {
- // System.out.println(div.select("h3[class=artTitle]"));
- // }
- //eg4:
- // Document doc = Jsoup.connect("").get();
- // //eg5:查找html元素
- File input = new File("/tmp/input.html");
- Document doc = Jsoup.parse(input, "UTF-8", "/");
- Elements links = doc.select("a[href]"); // 链接
- Elements pngs = doc.select("img[src$=.png]"); // 所有 png 的图片
- Element masthead = doc.select("div.masthead").first();// div with class=masthead
- Elements resultLinks = doc.select("h3.r > a"); // direct a after h3
- // //test
- // File input = new File("D:/test.html");
- // Document doc = Jsoup.parse(input,"UTF-8","/");
- /*
- * 项目数据准备
- */
- //eg5:万年历:.htm
- //data1:dayFav(宜)
- // Document doc = Jsoup.connect(".htm").get();
- // Element dayFav = doc.getElementsByAttributeValue("class", "ly2").first();
- // Elements dayFavItems = dayFav.getElementsByTag("a");
- // for (Element dayFavItem:dayFavItems) {
- // System.out.println(dayFavItem.text());
- // }
- //data2:dayUnfav(忌)
- // Element dayUnfav = doc.getElementsByAttributeValue("class", "lj2").first();
- // Elements dayUnfavItems = dayUnfav.getElementsByTag("a");
- // for (Element dayUnfavItem:dayUnfavItems) {
- // System.out.println(dayUnfavItem.text());
- // }
- //data3:taiSheng(胎神)
- // Element taiSheng = doc.getElementsByAttributeValue("class", "lts2").first();
- // System.out.println(taiSheng.text());
- //data4:chong sha(冲煞信息)
- // Element chong = doc.getElementsByAttributeValue("class", "lcs").first();
- // Element sha = doc.getElementsByAttributeValue("class", "lcs").get(1);
- // System.out.println(chong.text());
- // System.out.println(sha.text());
- //data5:zhengChong(正冲)和zhiXing(值星)
- // Element zhengChong = doc.getElementsByAttributeValue("class", "lzc2").first();
- // Element zhiXing = doc.getElementsByAttributeValue("class", "lzx2").first();
- // System.out.println(zhengChong.text());
- // System.out.println(zhiXing.text());
- //data6:godFav(吉神宜趋)
- // Element godUnfav = doc.getElementsByAttributeValue("class", "js2").first();
- // System.out.println(godUnfav.text());
- //data7:godUnfav(凶神宜忌)
- // Element godFav = doc.getElementsByAttributeValue("class", "xs2").first();
- // System.out.println(godFav.text());
- //data8:pengZuBaiJi(彭祖百忌)
- // Element pengZuBaiJi = doc.getElementsByAttributeValue("class", "pz2").first();
- // System.out.println(pengZuBaiJi.text());
- //data9:wuXing(五行)
- // Element wuXing = doc.getElementsByAttributeValue("class", "wuh2").first();
- // System.out.println(wuXing.text());
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }
更多推荐
Jsoup解析html中的指定数据
发布评论