package com.zzger.model;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import com.zzger.module.queue.UrlQueue;
import com.zzger.util.HttpUtils;
import com.zzger.util.RegexUtils;
public class WebSite {
/**
 * Site URL; assigned from the constructor argument.
 */
private String url;
/**
 * Queue of URLs that still need to be crawled.
 * NOTE(review): declared with the raw type although it is created with the
 * diamond operator; presumably holds String URLs — confirm and parameterize.
 */
private UrlQueue urls = new UrlQueue<>();
/**
 * URLs of pages that have already been crawled.
 * The backing ArrayList is wrapped with Collections.synchronizedList, so
 * individual calls are synchronized; iteration still requires the caller to
 * synchronize on the list manually.
 * NOTE(review): raw List; presumably holds String URLs — confirm.
 */
private List exitUrls = Collections.synchronizedList(new ArrayList<>());
// Count used to size mDoneSignal below.
// Presumably the number of crawler worker threads — usage is not visible here; confirm.
private static final int TOTAL_THREADS = 12;
// Latch created with a count of one.
// NOTE(review): looks like a start gate for worker threads — confirm against the code that awaits it.
private final CountDownLatch mStartSignal = new CountDownLatch(1);
// Latch created with a count of TOTAL_THREADS.
// NOTE(review): presumably counted down once per finished worker — confirm.
private final CountDownLatch mDoneSignal = new CountDownLatch(TOTAL_THREADS);
/**
 * Creates a site for the given URL and registers that URL for crawling.
 *
 * @param url the site's home page URL; it is stored as the site URL and
 *            offered to the queue of URLs that need to be crawled
 */
public WebSite(String url){
    // Put the site's home page into the queue of URLs to crawl.
    urls.offer(url);
    // Remember the home page as this site's URL.
    this.url = url;
}
/**
 * Processes the site's home page on a newly created thread.
 *
 * <p>The new thread fetches the site URL through {@code HttpUtils.httpGet}
 * and hands the result to {@code paxing(String)}. This method returns
 * immediately after starting the thread; it does not wait for it to finish.
 */
public void guangDu(){
    Runnable fetchAndCrawl = new Runnable() {
        @Override
        public void run() {
            // The URL is read when the task runs, on the new thread.
            WebSite.this.paxing(HttpUtils.httpGet(WebSite.this.url));
        }
    };
    Thread worker = new Thread(fetchAndCrawl);
    worker.start();
}
public void paxing(String html){
if(html.lastIndexOf("下一页
")<0) return ;更多推荐
java爬网站实时数据_java爬取网站数据
发布评论