admin管理员组文章数量:1636900
使用Java爬虫
Java使用jsoup爬取b站视频,根据模糊搜索,搜到批量视频自动下载。
首先获取到b站的搜索链接:https://search.bilibili.com/all?keyword= ,其中 keyword 参数就是要搜索的关键词(一般对应 ELK 搜索引擎的索引)。
思路:
- 首先是使用jsoup爬取到b站的动态网页下来
-
// Read the search keyword from standard input.
String str = "";
Scanner scanner = new Scanner(System.in);
System.out.println("请输入想要搜索的内容");
str = scanner.next();

// Fetch the search result page for that keyword.
Document connect = null;
try {
    connect = Jsoup.connect("https://search.bilibili.com/all?keyword=" + str).get();
} catch (IOException e) {
    throw new RuntimeException(e);
}

// Every <li> whose first <a> links to a video page contributes one id to `list`.
Elements li = connect.getElementsByTag("li");
for (Element element : li) {
    String url = element.getElementsByTag("a").attr("href");
    if (url.contains("www.bilibili.com/video/BV")) {
        // The id is the text after "BV", up to the query string if there is one.
        int begin = url.indexOf("BV") + 2;
        int query = url.indexOf("?", begin);
        // A link without '?' used to make substring() throw; take the rest of the link instead.
        list.add(query < 0 ? url.substring(begin) : url.substring(begin, query));
    }
}
- 获取到搜索到的视频网页,根据标签来获取他们的链接,获取到链接之后我们拿到链接里面的BV号,然后使用BV号去一个地址拿到这个视频的cid。
-
拿到BV号之后,我们使用GET请求访问 "https://api.bilibili.com/x/web-interface/view?bvid=" + bvid 这个地址,它会返回一个json,我们从这个json里面获取到cid。
获取到这个cid之后,我们就是可以根据这个cid,去拿到这个视频的真实地址以及视频的真实信息
使用cid和BV号去请求这个地址 "https://api.bilibili.com/x/player/playurl?cid=" + cid + "&fnver=0&qn=" + qn + "&otype=json&bvid=" + bvid + "&fnval=2&player=1" 会获取到一个json
这里面的qn参数是对应视频的清晰度的一个参数 //qn : 视频质量 112 -> 高清 1080P+, 80 -> 高清 1080P, 64 -> 高清 720P, 32 -> 清晰 480P, 16 -> 流畅 360P // 最高支持 1080p, 1080P+是不支持的
url就是视频的真实路径,然后使用流把视频下载到电脑的固定位置
下面是完整代码
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Map;
import java.util.Scanner;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Command-line crawler that searches Bilibili for a keyword, collects the BV ids of the
 * videos linked from the result page and downloads each of them as an FLV file.
 *
 * <p>Flow per video: BV id -> {@code cid} and title (view API) -> media URL (playurl API)
 * -> stream the media URL to {@link #DOWNLOAD_DIR}.
 *
 * <p>Videos are processed one after another on the calling thread; a failure on one video
 * is reported on {@code System.err} and the batch continues with the next one.
 */
public class AppUrlMovie {

    /** Time at which this class was initialised; base for the overall elapsed-time report. */
    private final static long timeMillis = System.currentTimeMillis();

    /**
     * File name (title + ".flv") of the video most recently resolved by {@link #getCid}.
     * Kept as a static field for backward compatibility; it is overwritten for every video.
     */
    static String FileName = "";

    private static final String SEARCH_URL = "https://search.bilibili.com/all?keyword=";
    private static final String VIEW_API_URL = "https://api.bilibili.com/x/web-interface/view?bvid=";
    private static final String PLAY_API_URL = "https://api.bilibili.com/x/player/playurl?cid=";
    private static final String VIDEO_PAGE_URL = "https://www.bilibili.com/video/";

    /** Substring that identifies a link to a video page; the BV id starts at its last two chars. */
    private static final String VIDEO_LINK_MARKER = "bilibili.com/video/BV";

    /** Referer sent by the two-argument {@link #downloadMovie(String, String)}. */
    private static final String DEFAULT_REFERER = VIDEO_PAGE_URL + "BV14S4y127Gd";

    /** Directory the videos are written to. */
    private static final String DOWNLOAD_DIR = "C:\\file\\img\\";

    /**
     * User-Agent sent with the media request. The original code set the header twice, so
     * only this (second) value ever took effect; the dead first assignment was dropped.
     */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";

    private static final int CONNECT_TIMEOUT_MILLIS = 10 * 1000;

    /**
     * Requested quality ({@code qn}): 112 = 1080P+, 80 = 1080P, 64 = 720P, 32 = 480P,
     * 16 = 360P. 1080P is the highest supported here; 1080P+ is not.
     */
    private static final int QUALITY = 80;

    /**
     * Asks for a search keyword on standard input, scrapes the search result page for video
     * links and downloads every video found.
     *
     * @param args unused
     * @throws MalformedURLException never thrown by the current implementation; kept so the
     *         signature stays compatible
     * @throws RuntimeException if the search page cannot be fetched
     */
    public static void main(String[] args) throws MalformedURLException {
        // System.in is deliberately not closed: closing the Scanner would close it too.
        Scanner scanner = new Scanner(System.in);
        System.out.println("请输入想要搜索的内容");
        // Read the whole line so that keywords containing spaces are not truncated.
        String keyword = scanner.hasNextLine() ? scanner.nextLine().trim() : "";
        if (keyword.isEmpty()) {
            System.err.println("搜索内容不能为空");
            return;
        }

        Document page;
        try {
            // The keyword is user input and must be encoded before it goes into the query.
            page = Jsoup.connect(SEARCH_URL + URLEncoder.encode(keyword, "UTF-8")).get();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        ArrayList<String> bvids = new ArrayList<>();
        for (Element item : page.getElementsByTag("li")) {
            String bvid = extractBvid(item.getElementsByTag("a").attr("href"));
            // The same video can be linked from several list items; download it once.
            if (bvid != null && !bvids.contains(bvid)) {
                bvids.add(bvid);
            }
        }
        DownloadVivw(bvids, 0);
    }

    /**
     * Downloads the videos {@code list[i]}, {@code list[i + 1]}, ... in order.
     *
     * <p>Implemented as a loop rather than one recursive call per video, so the stack depth
     * does not grow with the list and the overall time is reported once instead of once per
     * unwinding frame.
     *
     * @param list BV ids, with or without the leading {@code "BV"}
     * @param i    index of the first entry to download; nothing happens when it is outside
     *             the list
     */
    public static void DownloadVivw(final ArrayList<String> list, int i) {
        if (list == null || i < 0 || i >= list.size()) {
            return;
        }
        for (int index = i; index < list.size(); index++) {
            String bvid = normalizeBvid(list.get(index));
            try {
                // Resolve the cid first (this also sets FileName), then the media URL.
                String cid = getCid(bvid);
                String url = getVivw(cid, bvid);
                System.out.println("开始");
                long start = System.currentTimeMillis();
                download(url, FileName, VIDEO_PAGE_URL + bvid);
                long end = System.currentTimeMillis();
                System.out.println("以下载" + (index + 1) + "份视频");
                System.err.println("总共耗时:" + (end - start) / 1000 + "s,线程名" + Thread.currentThread().getName());
            } catch (IOException | RuntimeException e) {
                // Batch boundary: report the failure and keep going with the next video.
                System.err.println("下载失败: " + bvid + " (" + e + ")");
            }
        }
        long end = System.currentTimeMillis();
        System.err.println("总共耗时:" + (end - timeMillis) / 1000 + "s");
    }

    /**
     * Resolves the media URL of a video through the playurl API.
     *
     * <p>Only the first entry of {@code durl} is used.
     * NOTE(review): a video may be split into several segments; confirm whether the
     * remaining entries need to be downloaded as well.
     *
     * @param cidJson the video's cid as returned by {@link #getCid}
     * @param bvid    the video's BV id
     * @return the URL of the first media segment
     * @throws IOException if the request fails or the response carries no media URL
     */
    private static String getVivw(String cidJson, String bvid) throws IOException {
        String requestUrl = PLAY_API_URL + cidJson + "&fnver=0&qn=" + QUALITY
                + "&otype=json&bvid=" + bvid + "&fnval=2&player=1";
        JSONArray segments = fetchData(requestUrl).getJSONArray("durl");
        if (segments == null || segments.isEmpty()) {
            throw new IOException("No durl entries in playurl response for " + bvid);
        }
        String url = segments.getJSONObject(0).getString("url");
        if (url == null) {
            throw new IOException("No url in first durl entry for " + bvid);
        }
        return url;
    }

    /**
     * Looks up the cid of a video through the view API and stores the target file name
     * (sanitised title + ".flv") in {@link #FileName}.
     *
     * @param avid the video's BV id (the parameter name is historical)
     * @return the cid as a string
     * @throws IOException if the request fails or the response carries no cid
     */
    private static String getCid(String avid) throws IOException {
        JSONObject data = fetchData(VIEW_API_URL + avid);
        String cid = data.getString("cid");
        if (cid == null) {
            throw new IOException("No cid in view response for " + avid);
        }
        FileName = sanitizeFileName(data.getString("title"), avid) + ".flv";
        return cid;
    }

    /**
     * Downloads {@code BLUrl} into {@link #DOWNLOAD_DIR} under the given file name.
     * Errors are printed and not propagated, as before.
     *
     * @param BLUrl    media URL to download
     * @param fileName name of the file to create inside the download directory
     */
    public static void downloadMovie(String BLUrl, String fileName) {
        try {
            download(BLUrl, fileName, DEFAULT_REFERER);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Streams {@code videoUrl} to {@code DOWNLOAD_DIR + fileName}, sending the given Referer. */
    private static void download(String videoUrl, String fileName, String referer) throws IOException {
        URLConnection connection = new URL(videoUrl).openConnection();
        connection.setRequestProperty("Referer", referer);
        connection.setRequestProperty("Sec-Fetch-Mode", "no-cors");
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);

        File target = new File(DOWNLOAD_DIR + fileName);
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("Cannot create download directory " + parent);
        }

        // Open the network stream first so that a failed connection leaves no empty file,
        // and let try-with-resources close both streams even when the copy fails midway.
        try (InputStream in = new BufferedInputStream(connection.getInputStream());
             OutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }

    /** Performs a GET on {@code apiUrl} and returns the {@code data} object of the JSON reply. */
    private static JSONObject fetchData(String apiUrl) throws IOException {
        URLConnection connection = new URL(apiUrl).openConnection();
        connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);

        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), "utf-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        }

        JSONObject response = JSON.parseObject(body.toString());
        JSONObject data = response == null ? null : response.getJSONObject("data");
        if (data == null) {
            // Empty body or an error reply without "data": fail with context instead of an NPE.
            throw new IOException("Unexpected response from " + apiUrl + ": " + body);
        }
        return data;
    }

    /** Returns the BV id (including the "BV" prefix) found in a video link, or null if none. */
    private static String extractBvid(String href) {
        if (href == null) {
            return null;
        }
        int marker = href.indexOf(VIDEO_LINK_MARKER);
        if (marker < 0) {
            return null;
        }
        int start = marker + VIDEO_LINK_MARKER.length() - 2;
        int end = start;
        // The id runs until the first non-alphanumeric character ('?', '/', '#') or the end,
        // so links without a query string no longer break the extraction.
        while (end < href.length() && Character.isLetterOrDigit(href.charAt(end))) {
            end++;
        }
        return end - start > 2 ? href.substring(start, end) : null;
    }

    /** Accepts ids with or without the leading "BV" and returns the prefixed form. */
    private static String normalizeBvid(String id) {
        String trimmed = id == null ? "" : id.trim();
        return trimmed.startsWith("BV") ? trimmed : "BV" + trimmed;
    }

    /** Replaces characters that are illegal in Windows file names; falls back when nothing is left. */
    private static String sanitizeFileName(String title, String fallback) {
        String cleaned = title == null
                ? ""
                : title.replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_").trim();
        return cleaned.isEmpty() ? fallback : cleaned;
    }
}
版权声明:本文标题:Java爬虫 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://www.elefans.com/dongtai/1729230519a1191421.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论