石墨文档批量下载链接
石墨文档批量下载链接
使用 Java 实现，这里纯粹是写着玩的。它会把你的普通文档的下载链接导出到单个文件当中。
当然你也可以自己扩展。这里导出的格式就是 list 套 map。
至于下载别的方式或者啥的自己研究吧 没空写了
把里面的 cookie 换成自己的就行：在网页上按 F12 查看请求头，把 cookie: 后面的内容全部复制过来即可。
/**
 * Holds the endpoint URLs, export format and session cookie used by the exporter.
 *
 * <p>The URL constants are intentionally left blank in the published snippet and the
 * cookie is a placeholder; fill them in before running.
 */
public final class Information {

    /** Folder endpoint: queried for the desktop listing and, with a "folder" parameter, for a folder's contents. */
    public static final String INDEX_URL = "";

    /** File endpoint: queried with "fileGuid" and "type" to start an export of one document. */
    public static final String FILES_URL = "";

    /** Download-link endpoint: queried with "taskId" to obtain the download URL. */
    public static final String GET_URL = "";

    /** Value sent as the export "type" parameter. */
    public static final String PDF = "pdf";

    /** Value of the Cookie request header; replace with the cookie copied from your own browser session. */
    public static final String COOKIE = "你的cookie";

    /** Constants holder; not meant to be instantiated. */
    private Information() {
    }
}
主方法。注意这里只是测试，所以只从桌面的文件夹集合里拿出一个来导出。
下面的 sleep 是为了防止频繁访问。
package cy.shimo.dl;import com.alibaba.fastjson2.JSON;
import cy.shimo.pojo.GetFile;
import cy.shimo.util.HttpClientUtil;
import cy.shimo.util.Information;import java.io.BufferedWriter;
import java.io.FileWriter;
import java.lang.reflect.Type;
import java.util.*;
import java.util.stream.Collectors;import static cy.shimo.util.Information.PDF;/*** @ fileName:Download* @ description:* @ version:1.0.0*/
public class Download {public static void main(String[] args) throws Exception {//获取桌面列表的urlString url = Information.INDEX_URL;//发送String res = HttpClientUtil.doGet(url, null);//转换ArrayList<GetFile> list = getGetFiles(res);//这里只是测试单个文件夹获取 因为多个运行比较缓慢 占用资源//这里可以手动添加自己任意文件夹的 guidArrayList<GetFile> dlList1 = new ArrayList<>();dlList1.add(list.get(20));//递归获取下载地址 这个集合是下载pdf的链接//有喜欢word的朋友可以自己琢磨下//md应为受文件大小限制就没写 图片不实用ArrayList<Map> dlList = new ArrayList<>();getDlList(dlList1, dlList);//链接转pdf然后输出到指定路径System.out.println(dlList);BufferedWriter writer = new BufferedWriter(new FileWriter("output.txt"));for (Map map : dlList) {String str = map.toString();writer.write(str);writer.newLine();}writer.close();}/*** 转换为对象 抽取有用的** @param res* @return*/private static ArrayList<GetFile> getGetFiles(String res) {List<HashMap> get = JSON.parseArray(res, (Type) HashMap.class);ArrayList<GetFile> list = get.stream().map(hashMap -> new GetFile(hashMap.get("id").toString(), hashMap.get("guid").toString(), hashMap.get("name").toString(), hashMap.get("type").toString())).collect(Collectors.toCollection(ArrayList::new));return list;}/*** 递归获取** @param list* @param strings* @return*/private static void getDlList(ArrayList<GetFile> list, ArrayList<Map> strings) throws Exception {int i = 1;for (GetFile getFile : list) {//避免频繁if (i % 10 == 0) {Thread.sleep(10000);}//如果是文件夹if (getFile.getType().equals("folder")) {String url = Information.INDEX_URL;Map<String, String> map = new HashMap<>();map.put("folder", getFile.getGuid());String res = HttpClientUtil.doGet(url, map);ArrayList<GetFile> files = getGetFiles(res);getDlList(files, strings);} else {//过滤.docx文件,因为程序只能导出普通文档if (getFile.getName().endsWith(".docx")){continue;}//不是文件夹是文件的话就获取下载地址System.out.println("正在操作:" + getFile.getName());//避免操作频繁Thread.sleep(2000);String url = Information.FILES_URL;Map<String, String> map = new HashMap<>();map.put("fileGuid", getFile.getGuid());map.put("type", Information.PDF);String res = HttpClientUtil.doGet(url, 
map);HashMap parsed = JSON.parseObject(res, HashMap.class);//生成链接extracted(strings, parsed,getFile.getName());}i++;}}/*** 生成链接** @param strings* @param parsed* @throws Exception*/private static void extracted(ArrayList<Map> strings, HashMap parsed,String path) throws Exception {//获取他们生成的下载参数String url = Information.GET_URL;Map<String, String> map = new HashMap<>();//输出看下到底是什么挡着了System.out.println(parsed);map.put("taskId", parsed.get("taskId").toString());String res = HttpClientUtil.doGet(url, map);HashMap hashMap = JSON.parseObject(res, HashMap.class);String data = hashMap.get("data").toString();//最终的data数据HashMap parsedObject = JSON.parseObject(data, HashMap.class);//打印获取请求System.out.println(parsedObject);String dlUrl = parsedObject.get("downloadUrl").toString();Map map1 = new HashMap();map1.put(path,dlUrl);strings.add(map1);}
}
发送请求的
package cy.shimo.util;import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.conn.ConnectTimeoutException;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;import java.util.Map;public class HttpClientUtil {public static String doGet(String url, Map<String, String> param) throws Exception {// 创建Httpclient对象CloseableHttpClient httpclient = HttpClients.createDefault();CloseableHttpResponse response = null;// 创建uriURIBuilder builder = new URIBuilder(url);if (param != null) {for (String key : param.keySet()) {builder.addParameter(key, param.get(key));}}// 创建http GET请求HttpGet httpGet = new HttpGet(builder.build());httpGet.addHeader("X-Requested-With","XMLHttpRequest");httpGet.addHeader("Cookie", Information.COOKIE);// 执行请求response = httpclient.execute(httpGet);String res = EntityUtils.toString(response.getEntity());httpclient.close();response.close();return res;}}
实体类
/**
 * One entry of a folder listing, reduced to the four fields the exporter uses.
 *
 * <p>Accessors and the all-arguments constructor come from the annotations
 * (presumably Lombok's; the import is not shown in this snippet). The constructor
 * takes the fields in declaration order: id, guid, name, type.
 */
@Data
@AllArgsConstructor
public class GetFile {

    /** Value of the entry's "id" field. */
    private String id;

    /** Value of the entry's "guid" field; sent as the "folder" or "fileGuid" request parameter. */
    private String guid;

    /** Value of the entry's "name" field; used for the ".docx" filter and as the key in the output. */
    private String name;

    /** Value of the entry's "type" field; "folder" marks an entry that is descended into. */
    private String type;
}
更多推荐
石墨文档批量下载链接
发布评论