获取高校信息

编程入门 行业动态 更新时间:2024-10-22 02:47:19

获取<a href="https://www.elefans.com/category/jswz/34/1765964.html">高校信息</a>

获取高校信息

说明

开发的系统中,高校信息的作用是给人选的教育经历打上标签,方便筛选优质人才。
传统一些的方式就是985/211,但是现在主流的方式是双一流。尽管如此,用985/211来筛选也存在很大程度上的合理性,所以从多个角度都进行了查询。

数据来源方面,使用了阳光高考网和中国教育在线两个网站。
阳光高考网:https://gaokao.chsi.com.cn/sch/search.do
中国教育在线:https://www.eol.cn/
查询流程:
1 通过阳光高考网选择民办大学,将所有民办大学筛选出来
2 通过中国教育在线筛选出985和211院校
3 查询阳光高考网数据列表,对每个学校判断是否是民办或985/211,并打上相应标签

技术说明

开发语言选择的是Java,pom.xml中引入了jsoup,用来发起网络请求并解析返回的HTML

		<!-- jsoup: used by the loader to fetch pages over HTTP and to parse the returned HTML -->
		<dependency>
			<groupId>org.jsoup</groupId>
			<artifactId>jsoup</artifactId>
			<version>1.12.1</version>
		</dependency>

高校表结构设计

直接将数据内容保存即可,单一表

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.Id;/*** 高校信息*/
@Entity
public class University {

	/** Primary key; the value is generated by the persistence provider. */
	@Id
	@GeneratedValue
	private Long zid;

	/** Creation time, stored as a string of at most 19 characters. */
	@Column(length = 19, nullable = false)
	private String createTime;

	/** Last update time, stored as a string of at most 19 characters; may be null. */
	@Column(length = 19)
	private String updateTime;

	/** Soft-delete flag: 1 = deleted, 0 = not deleted. */
	@Column(nullable = false)
	private Integer isDelete;

	/** University name. */
	@Column(length = 300, nullable = false)
	private String name;

	/** Location of the university. */
	@Column(length = 300, nullable = false)
	private String city;

	/** Authority the university is affiliated with. */
	@Column(length = 300, nullable = false)
	private String owner;

	/** Education level offered. */
	@Column(length = 100, nullable = false)
	private String level;

	/**
	 * Feature tag; may be null when no tag applies. Documented values: "985|211", "211", "民办大学".
	 * NOTE(review): the loader published alongside this entity writes "民办" rather than "民办大学" —
	 * confirm which spelling is canonical.
	 */
	@Column(length = 100)
	private String feature;

	/** "First-class university" flag: 1 = yes, 0 = no. */
	@Column(nullable = false)
	private Integer leadingUniversity;

	/** "First-class disciplines" flag: 1 = yes, 0 = no. */
	@Column(nullable = false)
	private Integer leadingDisciplines;

	/** Graduate-school flag: 1 = has one, 0 = does not. */
	@Column(nullable = false)
	private Integer institute;

	// ---- accessors (plain getters and setters, no extra logic) ----

	public Long getZid() {
		return zid;
	}

	public void setZid(Long zid) {
		this.zid = zid;
	}

	public String getCreateTime() {
		return createTime;
	}

	public void setCreateTime(String createTime) {
		this.createTime = createTime;
	}

	public String getUpdateTime() {
		return updateTime;
	}

	public void setUpdateTime(String updateTime) {
		this.updateTime = updateTime;
	}

	public Integer getIsDelete() {
		return isDelete;
	}

	public void setIsDelete(Integer isDelete) {
		this.isDelete = isDelete;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getCity() {
		return city;
	}

	public void setCity(String city) {
		this.city = city;
	}

	public String getOwner() {
		return owner;
	}

	public void setOwner(String owner) {
		this.owner = owner;
	}

	public String getLevel() {
		return level;
	}

	public void setLevel(String level) {
		this.level = level;
	}

	public String getFeature() {
		return feature;
	}

	public void setFeature(String feature) {
		this.feature = feature;
	}

	public Integer getLeadingUniversity() {
		return leadingUniversity;
	}

	public void setLeadingUniversity(Integer leadingUniversity) {
		this.leadingUniversity = leadingUniversity;
	}

	public Integer getLeadingDisciplines() {
		return leadingDisciplines;
	}

	public void setLeadingDisciplines(Integer leadingDisciplines) {
		this.leadingDisciplines = leadingDisciplines;
	}

	public Integer getInstitute() {
		return institute;
	}

	public void setInstitute(Integer institute) {
		this.institute = institute;
	}
}

辅助方法

用来判断某个学校是否是985/211,之所以没直接对比学校名称,是考虑到如果某学校是985/211,那么分校也理应是985/211。

	/**
	 * Reports whether {@code name} contains any entry of {@code school} as a substring.
	 * Substring matching (rather than equality) is used so that a branch campus whose name
	 * embeds the parent university's name is matched as well.
	 *
	 * @param name   the school name to test; {@code null} yields {@code false}
	 * @param school the reference names to look for; {@code null} yields {@code false}
	 * @return {@code true} if at least one non-blank entry of {@code school} occurs inside {@code name}
	 */
	private boolean schoolContain(String name, List<String> school) {
		if (name == null || school == null) {
			return false;
		}
		for (String item : school) {
			// An empty string is a substring of every name, so a blank entry (e.g. scraped from an
			// empty table cell) would otherwise match every school.
			if (item == null || item.trim().isEmpty()) {
				continue;
			}
			if (name.contains(item)) {
				return true;
			}
		}
		return false;
	}

代码流程

获取985院校列表

		//985院校String url = ".shtml";Document doc = Jsoup.connect(url).get();List<String> school985 = new ArrayList<>();Elements eles = doc.getElementsByTag("tbody").first().getElementsByTag("tr");for(Element ele : eles) {Elements items = ele.getElementsByTag("td");school985.add(items.get(items.size()-3).text());}

获取211院校列表

		//211院校url = ".shtml";doc = Jsoup.connect(url).get();List<String> school211 = new ArrayList<>();eles = doc.getElementsByTag("tbody").first().getElementsByTag("tr");for(Element ele : eles) {Elements items = ele.getElementsByTag("td");school211.add(items.get(items.size()-3).text());}

获取民办大学列表

		// 民办大学url = ".do?searchType=1&yxmc=&ssdm=&yxls=&xlcc=&yxjbz=2";doc = Jsoup.connect(url).get();eles = doc.getElementById("PageForm").getElementsByTag("li");Element ele = eles.get(eles.size() - 3);int count = Integer.parseInt(ele.text());List<String> minbanSchoolName = new ArrayList<>();for (int i = 0; i < count; i++) {url = ",yxjbz-2,start-" + (i * 20) + ".dhtml";doc = Jsoup.connect(url).get();eles = doc.getElementsByClass("ch-table").first().getElementsByTag("tr");for (int j = 1, num = eles.size(); j < num; j++) {Elements items = eles.get(j).getElementsByTag("td");minbanSchoolName.add(items.get(0).text());}Thread.sleep(2000);}

查询高校列表并保存到数据库

		// 查询列表数据url = ".do";doc = Jsoup.connect(url).get();eles = doc.getElementById("PageForm").getElementsByTag("li");ele = eles.get(eles.size() - 3);count = Integer.parseInt(ele.text());for (int i = 0; i < count; i++) {url = "" + (i * 20) + ".dhtml";doc = Jsoup.connect(url).get();eles = doc.getElementsByClass("ch-table").first().getElementsByTag("tr");for (int j = 1, num = eles.size(); j < num; j++) {Elements items = eles.get(j).getElementsByTag("td");University school = new University();school.setCity(items.get(1).text());school.setCreateTime(Dates.now());school.setLeadingUniversity(StringUtil.isBlank(items.get(4).text())?0:1);school.setLeadingDisciplines(StringUtil.isBlank(items.get(5).text())?0:1);if(minbanSchoolName.contains(items.get(0).text())) {school.setFeature("民办");} else {if(schoolContain(items.get(0).text(), school985)) {school.setFeature("985|211");} else if(schoolContain(items.get(0).text(), school211)) {school.setFeature("211");}}school.setInstitute(StringUtil.isBlank(items.get(6).text())?0:1);school.setIsDelete(0);school.setLevel(items.get(3).text());school.setName(items.get(0).text());school.setOwner(items.get(2).text());schoolRepository.save(school);}Thread.sleep(2000);}

完整测试类

import java.util.ArrayList;
import java.util.List;import org.jsoup.Jsoup;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;import com.lootaa.xcl.basedata.dao.UniversityRepository;
import com.lootaa.xcl.basedata.db.University;
import com.lootaa.xcl.basedata.util.Dates;@SpringBootTest
class XclBasedataApplicationSchoolTests {@Autowired UniversityRepository schoolRepository;@Testvoid loadSchool() throws Exception {schoolRepository.deleteAll();//985院校String url = ".shtml";Document doc = Jsoup.connect(url).get();List<String> school985 = new ArrayList<>();Elements eles = doc.getElementsByTag("tbody").first().getElementsByTag("tr");for(Element ele : eles) {Elements items = ele.getElementsByTag("td");school985.add(items.get(items.size()-3).text());}//211院校url = ".shtml";doc = Jsoup.connect(url).get();List<String> school211 = new ArrayList<>();eles = doc.getElementsByTag("tbody").first().getElementsByTag("tr");for(Element ele : eles) {Elements items = ele.getElementsByTag("td");school211.add(items.get(items.size()-3).text());}// 民办大学url = ".do?searchType=1&yxmc=&ssdm=&yxls=&xlcc=&yxjbz=2";doc = Jsoup.connect(url).get();eles = doc.getElementById("PageForm").getElementsByTag("li");Element ele = eles.get(eles.size() - 3);int count = Integer.parseInt(ele.text());List<String> minbanSchoolName = new ArrayList<>();for (int i = 0; i < count; i++) {url = ",yxjbz-2,start-" + (i * 20) + ".dhtml";doc = Jsoup.connect(url).get();eles = doc.getElementsByClass("ch-table").first().getElementsByTag("tr");for (int j = 1, num = eles.size(); j < num; j++) {Elements items = eles.get(j).getElementsByTag("td");minbanSchoolName.add(items.get(0).text());}Thread.sleep(2000);}// 查询列表数据url = ".do";doc = Jsoup.connect(url).get();eles = doc.getElementById("PageForm").getElementsByTag("li");ele = eles.get(eles.size() - 3);count = Integer.parseInt(ele.text());for (int i = 0; i < count; i++) {url = "" + (i * 20) + ".dhtml";doc = Jsoup.connect(url).get();eles = doc.getElementsByClass("ch-table").first().getElementsByTag("tr");for (int j = 1, num = eles.size(); j < num; j++) {Elements items = eles.get(j).getElementsByTag("td");University school = new 
University();school.setCity(items.get(1).text());school.setCreateTime(Dates.now());school.setLeadingUniversity(StringUtil.isBlank(items.get(4).text())?0:1);school.setLeadingDisciplines(StringUtil.isBlank(items.get(5).text())?0:1);if(minbanSchoolName.contains(items.get(0).text())) {school.setFeature("民办");} else {if(schoolContain(items.get(0).text(), school985)) {school.setFeature("985|211");} else if(schoolContain(items.get(0).text(), school211)) {school.setFeature("211");}}school.setInstitute(StringUtil.isBlank(items.get(6).text())?0:1);school.setIsDelete(0);school.setLevel(items.get(3).text());school.setName(items.get(0).text());school.setOwner(items.get(2).text());schoolRepository.save(school);}Thread.sleep(2000);}}private boolean schoolContain(String name, List<String> school) {boolean contain =false;for(String item : school) {if(name.contains(item)) {contain = true;break;}}return contain;}}

保存的数据

更多推荐

获取高校信息

本文发布于:2024-03-10 05:34:23,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1727120.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:高校   信息

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!