package org.sxkj.system.util;
|
|
import cn.hutool.core.util.ObjectUtil;
|
import lombok.extern.slf4j.Slf4j;
|
import org.jsoup.Connection;
|
import org.jsoup.Jsoup;
|
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Element;
|
import org.jsoup.select.Elements;
|
import org.sxkj.system.entity.Region;
|
import org.sxkj.system.ssl.SSL;
|
import javax.net.ssl.HttpsURLConnection;
|
import java.text.SimpleDateFormat;
|
import java.util.ArrayList;
|
import java.util.Date;
|
import java.util.List;
|
|
import static com.baomidou.mybatisplus.extension.toolkit.Db.saveBatch;
|
|
/**
|
* 区域信息拉取 util
|
*/
|
@Slf4j
|
public class RegionUtil {
|
|
private final static String URL = "https://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2023/index.html";
|
|
|
/**
|
* 拉取统计局数据入库
|
*/
|
public void getRegionInfoSave() {
|
Boolean provinces = getProvinces(0, URL);
|
}
|
|
/**
|
* 建立连接
|
*/
|
private static Document connect(String url) {
|
if (url == null || url.isEmpty()) {
|
throw new IllegalArgumentException("无效的url");
|
}
|
try {
|
String path = url+"?timestamp="+ System.currentTimeMillis();
|
SSL.trustAllHttpsCertificates();
|
HttpsURLConnection.setDefaultHostnameVerifier(SSL.hv);
|
Connection connect = Jsoup.connect(path);
|
connect.response().cookies();
|
connect.cookies(connect.response().cookies());
|
connect.header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:49.0) Gecko/20100101 Firefox/49.0")
|
//如果是这种方式,这里务必带上
|
.header("Connection", "close")
|
.timeout(10000);
|
return connect.get();
|
} catch (Exception e) {
|
System.out.println(url+"请求异常");
|
log.error("请求异常:" + e);
|
try {
|
Thread.sleep(30000);
|
} catch (InterruptedException ex) {
|
|
}
|
return connect(url);
|
}
|
}
|
|
/**
|
* version
|
*/
|
private static String version() {
|
SimpleDateFormat format = new SimpleDateFormat("yyyyMMdd");
|
String version = format.format(new Date());
|
return version;
|
}
|
|
/**
|
* 获取所有的省份(省级)
|
* @param url 请求地址:https://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2023/index.html
|
* @return
|
*/
|
private Boolean getProvinces(int count,String url) {
|
List<Region> listAreas = new ArrayList<>();
|
Document connect = connect(url);
|
if(ObjectUtil.isEmpty(connect)||"请开启JavaScript并刷新该页.".equals(connect.select("strong").text())||"Please enable JavaScript and refresh the page.".equals(connect.select("strong").text())){
|
try {
|
Thread.sleep(60000);
|
} catch (InterruptedException e) {
|
|
}
|
if(ObjectUtil.isNotNull(connect)){
|
System.err.println(connect.toString());
|
}
|
System.err.println(url+"重试第"+(++count)+"次");
|
getProvinces(count,url);
|
}
|
Elements rowProvince = connect.select("tr.provincetr");
|
int i = 1;
|
for (Element provinceElement : rowProvince) {
|
Elements select = provinceElement.select("a");
|
for (Element province : select) {
|
String code = province.select("a").attr("href");
|
String name = province.text();
|
//if("江苏省".equals(name)){
|
//if(!"北京市天津市河北省山西省内蒙古自治区辽宁省吉林省黑龙江省上海市江苏省浙江省安徽省福建省江西省山东省河南省".contains(name)){
|
Region region = new Region();
|
region.setCode(code.replace(".html",""));
|
region.setName(name);
|
region.setParentCode("00");
|
region.setAncestors("00");
|
region.setProvinceCode(code.replace(".html",""));
|
region.setProvinceName(name);
|
// region.setLevel(1);
|
// region.setVersion(version());
|
region.setSort(i++);
|
listAreas.add(region);
|
|
String provinceUrl = url.replace("index.html",code);
|
// System.err.println("++++++++++++++++++++++++++开始获取"+ name +"下属市区行政区划信息++++++++++++++++++++++++");
|
List<Region> cityAreaCodeList = getCityAreaCode(0,provinceUrl,region.getAncestors()+ "," +region.getCode(),region.getProvinceCode(),region.getProvinceName());
|
listAreas.addAll(cityAreaCodeList);
|
|
// 省级循环完毕后,插入数据库,并清空listAreas
|
// logger.info(JSON.toJSONString(listAreas));
|
System.err.println("*********************************开始插入【"+ name +"】行政区划*********************************");
|
saveBatch(listAreas);
|
listAreas = new ArrayList<>();
|
System.err.println("*********************************结束插入【"+ name +"】行政区划*********************************");
|
//}
|
}
|
}
|
return true;
|
}
|
|
/**
|
* 获取市一级行政区划代码(市级)
|
* @param provinceUrl
|
* @param provinceCode
|
* @param provinceName
|
* @param ancestors
|
* @return
|
*/
|
private List<Region> getCityAreaCode(int count,String provinceUrl, String ancestors,
|
String provinceCode, String provinceName) {
|
List<Region> listAreas = new ArrayList<>();
|
Document connect = connect(provinceUrl);
|
|
if(ObjectUtil.isEmpty(connect)||"请开启JavaScript并刷新该页.".equals(connect.select("strong").text())||"Please enable JavaScript and refresh the page.".equals(connect.select("strong").text())){
|
try {
|
Thread.sleep(30000);
|
} catch (InterruptedException e) {
|
|
}
|
if(ObjectUtil.isNotNull(connect)){
|
System.err.println(connect.toString());
|
}
|
System.err.println(provinceUrl+"重试第"+(++count)+"次");
|
getCityAreaCode(count,provinceUrl,ancestors,provinceCode,provinceName);
|
}
|
|
Elements rowCity = connect.select("tr.citytr");
|
int i = 1;
|
for (Element cityElement : rowCity) {
|
String name = cityElement.select("td").text();
|
String[] split = name.split(" ");
|
Region region = new Region();
|
region.setCode(split[0].substring(0,4));
|
region.setName(split[1]);
|
region.setParentCode(provinceCode);
|
region.setAncestors(ancestors);
|
region.setProvinceCode(provinceCode);
|
region.setProvinceName(provinceName);
|
region.setCityCode(split[0].substring(0,4));
|
region.setCityName(split[1]);
|
// region.setLevel(2);
|
// region.setVersion(version());
|
region.setSort(i++);
|
|
listAreas.add(region);
|
String cityUrl = provinceUrl.replace(".html","/"+split[0].substring(0, 4)+".html");
|
System.err.println("-------------------开始获取"+split[1]+"下属区县行政区划信息-----------------------");
|
// System.out.println("市级url:"+cityUrl);
|
List<Region> cityAreaCodeList = getCountyAreaCode(0,cityUrl,
|
region.getAncestors()+ "," +region.getCode(),
|
region.getProvinceCode(),
|
region.getProvinceName(),
|
region.getCityCode(),
|
region.getCityName());
|
listAreas.addAll(cityAreaCodeList);
|
}
|
return listAreas;
|
}
|
|
/**
|
* 获取区县一级行政区划代码(区县级)
|
* @param cityUrl
|
* @param ancestors
|
* @param provinceCode
|
* @param provinceName
|
* @param cityCode
|
* @param cityName
|
* @return
|
*/
|
private List<Region> getCountyAreaCode(int count,String cityUrl, String ancestors,
|
String provinceCode, String provinceName,
|
String cityCode, String cityName){
|
List<Region> listAreas = new ArrayList<>();
|
Document connect = connect(cityUrl);
|
if(ObjectUtil.isEmpty(connect)||"请开启JavaScript并刷新该页.".equals(connect.select("strong").text())||"Please enable JavaScript and refresh the page.".equals(connect.select("strong").text())){
|
try {
|
Thread.sleep(30000);
|
} catch (InterruptedException e) {
|
|
}
|
if(ObjectUtil.isNotNull(connect)){
|
System.err.println(connect.toString());
|
}
|
System.err.println(cityUrl+"重试第"+(++count)+"次");
|
getCountyAreaCode(count,cityUrl,ancestors,provinceCode,provinceName,cityCode,cityName);
|
}
|
Elements rowCounty = connect.select("tr.countytr");
|
int i = 1;
|
for (Element countyElement : rowCounty) {
|
String codeHtml = countyElement.select("a").attr("href");
|
String name = countyElement.select("td").text();
|
String[] split = name.split(" ");
|
if(!"市辖区".equals(split[1])){
|
Region region = new Region();
|
region.setCode(split[0].substring(0,6));
|
region.setName(split[1]);
|
region.setParentCode(cityCode);
|
region.setAncestors(ancestors);
|
region.setProvinceCode(provinceCode);
|
region.setProvinceName(provinceName);
|
region.setCityCode(cityCode);
|
region.setCityName(cityName);
|
region.setDistrictCode(split[0].substring(0,6));
|
region.setDistrictName(split[1]);
|
// region.setLevel(3);
|
// region.setVersion(version());
|
region.setSort(i++);
|
|
listAreas.add(region);
|
String countyUrl = cityUrl.replace(region.getCityCode()+".html",codeHtml);
|
if(!"金门县".equals(region.getDistrictName())){
|
System.err.println("====================开始获取"+split[1]+"下属区划信息");
|
// System.out.println("区县级url:"+cityUrl);
|
List<Region> countryAreaList = getDownAreaCode(0,countyUrl,
|
region.getAncestors()+ "," +region.getCode(),
|
region.getProvinceCode(),
|
region.getProvinceName(),
|
region.getCityCode(),
|
region.getCityName(),
|
region.getDistrictCode(),
|
region.getDistrictName());
|
listAreas.addAll(countryAreaList);
|
}
|
}
|
}
|
return listAreas;
|
}
|
|
/**
|
* 获取乡镇街道数据(乡镇一级)
|
* @param countyUrl
|
* @param ancestors
|
* @param provinceCode
|
* @param provinceName
|
* @param cityCode
|
* @param cityName
|
* @param countryCode
|
* @param countryName
|
* @return
|
*/
|
private List<Region> getDownAreaCode(int count,String countyUrl, String ancestors,
|
String provinceCode, String provinceName,
|
String cityCode, String cityName,
|
String countryCode, String countryName){
|
List<Region> listAreas = new ArrayList<>();
|
Document connect = connect(countyUrl);
|
if(ObjectUtil.isEmpty(connect)||"请开启JavaScript并刷新该页.".equals(connect.select("strong").text())||"Please enable JavaScript and refresh the page.".equals(connect.select("strong").text())){
|
try {
|
Thread.sleep(30000);
|
} catch (InterruptedException e) {
|
|
}
|
if(ObjectUtil.isNotNull(connect)){
|
System.err.println(connect.toString());
|
}
|
System.err.println(countyUrl+"重试第"+(++count)+"次");
|
getDownAreaCode(count,countyUrl,ancestors,provinceCode,provinceName,cityCode,cityName,countryCode,countryName);
|
}
|
Elements rowDown = connect.select("tr.towntr");
|
int i = 1;
|
for (Element downElement : rowDown) {
|
String codeHtml = downElement.select("a").attr("href");
|
String name = downElement.select("td").text();
|
String[] split = name.split(" ");
|
Region region = new Region();
|
region.setCode(split[0].substring(0,9));
|
region.setName(split[1]);
|
region.setParentCode(countryCode);
|
region.setAncestors(ancestors);
|
region.setProvinceCode(provinceCode);
|
region.setProvinceName(provinceName);
|
region.setCityCode(cityCode);
|
region.setCityName(cityName);
|
region.setDistrictCode(countryCode);
|
region.setDistrictName(countryName);
|
region.setTownCode(split[0].substring(0,9));
|
region.setTownName(split[1]);
|
// region.setLevel(4);
|
// region.setVersion(version());
|
region.setSort(i++);
|
|
listAreas.add(region);
|
String downyUrl = countyUrl.replace(region.getDistrictCode()+".html",codeHtml);
|
System.err.println("====================开始获取"+split[1]+"下属区划信息");
|
//System.out.println("乡镇街道级URL:"+downyUrl);
|
List<Region> villageAreaCodeList = getVillageAreaCode(0,downyUrl,
|
region.getAncestors()+ "," +region.getCode(),
|
region.getProvinceCode(),
|
region.getProvinceName(),
|
region.getCityCode(),
|
region.getCityName(),
|
region.getDistrictCode(),
|
region.getDistrictName(),
|
region.getTownCode(),
|
region.getTownName());
|
listAreas.addAll(villageAreaCodeList);
|
}
|
return listAreas;
|
}
|
|
private List<Region> getVillageAreaCode(int count,String downyUrl, String ancestors,
|
String provinceCode, String provinceName,
|
String cityCode, String cityName,
|
String countryCode, String countryName,
|
String townCode,String townName){
|
List<Region> listAreas = new ArrayList<>();
|
Document connect = connect(downyUrl);
|
if(ObjectUtil.isEmpty(connect)||"请开启JavaScript并刷新该页.".equals(connect.select("strong").text())||"Please enable JavaScript and refresh the page.".equals(connect.select("strong").text())){
|
try {
|
Thread.sleep(30000);
|
} catch (InterruptedException e) {
|
|
}
|
if(ObjectUtil.isNotNull(connect)){
|
System.err.println(connect.toString());
|
}
|
System.err.println(downyUrl+"重试第"+(++count)+"次");
|
getVillageAreaCode(count,downyUrl,ancestors,provinceCode,provinceName,cityCode,cityName,countryCode,countryName,townCode,townName);
|
}
|
Elements rowDown = connect.select("tr.villagetr");
|
int i = 1;
|
for (Element downElement : rowDown) {
|
String name = downElement.select("td").text();
|
String[] split = name.split(" ");
|
|
Region region = new Region();
|
region.setCode(split[0]);
|
region.setName(split[2]);
|
region.setParentCode(townCode);
|
region.setAncestors(ancestors);
|
region.setProvinceCode(provinceCode);
|
region.setProvinceName(provinceName);
|
region.setCityCode(cityCode);
|
region.setCityName(cityName);
|
region.setDistrictCode(countryCode);
|
region.setDistrictName(countryName);
|
region.setTownCode(townCode);
|
region.setTownName(townName);
|
region.setVillageCode(split[0]);
|
region.setVillageName(split[2]);
|
// region.setType(split[1]);
|
// region.setLevel(5);
|
// region.setVersion(version());
|
region.setSort(i++);
|
|
listAreas.add(region);
|
}
|
return listAreas;
|
}
|
}
|