Skip to content

Commit ee464df

Browse files
authored
Support generating custom region.dat file from china_area_mysql (#160)
PR:#160
1 parent 20cea0e commit ee464df

File tree

11 files changed

+451
-1
lines changed

11 files changed

+451
-1
lines changed

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,15 @@ Address(
138138

139139
也可以采用[国家的标准地址库][2] (对应的github库, [中国5级行政区域mysql库][3])
140140

141+
### 2.1.1 导入中国5级行政区域mysql库注意事项
142+
0. 本测试配置基于Server version: 8.0.21 MySQL Community Server - GPL环境,其它可能略有差异,可通过下面两个SQL确认配置是否OK
143+
```
144+
show variables like '%CHARACTER%';
145+
show variables like '%max_allowed_packet%';
146+
```
147+
1. 设置max_allowed_packet,[mysqld]下max_allowed_packet = 2000M,[mysqldump]下max_allowed_packet = 2000M
148+
2. 设置字符集,[client]下default-character-set=utf8mb4,[mysqld]下character-set-server=utf8mb4和init_connect='SET NAMES utf8mb4',[mysql]下default-character-set=utf8mb4
149+
141150

142151
## 2.2 标准化
143152
1. 首先基于正则提取出道路、建筑物号等信息

pom.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,19 @@
3131
<artifactId>gson</artifactId>
3232
<version>${gson.version}</version>
3333
</dependency>
34+
35+
<dependency>
36+
<groupId>commons-io</groupId>
37+
<artifactId>commons-io</artifactId>
38+
<version>2.10.0</version>
39+
<scope>test</scope>
40+
</dependency>
41+
<dependency>
42+
<groupId>com.google.guava</groupId>
43+
<artifactId>guava</artifactId>
44+
<version>30.1.1-jre</version>
45+
<scope>test</scope>
46+
</dependency>
3447

3548
<dependency>
3649
<groupId>org.jetbrains.kotlin</groupId>

src/test/java/io/patamon/geocoding/TestNormalizing.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ class TestNormalizing {
464464
360721000000, "赣县区",
465465
360721101000, "王母渡镇",
466466
360721101000, "王母渡镇",
467-
null, village=null,
467+
null, null,
468468
null,
469469
null,
470470
null,
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package io.patamon.geocoding.region;
2+
3+
import java.io.IOException;
4+
5+
import io.patamon.geocoding.region.model.RegionEntity;
6+
import io.patamon.geocoding.region.util.OutUtil;
7+
8+
public class Main {
9+
10+
// 导入数据库成功后,执行china.sql,插入数据项:【中国】
11+
public static void main(String[] args) throws IOException {
12+
long start = System.currentTimeMillis();
13+
String pathname = "~/Documents/cnarea" + 20210707 + ".dat";
14+
RegionDatFileHelper.writeDatFile(pathname);
15+
long end = System.currentTimeMillis();
16+
OutUtil.info(String.format("cost %s ms", end - start));
17+
RegionEntity regionEntity = RegionDatFileHelper.readDatFile(pathname);
18+
OutUtil.info(regionEntity.toString());
19+
}
20+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
[![Java 8 CI](https://github.com/IceMimosa/geocoding/actions/workflows/java8.yml/badge.svg)](https://github.com/IceMimosa/geocoding/actions/workflows/java8.yml)
3+
4+
# 介绍
5+
项目目前采用的是 [淘宝物流4级地址](https://lsp.wuliu.taobao.com/locationservice/addr/output_address_town.do)的标准地址库,即`classpath:src/main/resources/core/region.dat`中的数据,
6+
本package下代码可将 [中国5级行政区域](https://github.com/kakuilan/china_area_mysql) 处理为兼容geocoding的标准地址库。
7+
8+
### 使用步骤
9+
1、成功导入china_area_mysql到数据库
10+
2、执行本package下sql/china.sql插入`中国`数据
11+
3、修改本package下util/JdbcUtil.java中的jdbc相关参数
12+
4、执行本package下Main类中main方法
13+
5、将生成的dat文件改名为region.dat并放入`classpath:src/main/resources/core/`
14+
15+
### 注意事项
16+
本测试配置基于Server version: 8.0.21 MySQL Community Server - GPL环境,其它可能略有差异,可通过下面两个SQL确认配置是否OK
17+
```
18+
show variables like '%CHARACTER%';
19+
show variables like '%max_allowed_packet%';
20+
```
21+
1. 设置max_allowed_packet,[mysqld]下max_allowed_packet = 2000M,[mysqldump]下max_allowed_packet = 2000M
22+
2. 设置字符集,[client]下default-character-set=utf8mb4,[mysqld]下character-set-server=utf8mb4和init_connect='SET NAMES utf8mb4',[mysql]下default-character-set=utf8mb4
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
package io.patamon.geocoding.region;
2+
3+
import java.io.ByteArrayInputStream;
4+
import java.io.ByteArrayOutputStream;
5+
import java.io.File;
6+
import java.io.IOException;
7+
import java.sql.Connection;
8+
import java.util.Base64;
9+
import java.util.List;
10+
import java.util.zip.GZIPInputStream;
11+
import java.util.zip.GZIPOutputStream;
12+
13+
import org.apache.commons.io.IOUtils;
14+
15+
import com.google.common.collect.Lists;
16+
import com.google.common.io.Files;
17+
import com.google.gson.Gson;
18+
19+
import io.patamon.geocoding.model.RegionType;
20+
import io.patamon.geocoding.region.model.RegionEntity;
21+
import io.patamon.geocoding.region.util.JdbcUtil;
22+
import kotlin.text.Charsets;
23+
24+
public class RegionDatFileHelper {
25+
26+
final static List<String> provinceLevelCity1 = Lists.newArrayList("北京市", "天津市", "上海市", "重庆市");
27+
28+
public static void writeDatFile(String pathname) throws IOException {
29+
Connection conn = JdbcUtil.getConnection();
30+
if (conn == null) return;
31+
List<RegionEntity> china = Lists.newArrayList();
32+
List<RegionEntity> provinces = RegionSqlHelper.findProvinces(conn);
33+
for (int i = 0; i < provinces.size(); i++) {
34+
RegionEntity province = provinces.get(i);
35+
List<RegionEntity> list = RegionSqlHelper.findByProvince(conn, province.getShortName() + "%");
36+
if (i == 0) {
37+
List<RegionEntity> tree = parseProvince(list);
38+
china.add(tree.get(0));
39+
} else {
40+
List<RegionEntity> tree = parseProvince(list);
41+
china.get(0).getChildren().add(tree.get(0));
42+
}
43+
}
44+
JdbcUtil.free(conn);
45+
Gson gson = new Gson();
46+
47+
byte[] context = encode(gson.toJson(china.get(0)));
48+
write(pathname, new String(context, Charsets.UTF_8));
49+
}
50+
51+
private static List<RegionEntity> parseProvince(List<RegionEntity> list) {
52+
List<RegionEntity> province = Lists.newArrayList();
53+
54+
for (RegionEntity entity : list) {
55+
if (entity.getParentId().equals(0L)) {
56+
if (entity.getChildren() == null) entity.setChildren(Lists.newArrayList());
57+
entity.setType(of(entity.getId(), entity.getLevel(), entity.getName()));
58+
province.add(entity);
59+
}
60+
}
61+
62+
for (RegionEntity item : province) {
63+
item = recursive(item, list, province.size());
64+
}
65+
66+
return province;
67+
}
68+
69+
private static RegionEntity recursive(RegionEntity parent, List<RegionEntity> list, int j) {
70+
for (int i = j; i < list.size(); i++) {
71+
RegionEntity entity = list.get(i);
72+
if (parent.getId().equals(entity.getParentId())) {
73+
entity = recursive(entity, list, i + 1);
74+
entity.setType(of(entity.getId(), entity.getLevel(), entity.getName()));
75+
if (parent.getChildren() == null) parent.setChildren(Lists.newArrayList());
76+
parent.getChildren().add(entity);
77+
}
78+
}
79+
return parent;
80+
}
81+
82+
private static void write(final String fileName, final String contents) throws IOException {
83+
File file = new File(fileName);
84+
file.deleteOnExit();
85+
file.createNewFile();
86+
Files.write(contents.getBytes(), file);
87+
}
88+
89+
private static RegionType of(Long id, int level, String name) {
90+
if (id.equals(100000000000L)) return RegionType.Country;
91+
if (level == 0) {
92+
if (provinceLevelCity1.contains(name)) return RegionType.ProvinceLevelCity1;
93+
return RegionType.Province;
94+
}
95+
if (level == 1) {
96+
if ("直辖区".equalsIgnoreCase(name)) return RegionType.ProvinceLevelCity2;
97+
if ("直辖县".equalsIgnoreCase(name)) return RegionType.CityLevelDistrict;
98+
return RegionType.City;
99+
}
100+
if (level == 2) return RegionType.District;
101+
if (level == 3) {
102+
if (name.matches("乡$")) return RegionType.Town;
103+
if (name.matches("镇$")) return RegionType.Town;
104+
return RegionType.PlatformL4;
105+
}
106+
if (level == 4) return RegionType.Village;
107+
return RegionType.Undefined;
108+
}
109+
110+
public static RegionEntity readDatFile(String file) throws IOException {
111+
byte[] byteArray = Files.toByteArray(new File(file));
112+
String json = new String(byteArray);
113+
return new Gson().fromJson(decode(json), RegionEntity.class);
114+
}
115+
116+
private static String decode(String str) throws IOException {
117+
byte decodedByteArray[] = Base64.getMimeDecoder().decode(str);
118+
GZIPInputStream gzipis = new GZIPInputStream(new ByteArrayInputStream(decodedByteArray));
119+
return new String(IOUtils.toByteArray(gzipis), Charsets.UTF_8);
120+
}
121+
122+
private static byte[] encode(String str) throws IOException {
123+
ByteArrayOutputStream out = new ByteArrayOutputStream();
124+
GZIPOutputStream gzipos = new GZIPOutputStream(out);
125+
gzipos.write(str.getBytes(Charsets.UTF_8));
126+
gzipos.close();
127+
return Base64.getMimeEncoder().encode(out.toByteArray());
128+
}
129+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package io.patamon.geocoding.region;
2+
3+
import java.sql.Connection;
4+
import java.sql.PreparedStatement;
5+
import java.sql.ResultSet;
6+
import java.sql.SQLException;
7+
import java.util.List;
8+
9+
import com.google.common.collect.Lists;
10+
11+
import io.patamon.geocoding.region.model.RegionEntity;
12+
import io.patamon.geocoding.region.util.JdbcUtil;
13+
import io.patamon.geocoding.region.util.OutUtil;
14+
15+
public class RegionSqlHelper {
16+
17+
private static final String sqlFindAllProvinces = "select `level`, area_code as id, parent_code as parentId, "
18+
+ "`name` as `name`, short_name as shortName, merger_name as `alias`, zip_code as zip "
19+
+ "from cnarea_2020 where parent_code = 0 order by area_code";
20+
21+
private static final String sqlFindByProvince = "select `level`, area_code as id, parent_code as parentId, "
22+
+ "`name` as `name`, short_name as shortName, merger_name as `alias`, zip_code as zip "
23+
+ "from cnarea_2020 where merger_name like ? order by `level`, parent_code, area_code";
24+
25+
public static List<RegionEntity> findProvinces(Connection conn) {
26+
PreparedStatement pstmt = null;
27+
ResultSet rs = null;
28+
try {
29+
pstmt = conn.prepareStatement(sqlFindAllProvinces);
30+
rs = pstmt.executeQuery();
31+
OutUtil.info(sqlFindAllProvinces);
32+
return convert(rs);
33+
} catch (SQLException sqle) {
34+
OutUtil.err("Exception: RegionEntityHelper.findProvinces " + sqle.getMessage());
35+
}finally {
36+
JdbcUtil.free(rs, pstmt);
37+
}
38+
return Lists.newArrayList();
39+
}
40+
41+
42+
public static List<RegionEntity> findByProvince(Connection conn, String name) {
43+
PreparedStatement pstmt = null;
44+
ResultSet rs = null;
45+
try {
46+
pstmt = conn.prepareStatement(sqlFindByProvince);
47+
pstmt.setString(1, name);
48+
rs = pstmt.executeQuery();
49+
OutUtil.info(sqlFindByProvince.replace("?", "'" + name + "'"));
50+
return convert(rs);
51+
} catch (SQLException sqle) {
52+
OutUtil.err("Exception: RegionEntityHelper.findByProvince " + sqle.getMessage());
53+
} finally {
54+
JdbcUtil.free(rs, pstmt);
55+
}
56+
return Lists.newArrayList();
57+
}
58+
59+
private static List<RegionEntity> convert(ResultSet rs) throws SQLException {
60+
List<RegionEntity> list = Lists.newArrayList();
61+
while (rs != null && rs.next()) {
62+
RegionEntity regionEntity = new RegionEntity();
63+
regionEntity.setAlias(rs.getString("alias"));
64+
regionEntity.setId(rs.getLong("id"));
65+
regionEntity.setLevel(rs.getInt("level"));
66+
regionEntity.setName(rs.getString("name"));
67+
regionEntity.setParentId(rs.getLong("parentId"));
68+
regionEntity.setShortName(rs.getString("shortName"));
69+
regionEntity.setZip(rs.getString("zip"));
70+
list.add(regionEntity);
71+
}
72+
return list;
73+
}
74+
}

0 commit comments

Comments
 (0)