Skip to content

Commit c1fc379

Browse files
committed
Deep deduplication for text CIDR list
1 parent c053afa commit c1fc379

File tree

3 files changed

+69
-3
lines changed

3 files changed

+69
-3
lines changed

.github/workflows/periodical-update.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@ jobs:
4141
run: |
4242
curl -LR -o dist/ipip_net.txt "https://raw.githubusercontent.com/17mon/china_ip_list/master/china_ip_list.txt"
4343
curl -LR -o dist/chunzhen.txt "https://raw.githubusercontent.com/metowolf/iplist/master/data/country/CN.txt"
44+
echo >> dist/chunzhen.txt # ensure newline at ending
4445
45-
- name: Merge and IP lists and remove duplicates
46+
- name: Merge and IP lists and deep deduplication
4647
run: |
47-
awk 'FNR==1{print ""}{print}' dist/*.txt > dist/merge.txt
48-
awk '!seen[$0]++' dist/merge.txt > dist/CN-ip-cidr.txt
48+
cat dist/*.txt | dist/dedup > dist/CN-ip-cidr.txt
4949
5050
- name: Generate GeoIP2 database
5151
run: |

build.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
go build -o dist/ipip2mmdb main.go ip2cidr.go
22
go build -o dist/verify_ip verify/verify_ip.go
3+
gcc -o dist/dedup dedup.c

dedup.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
/*
 * MASK(x): IPv4 netmask with the top x bits set, for a prefix length x in
 * the range 0..32 (assumes a 32-bit unsigned int).
 *
 * x == 0 is special-cased because shifting a 32-bit value by 32 bits is
 * undefined.  Every use of the argument is parenthesized so that expressions
 * such as MASK(a + b) expand correctly.  The argument is evaluated twice, so
 * do not pass expressions with side effects.  Values above 32 are not
 * supported and must be rejected by the caller.
 */
#define MASK(x) ((x) ? ~((1u << (32u - (x))) - 1u) : 0u)

/* Address bits of the trie path currently being walked by print(). */
unsigned current;
6+
/*
 * Binary trie over IPv4 address bits.  The node reached after following d
 * child links from the root stands for a /d prefix; child[0] and child[1]
 * continue the path with the next address bit being 0 or 1 respectively.
 */
struct Trie {
    char flag;             /* non-zero: the whole prefix of this node is covered */
    struct Trie *child[2]; /* sub-prefixes for next bit 0 / 1, NULL when absent */
} *root = NULL;

/*
 * Collapse fully covered sibling pairs, bottom-up, in the subtree rooted at p.
 *
 * A node becomes flagged when both of its children exist and are (or become)
 * fully covered.  Both subtrees are always visited, even when one side is
 * missing or turns out not to be covered, so that mergeable pairs deeper in
 * the other side are still collapsed.
 *
 * p must not be NULL.
 * Returns non-zero when the prefix represented by p is fully covered.
 */
char merge(struct Trie *p) {
    char low_covered;
    char high_covered;

    /* already marked: everything below is redundant, no need to descend */
    if (p->flag) {
        return 1;
    }

    /* evaluate both sides unconditionally (no short-circuit) */
    low_covered = p->child[0] ? merge(p->child[0]) : 0;
    high_covered = p->child[1] ? merge(p->child[1]) : 0;

    /* covered only when both halves are covered */
    p->flag = (char)(low_covered && high_covered);
    return p->flag;
}
19+
20+
void print(struct Trie *p, unsigned depth) {
21+
// print whole subnet
22+
if(p->flag) {
23+
unsigned ip = current & MASK(depth);
24+
printf("%u.%u.%u.%u/%u\n", ip>>24&0xff, ip>>16&0xff, ip>>8&0xff, ip&0xff, depth);
25+
return;
26+
}
27+
// dig deeper
28+
if(p->child[0]) {
29+
current &= ~(1<<(31-depth));
30+
print(p->child[0], depth+1);
31+
}
32+
if(p->child[1]) {
33+
current |= 1<<(31-depth);
34+
print(p->child[1], depth+1);
35+
}
36+
}
37+
38+
int main() {
39+
unsigned ip1, ip2, ip3, ip4, prefix_len;
40+
while(scanf("%u.%u.%u.%u/%u", &ip1, &ip2, &ip3, &ip4, &prefix_len)==5) {
41+
// convert to binary
42+
unsigned ip = (ip1<<24) | (ip2<<16) | (ip3<<8) | (ip4);
43+
unsigned mask = MASK(prefix_len);
44+
// build trie
45+
struct Trie **p = &root;
46+
while(mask) {
47+
// walk
48+
if((*p)==NULL) (*p) = calloc(1, sizeof(struct Trie));
49+
p = &((*p)->child[ip>>31]);
50+
// next bit
51+
ip <<= 1;
52+
mask <<= 1;
53+
}
54+
// mark node
55+
if((*p)==NULL) (*p) = calloc(1, sizeof(struct Trie));
56+
(*p)->flag = 1;
57+
}
58+
if(root) {
59+
// merge trie
60+
merge(root);
61+
// print trie
62+
print(root, 0);
63+
}
64+
return 0;
65+
}

0 commit comments

Comments
 (0)