-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsync_archlinux.py
More file actions
145 lines (128 loc) · 3.84 KB
/
sync_archlinux.py
File metadata and controls
145 lines (128 loc) · 3.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python
# author: Ni Qingliang
# NOTE: this script can be used to sync arch repository which is accessed through
# http
import datetime
import fileinput
import os
import re
import shutil
import tempfile
import time
import urllib.parse
from subprocess import call

from bs4 import BeautifulSoup
#g_host = "mirrors.aliyun.com"
#g_host = "mirrors.163.com"
#g_host = "mirror.bjtu.edu.cn"
g_host = "mirrors.neusoft.edu.cn"
g_loc_base_dir = "./repo"
class csub_rep:
def __init__(self, repo_name, main_page, loc_dir):
self._repo_name = repo_name
self._main_page = main_page
self._loc_dir = loc_dir
self._fl_new = []
self._fl_old = []
# get index
tmp_dir = tempfile.mkdtemp(prefix="syncarch_") + "/"
call("wget --progress=bar"
+ " -O index.html"
+ " --timeout=60"
+ " -P " + tmp_dir
+ " " + self._main_page, shell=True)
soup = BeautifulSoup(open("index.html"), "html.parser")
os.rmdir(tmp_dir)
tmp_node = soup.pre # 163
if not tmp_node:
tmp_node = soup.find("table") # bjtu
if tmp_node:
if tmp_node.find("tbody"):
tmp_node = tmp_node.find("tbody")
if not tmp_node:
print(" no data found!!!")
for a in tmp_node.find_all("a"):
# string的内容如果太长,html显示的是省略号
#self._fl_new.add(a.string);
file_name = a["href"]
#print(file_name)
#if not file_name:
# continue
#if re.match(".*/.*", file_name):
# continue
# 去除上一级目录
if file_name == "../":
continue
# 处理 %
file_name = urllib.parse.unquote(string=file_name, errors="strict")
# 移除开头的./,163的有问题
self._fl_new.append(re.sub("^\./", "", file_name));
#for f in self._fl_new:
# print(f)
#print(len(self._fl_new))
# create locale directory
if not os.path.exists(self._loc_dir):
os.makedirs(self._loc_dir)
else:
self._fl_old = os.listdir(self._loc_dir)
#for f in self._fl_old:
# print(f)
#print(len(self._fl_old))
# abs list
self.__abs_list = []
for i in (
".db",
".db.tar.gz",
".db.tar.gz.old",
".files",
".files.tar.gz",
".files.tar.gz.old",
".links.tar.gz"):
self.__abs_list.append(self._repo_name + i)
def download(self):
dl_list = list(set(self._fl_new) - set(self._fl_old) - set(self.__abs_list))
dl_list.sort()
for i in dl_list:
print("dling " + i)
#call("wget -N --progress=bar -P " + self._loc_dir + " " + self._main_page + i, shell=True)
cmd = "axel " \
+ "-n " + "32" + " " \
+ "-a -v " \
+ "-U " + "'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1 Safari/605.1.15'" + " " \
+ "-o '" + self._loc_dir + "' " \
+ self._main_page + i
call(cmd, shell=True)
if len(dl_list) != 0:
abs_names = self.__abs_list
bkup_dir = tempfile.mkdtemp(prefix="syncarch_") + "/"
for i in abs_names:
print("dling " + i)
call("wget -N --progress=bar -P " + bkup_dir + " " + self._main_page + i, shell=True)
for i in abs_names:
print("mving " + i)
call("mv " + bkup_dir + i + " " + self._loc_dir + i, shell=True)
os.rmdir(bkup_dir)
# check if we got right file lists
tmp_list = list(set(self.__abs_list) - set(self._fl_new))
for i in tmp_list:
print("check for right file lists: " + self._repo_name + ": " + i)
if len(tmp_list) != 0:
print("error for " + self._repo_name + ": can't remove for error index.")
return
# remove old files
for i in (set(self._fl_old) - set(self._fl_new)):
print("rming " + i)
os.remove(self._loc_dir + i)
if __name__ == '__main__':
repos = [
"core",
"extra",
# "community",
"multilib",
# "testing",
# "multilib-testing",
# "community-testing"
]
for repo in repos:
print("dling repo %s:" % (repo))
test = csub_rep(repo, "http://" + g_host + '/archlinux/' + repo + '/os/x86_64/', g_loc_base_dir + "/" + repo + "/os/x86_64/")
test.download()