基于接口,利用python批量下载文件 #1812
DataShare-duo
started this conversation in
General
Replies: 1 comment
-
|
增加功能:
"""
===========================
@Time : 2025/12/12 09:43
@File : openlist
@Software: PyCharm
@Platform: Win10
@Author : 数据人阿多
===========================
"""
import time
import requests
import json
import pandas as pd
from pathlib import Path
class OpenList:
    """Minimal client for an OpenList/Alist-style HTTP API.

    Authenticates on construction, then supports paginated directory
    listing and recursive file download of a remote path tree.
    """

    def __init__(self, username, password, ip, port):
        """Store connection settings and log in immediately.

        :param username: API account name
        :param password: API account password
        :param ip: server host or IP address
        :param port: server port (string or int; only interpolated into URLs)
        """
        self.username = username
        self.password = password
        self.ip = ip
        self.port = port
        self._login()

    def _login(self):
        """POST credentials to /api/auth/login and cache the auth token."""
        url = f'http://{self.ip}:{self.port}/api/auth/login'
        payload = json.dumps({
            "username": self.username,
            "password": self.password,
        })
        headers = {'Content-Type': 'application/json'}
        # timeout guards against an unreachable server hanging forever;
        # raise_for_status gives a clear HTTP error instead of a KeyError below.
        response = requests.post(url, headers=headers, data=payload, timeout=30)
        response.raise_for_status()
        self.token = response.json()['data']['token']

    def path_files(self, path, pages):
        """List up to *pages* pages of entries under remote *path*.

        :param path: remote directory path, e.g. '/datashare/data'
        :param pages: maximum number of pages to fetch
        :return: (files_df, dirs_df) — DataFrames of file entries and
            directory entries as returned by the API
        """
        api_url = f'http://{self.ip}:{self.port}/api/fs/list'
        file_entries = []
        dir_entries = []
        for page in range(1, pages + 1):
            content = self.path_every_page(page, api_url, path)
            if not content:
                # Empty/None content means we ran past the last page.
                print(f"获取路径 {path} 数据完成!")
                break
            for entry in content:
                if entry['is_dir']:
                    dir_entries.append(entry)
                else:
                    file_entries.append(entry)
        return pd.DataFrame(file_entries), pd.DataFrame(dir_entries)

    def path_every_page(self, page, api_url, path, per_page=100):
        """Fetch one page of the /api/fs/list response for *path*.

        :param page: 1-based page number
        :param api_url: full URL of the list endpoint
        :param path: remote directory path
        :param per_page: page size requested from the server
        :return: the 'content' list from the response (may be None/empty
            past the last page)
        """
        payload = json.dumps({
            "path": path,
            "password": "",
            "refresh": False,
            "page": page,
            "per_page": per_page,
        })
        headers = {
            'Authorization': self.token,
            'Content-Type': 'application/json',
        }
        print(f"开始获取 {path} ,第{page:02}页数据")
        response = requests.post(api_url, headers=headers, data=payload,
                                 timeout=30)
        response.raise_for_status()
        return response.json()['data']['content']

    def download_file(self, url, filename, local_path, retry=3):
        """Stream *url* into *local_path*/*filename*, retrying on failure.

        :param url: signed direct-download URL
        :param filename: target file name
        :param local_path: Path of an existing local directory
        :param retry: number of retries after the first attempt
        :return: True on success, False on 404 or after all attempts fail
        """
        for attempt in range(retry + 1):  # total attempts = retries + 1
            try:
                response = requests.get(url, stream=True, timeout=30)
                if response.status_code == 200:
                    total_size = int(response.headers.get('content-length', 0))
                    downloaded = 0
                    with open(local_path / filename, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)
                                downloaded += len(chunk)
                                # Progress only when the server sent a length.
                                if total_size > 0:
                                    percent = (downloaded / total_size) * 100
                                    print(f"\r下载进度: {percent:.1f}%", end='')
                    print(f"\n下载成功: {filename}")
                    return True
                elif response.status_code == 404:
                    print(f"文件不存在: {url}")
                    return False  # 404 is permanent; retrying cannot help
                else:
                    print(f"下载失败,状态码: {response.status_code}")
            except requests.exceptions.RequestException as e:
                print(f"请求异常: {e}")
            # Exponential backoff before the next attempt: 1, 2, 4... seconds.
            if attempt < retry:
                wait_time = 2 ** attempt
                print(f"等待 {wait_time} 秒后重试... (剩余尝试: {retry - attempt})")
                time.sleep(wait_time)
        print(f"达到最大重试次数,下载失败: {filename}")
        return False

    def download_path_files(self, path, local_path, pages=10):
        """Recursively download everything under remote *path* to *local_path*.

        :param path: remote directory path
        :param local_path: local Path directory (must already exist)
        :param pages: maximum listing pages to fetch per directory
        """
        path_files, path_dirs = self.path_files(path, pages)
        total_files = len(path_files)
        total_dirs = len(path_dirs)
        print(f"目录 {path} 下共有{total_files}个文件, 有{total_dirs}个目录")
        if total_files:
            print(f"开始下载目录 {path} 下的文件")
            for idx, row in path_files.iterrows():
                filename = row['name']
                sign = row['sign']
                # NOTE(review): filename is interpolated unencoded; names with
                # '?', '#' or '%' may need URL-quoting — verify against server.
                url = (f'http://{self.ip}:{self.port}/p{path}/{filename}'
                       f'?sign={sign}')
                print(f"开始下载[{idx + 1}/{total_files}]:{filename}")
                self.download_file(url, filename, local_path)
        if total_dirs:
            for _, dir_row in path_dirs.iterrows():
                sub_remote = path + '/' + dir_row['name']
                sub_local = local_path / dir_row['name']
                # exist_ok avoids a race between exists() and mkdir().
                sub_local.mkdir(exist_ok=True)
                print(f"\n开始获取目录内容:{sub_remote} ")
                self.download_path_files(sub_remote, sub_local)
if __name__ == '__main__':
    # Log in to the server, then mirror the remote directory tree into
    # a local 'data' folder next to the current working directory.
    client = OpenList('admin', '1234566', '1.1.1.1', '1024')
    remote_dir = '/datashare/data'
    local_dir = Path.cwd() / 'data'
    if not local_dir.exists():
        local_dir.mkdir()
    client.download_path_files(remote_dir, local_dir)
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
Beta Was this translation helpful? Give feedback.
All reactions