Skip to content

我发现json.get('data').get('cardlistInfo')里并没有cardlistInfo字段,只有page字段 #11

@rojic12138

Description

@rojic12138

2021年12月9日20:40:04
json.get('data').get('cardlistInfo').get('cardlistInfo')会返回None.
可用代码:
import requests
from urllib.parse import urlencode
from pyquery import PyQuery as pq
# Endpoint of the m.weibo.cn container API; the query string is appended to it.
base_url = 'https://m.weibo.cn/api/container/getIndex?'
# Browser-like request headers for the API calls.
headers = {
    # A Host header carries only the host name; the previous trailing slash
    # made the value invalid.
    'Host': 'm.weibo.cn',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

def get_one_page(page):
    """Fetch one page of the hard-coded user's timeline from the container API.

    Args:
        page: Page number; added to the query string unless it equals 0.

    Returns:
        The decoded JSON body when the server answers with status 200,
        otherwise None (non-200 status, failed request, or non-JSON body).
    """
    params = {
        'type': 'uid',
        # Both numbers can be found in the profile URL
        # https://m.weibo.cn/u/7198559139?uid=7198559139&luicode=10000011&lfid=100103type%3D1%26q%3D%E4%B8%83%E6%B5%B7
        # containerid appears in the URL of the largest XHR request shown in
        # the browser's Network tab.
        'value': '7198559139',
        'containerid': '1076037198559139',
    }
    if page != 0:
        params['page'] = page
    url = base_url + urlencode(params)
    print(url)
    # Send the module-level headers, which were previously defined but unused.
    # Host is left out so that requests derives it from the URL.
    request_headers = {
        key: value for key, value in headers.items() if key.lower() != 'host'
    }
    try:
        # The timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, headers=request_headers, timeout=10)
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException also covers read timeouts; ValueError covers a
        # response body that is not valid JSON.
        print(e.args)
    return None

def parse_page(json):
    """Yield one summary dict per post found in a container-API response.

    Args:
        json: Decoded response as returned by get_one_page, or None.

    Yields:
        Dicts with the keys 'id', 'text' (markup stripped via pyquery),
        'attitudes', 'comments' and 'reposts'.
    """
    # Progress trace of the caller's module-level page marker. Looked up
    # defensively so the generator does not raise NameError when the caller
    # never set it.
    print(globals().get('since_id'))
    if not json:
        return
    cards = (json.get('data') or {}).get('cards') or []
    for card in cards:
        mblog = card.get('mblog')
        if not mblog:
            # Skip cards that carry no 'mblog' entry instead of failing on None.
            continue
        yield {
            'id': mblog.get('id'),
            'text': pq(mblog.get('text')).text(),
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }

# No nested 'cardlistInfo' key is needed; only the 'page' field of
# data.cardlistInfo is read.
with open('nanami.txt', 'w', encoding='utf-8') as f:
    for page in range(1, 100):
        page_json = get_one_page(page)
        if not page_json:
            # get_one_page returns None when the request fails; skip that page
            # rather than crash on None.get(...).
            continue
        card_info = (page_json.get('data') or {}).get('cardlistInfo') or {}
        # Module-level marker that parse_page prints as a progress trace.
        since_id = card_info.get('page')
        for result in parse_page(page_json):
            f.write(f'{result}\n')

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions