Skip to content

Latest commit

 

History

History
97 lines (78 loc) · 3.33 KB

File metadata and controls

97 lines (78 loc) · 3.33 KB
Error in user YAML: (<unknown>): could not find expected ':' while scanning a simple key at line 3 column 1
---
- oeasy Python 0563
- 这是 oeasy 系统化 Python 教程,从基础一步步讲,扎实、完整、不跳步。愿意花时间学,就能真正学会。
本教程同步发布在: 

     个人网站: `https://oeasy.org` 
     蓝桥云课: `https://www.lanqiao.cn/courses/3584` 
     GitHub: `https://github.com/overmind1980/oeasy-python-tutorial` 
     Gitee: `https://gitee.com/overmind1980/oeasypython` 
---

打开艺术家页面

图片描述

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# 创建 WebDriver 实例
driver = webdriver.Safari()

# 打开 Spotify Keshi 歌手页面
driver.get('https://open.spotify.com/artist/3pc0bOVB5whxmD50W79wwO')

# 等待页面加载完成
driver.implicitly_wait(10)  # 根据网络情况,可能需要调整等待时间

# 点击“查看更多”按钮以加载所有歌曲
try:
    more_button_xpath = "//div[@class='e-9541-text encore-text-body-small-bold' and @data-encore-id='text' and contains(text(), '查看更多')]/parent::button"
    more_button = WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.XPATH, more_button_xpath))
    )
    driver.execute_script("arguments[0].click();", more_button)
    # 等待更多歌曲加载
    WebDriverWait(driver, 10).until(
        lambda d: len(d.find_elements(By.CSS_SELECTOR, '.e-9541-text.encore-text-body-small.HxDMwNr5oCxTOyqt85gi')) >= 10
    )
except Exception as e:
    print(f"未能找到或点击'查看更多'按钮: {e}")

# 等待歌曲列表加载完成
time.sleep(5)

# 找到包含歌曲名信息的元素
song_titles_elements = driver.find_elements(By.CSS_SELECTOR, '.e-9541-text.encore-text-body-medium.encore-internal-color-text-base')
# 找到包含歌曲播放量信息的元素
play_count_elements = driver.find_elements(By.CSS_SELECTOR, '.e-9541-text.encore-text-body-small.HxDMwNr5oCxTOyqt85gi')

# 提取歌名和播放量信息
data = []
if len(song_titles_elements) >= 10 and len(play_count_elements) >= 10:
    for i in range(10):
        title = song_titles_elements[i].text
        play_count = play_count_elements[i].text.replace(',', '')  # 移除逗号
        try:
            play_count = int(play_count)  # 尝试转换为整数
        except ValueError:
            continue  # 如果转换失败,跳过这首歌
        data.append({'Title': title, 'Plays': play_count})
else:
    print("歌曲名或播放量信息不足 10 条,无法提取完整的 10 条数据。")

# 关闭浏览器
driver.quit()

# 将数据转换为 DataFrame
df = pd.DataFrame(data)

# 按播放量从大到小排列
df['Plays'] = df['Plays'].astype(int)  # 确保播放量列是整数类型
df = df.sort_values(by='Plays', ascending=False)

# 添加序号列
df.insert(0, 'Rank', range(1, len(df) + 1))

# 保存数据到 CSV 文件
df.to_csv('songs_with_plays.csv', index=False)

# 打印提取的数据
for index, row in df.iterrows():
    print(f"{row['Rank']}. {row['Title']} - Plays: {row['Plays']}")

print("Data scraped and saved to songs_with_plays.csv")

  • 本文来自 oeasy Python 系统教程。
  • 想完整、扎实学 Python,
  • 搜索 oeasy 即可。