简介
看到别人写的爬虫采集mm图,有那么多人给币,本菜鸡也想写点东西分享一下,就想下载些音乐,经过长时间的分析,终于完成了。
开发环境
- Python 3.8 解释器
- PyCharm 编辑器
所使用模块
- import requests
- import re
- import json
- import logging
- from os import makedirs
- from os.path import exists
抓取的页面
代码实现
import requests
import re
import json
import logging
from os import makedirs
from os.path import exists
# Root logger setup: INFO and above, each record stamped with time, level,
# source file and line number.
_LOG_SETTINGS = {
    'level': logging.INFO,
    'format': '%(asctime)s %(levelname)s %(filename)s:%(lineno)s >>> %(message)s',
    'datefmt': '%Y-%m-%d %H:%M:%S',
}
logging.basicConfig(**_LOG_SETTINGS)

# Base address of the Kugou site.
BASE_URL = 'https://www.kugou.com'
def save_data(data, path, name):
    """Write raw audio bytes to ``<path>/<name>.mp3``.

    The target directory is created when it does not exist yet. The final
    file path is printed before writing.

    Args:
        data: Bytes to write.
        path: Target directory. An empty value falls back to the current
            directory instead of failing inside ``makedirs``.
        name: File name without extension. Path separators (``/`` and
            ``\\``) are replaced with ``_`` so that a title such as
            ``AC/DC-Song`` does not point into a non-existent sub-directory.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    directory = path or '.'
    # exist_ok avoids the check-then-create race of `exists() or makedirs()`.
    makedirs(directory, exist_ok=True)
    safe_name = name.replace('/', '_').replace('\\', '_')
    data_path = f'{directory}/{safe_name}.mp3'
    print(data_path)
    with open(data_path, 'wb') as f:
        f.write(data)
def get_page(url, timeout=10):
    """Fetch *url* with browser-like headers and return the body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The response text when the status code is 200. ``None`` when the
        status code differs or the request raises
        ``requests.RequestException``; both cases are logged.
    """
    headers = {
        # Placeholder text ("fill in yourself"): supply your own cookie
        # value before running.
        'cookie': '自己填',
        'referer': 'https://www.kugou.com/',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        logging.error('抓取此链接发生错误 %s', url, exc_info=True)
        return None
    if response.status_code == 200:
        return response.text
    # One placeholder per argument, in message order: URL first, then status.
    logging.error('在地址为: %s 时遇到错误的状态码 %s', url, response.status_code)
    return None
def parse_data(html):
    """Extract track info from an album page and resolve each mp3 URL.

    The page embeds a JSON list in a ``var data=...;`` script statement.
    For every entry an API URL is built from ``encode_album_audio_id``,
    fetched through ``get_page``, and the ``data.play_url`` field of the
    JSON response is taken as the mp3 address.

    Args:
        html: Album page source. ``None`` or an empty string is tolerated.

    Yields:
        A single tuple ``(play_urls, audio_names, album_names)`` of three
        parallel lists. ``audio_names`` entries have the form
        ``"<author_name>-<audio_name>"``. Nothing is yielded when the page
        is empty or contains no parsable ``var data`` payload. Tracks whose
        detail request fails or that have no play URL are skipped.
    """
    if not html:
        logging.error('页面内容为空,无法解析')
        return
    marker = re.search(r'var data\s*=\s*', html)
    if marker is None:
        logging.error('页面中未找到歌曲数据 (var data)')
        return
    try:
        # raw_decode reads exactly one JSON value starting at the marker, so
        # trailing statements on the same line cannot leak into the payload.
        dit_data, _ = json.JSONDecoder().raw_decode(html, marker.end())
    except json.JSONDecodeError:
        logging.error('歌曲数据不是合法的 JSON', exc_info=True)
        return

    play_urls = []
    audio_names = []
    album_names = []
    for info in dit_data:
        album_name = info['album_name']
        audio_name = f"{info['author_name']}-{info['audio_name']}"
        encode_album_audio_id = info['encode_album_audio_id']
        # NOTE(review): dfid/appid/mid/platid are hard-coded; presumably
        # captured from a browser session - confirm they are still valid.
        audio_detail_url = f'https://wwwapi.kugou.com/yy/index.php?r=play/getdata&dfid=1txkVX3n2MQE49Pvux0mAsuT&appid=1014&mid=b2d248b4902c623a204e0410004e70d0&platid=4&encode_album_audio_id={encode_album_audio_id}'
        logging.info('处理接口链接为: %s', audio_detail_url)

        # Request the track API and read the mp3 address from its JSON body.
        audio_info_html = get_page(audio_detail_url)
        if not audio_info_html:
            # get_page has already logged the failure.
            continue
        try:
            play_url = json.loads(audio_info_html)['data']['play_url']
        except (ValueError, KeyError, TypeError):
            logging.error('无法从接口响应中解析播放地址: %s', audio_detail_url)
            continue
        if not play_url:
            logging.warning('歌曲 %s 没有可用的播放地址,已跳过', audio_name)
            continue

        play_urls.append(play_url)
        audio_names.append(audio_name)
        album_names.append(album_name)

    yield play_urls, audio_names, album_names
def download(data):
    """Download every track in *data* and store it as an mp3 file.

    A track that cannot be fetched or written is logged and skipped so that
    one failure does not abort the remaining downloads.

    Args:
        data: Iterable of ``(play_url, audio_name, album_name)`` triples.
            The album name is used as the target directory and the audio
            name as the file name (both passed on to ``save_data``).
    """
    for play_url, audio_name, name in data:
        if not play_url:
            logging.warning('歌曲 %s 没有播放地址,已跳过', audio_name)
            continue
        try:
            response = requests.get(play_url, timeout=30)
        except requests.RequestException:
            logging.error('抓取此链接发生错误 %s', play_url, exc_info=True)
            continue
        if response.status_code != 200:
            # Do not store an error page under an .mp3 name.
            logging.error('在地址为: %s 时遇到错误的状态码 %s', play_url, response.status_code)
            continue
        try:
            save_data(response.content, name, audio_name)
        except OSError:
            logging.error('保存文件失败 %s', audio_name, exc_info=True)
            continue
        logging.info('正在保存--> %s', audio_name)
def main():
    """Fetch one hard-coded album page and download all of its tracks.

    The function stops early, after logging an error, when the album page
    itself cannot be fetched.
    """
    url = 'https://www.kugou.com/album/info/yhaz39/'
    html_data = get_page(url)
    if html_data is None:
        # get_page returns None on any failure; there is nothing to parse.
        logging.error('专辑页面抓取失败,程序终止: %s', url)
        return
    data = parse_data(html_data)  # generator object
    logging.info('获得包含mp3,歌名,专辑名的生成器对象 >>> %s', data)
    for play_urls, audio_names, album_names in data:
        # Re-pair the three parallel lists into per-track triples.
        download(zip(play_urls, audio_names, album_names))
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
执行结果:
大家可以去试试哦,白嫖的,不要白不要。
评论 (0)