1 Star 4 Fork 2

怀雪 / 爬虫音乐酷我网易云

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
爬取网易云音乐所有歌手---多线程.py 9.30 KB
一键复制 编辑 原始数据 按行查看 历史
怀雪 提交于 2022-08-14 07:49 . 额 exe文件 会报毒 自行甄别
import re
import threading
import requests
from bs4 import BeautifulSoup
import json
import os
def fixname(filename):
    """Sanitize *filename* for use as a Windows file name.

    Replaces every occurrence of ``* ? / \\ | . : > < " '`` with a single
    space.  Note that ``.`` is removed too, so the caller must append the
    extension (e.g. ``.mp3``) after sanitizing.

    :param filename: raw song/artist name scraped from the page
    :return: sanitized string safe to embed in a file name
    """
    # The original pattern escaped '|' but never included the backslash
    # itself, which is a path separator on Windows and must go as well.
    return re.sub(r'[*?/\\|.:><"\']', " ", filename)
def mk_filename(filename):
    """Ensure directory *filename* exists; return it with a trailing ``\\``.

    The returned string is used throughout this script as a path prefix
    via plain string concatenation, hence the appended backslash.

    :param filename: directory path, without trailing separator
    :return: ``filename + "\\\\"``, guaranteed to exist as a directory
    """
    dir_path = filename + "\\"
    # makedirs creates missing parent directories and, with exist_ok=True,
    # tolerates concurrent creation — unlike the original exists+mkdir pair.
    os.makedirs(dir_path, exist_ok=True)
    return dir_path
def music_data_list(url):
    """Fetch a playlist/chart page and return its track metadata list.

    NetEase embeds the track list as JSON inside a ``<textarea>`` under the
    ``div.g-wrap12`` container; this downloads the page, locates that
    element and returns the parsed JSON (a list of track dicts).
    """
    ua = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36"
          " (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
    response = requests.get(url=url, headers={"user-agent": ua})
    soup = BeautifulSoup(response.text, "html.parser")
    container = soup.find("div", class_="g-wrap12")
    payload = container.find("textarea")
    return json.loads(payload.text)
def pyer_music_data_list(url):
    """Fetch an artist page and return its track metadata list.

    Same trick as :func:`music_data_list`, but artist pages keep the JSON
    ``<textarea>`` under the ``div.f-cb`` container instead.
    """
    ua = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36"
          " (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
    response = requests.get(url=url, headers={"user-agent": ua})
    soup = BeautifulSoup(response.text, "html.parser")
    container = soup.find("div", class_="f-cb")
    payload = container.find("textarea")
    return json.loads(payload.text)
def mu_download(url, filename):
    """Download every song of a chart/playlist page into directory *filename*.

    Audio is fetched through NetEase's public outer-url redirect endpoint.
    Success/failure for each track is printed and appended to log\\log.txt
    (the ``log`` directory is assumed to exist — TODO confirm with caller).

    :param url: playlist page URL, parsed by :func:`music_data_list`
    :param filename: target directory name (created if missing)
    """
    target_dir = mk_filename(filename)
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36"
                      " (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    for track in music_data_list(url):
        mu_name = fixname(track["name"])
        mu_id = track["id"]
        mu_play = track["artists"][0]["name"]
        i_url = 'http://music.163.com/song/media/outer/url?id=' + str(mu_id)
        try:
            # The audio fetch is inside the try: originally it sat outside,
            # so a single network error aborted the entire loop.
            mu_data = requests.get(url=i_url, headers=headers).content
            with open(target_dir + str(mu_name) + "-" + str(mu_play) + '.mp3', mode='wb') as f:
                f.write(mu_data)
            print(mu_name + "-" + mu_play + '.mp3', '下载成功')
            with open('log\\log.txt', "a", encoding="utf-8") as f:
                f.write(mu_name + "---" + mu_play + '.mp3' + '下载成功\n')
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C still interrupts.
            print(mu_name + "-" + mu_play + '.mp3', '下载失败')
            with open('log\\log.txt', "a", encoding="utf-8") as f:
                # Stray "5" typo removed from the original failure message.
                f.write(mu_name + "-" + mu_play + '.mp3' + '下载失败\n')
def mu_player_download(url, path, filename):
    """Download all songs from one artist page, skipping files already on disk.

    Runs as the thread target in :func:`pyer_down`.  Success/failure for
    each track is printed and appended to log\\log.txt.

    :param url: artist page URL, parsed by :func:`pyer_music_data_list`
    :param path: category directory prefix (already ends with ``\\``)
    :param filename: artist name, appended to *path* as the target directory
    """
    target_dir = mk_filename(path + filename)
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36"
                      " (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    for track in pyer_music_data_list(url):
        mu_name = fixname(track["name"])
        mu_id = track["id"]
        mu_play = track["artists"][0]["name"]
        dest = target_dir + str(mu_name) + "-" + str(mu_play) + '.mp3'
        if os.path.exists(dest):
            # Resume support: this song was downloaded on a previous run.
            print(dest, "----------------------------这首歌已经存在了\n")
            continue
        i_url = 'http://music.163.com/song/media/outer/url?id=' + str(mu_id)
        try:
            # The audio fetch is inside the try: originally it sat outside,
            # so a single network error aborted the entire loop.
            mu_data = requests.get(url=i_url, headers=headers).content
            with open(dest, mode='wb') as f:
                f.write(mu_data)
            print(mu_name + "-" + mu_play + '.mp3', '下载成功')
            with open('log\\log.txt', "a", encoding="utf-8") as f:
                f.write(mu_name + "---" + mu_play + '.mp3' + '下载成功\n')
        except Exception:
            # Narrowed from a bare ``except:`` so Ctrl-C still interrupts.
            print(mu_name + "-" + mu_play + '.mp3', '下载失败')
            with open('log\\log.txt', "a", encoding="utf-8") as f:
                # Stray "5" typo removed from the original failure message.
                f.write(mu_name + "-" + mu_play + '.mp3' + '下载失败\n')
def top_data_list(url):
    """Scrape the toplist index page and return ``(chart_url, title)`` pairs.

    :param url: toplist index page URL
    :return: list of ``(full chart URL, chart title)`` tuples
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36"
                      " (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    req = requests.get(url=url, headers=headers)
    bs = BeautifulSoup(req.text, "html.parser")
    bsre = bs.find_all("p", class_="name")
    # Raw string: the original's non-raw '\=' / '\"' / '\<' escapes are
    # invalid escape sequences that warn (and will error) on modern Python.
    top_re = re.findall(r'href="/discover/toplist\?id=(.*?)">(.*?)</a>', str(bsre))
    return [("https://music.163.com/discover/toplist?id=" + chart_id, title)
            for chart_id, title in top_re]
def mc_data_list(url):
    """Scrape the artist-discovery page for category ``(url, name)`` pairs.

    :param url: artist discovery page URL
    :return: list of ``(full category URL, category name)`` tuples
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36"
                      " (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    req = requests.get(url=url, headers=headers)
    bs = BeautifulSoup(req.text, "html.parser")
    bsre = bs.find_all("div", class_="g-wrap4 n-sgernav")
    # Raw string replaces the original's invalid '\=' / '\<' escapes.
    mc_re = re.findall(r'href="/discover/artist/cat\?id=(.*?)">(.*?)</a>', str(bsre))
    return [("https://music.163.com/discover/artist/cat?id=" + cat_id, cat_name)
            for cat_id, cat_name in mc_re]
def az_data_list(url):
    """Return the per-initial (A-Z / 0 / hot) sub-page URLs for one category.

    :param url: an artist category URL (``.../artist/cat?id=...``)
    :return: list of URLs, one per initial, built as ``url + "&initial=" + i``
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36"
                      " (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    req = requests.get(url=url, headers=headers)
    bs = BeautifulSoup(req.text, "html.parser")
    bsre = bs.find_all("ul", class_="n-ltlst f-cb")
    # Raw string replaces the original's invalid '\<' / '\"' escapes.
    az_re = re.findall(r'<a class="" href="/discover/artist/cat\?id=.*?&amp;initial=(.*?)"',
                       str(bsre))
    return [str(url) + "&initial=" + str(initial) for initial in az_re]
def player_data_list(url):
    """Scrape one artist-list sub-page for ``(artist_url, display_name)`` pairs.

    :param url: a category sub-page URL (``...&initial=N``)
    :return: list of ``(full artist URL, displayed artist name)`` tuples
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36"
                      " (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    req = requests.get(url=url, headers=headers)
    bs = BeautifulSoup(req.text, "html.parser")
    bsre = bs.find_all("div", class_="m-sgerlist")
    # Raw string replaces the original's invalid '\<' / '\"' escapes.
    # Groups: (artist id, title attribute, anchor text); the anchor text is
    # what the original used as the display name.
    mc_re = re.findall(
        r'<a class="nm nm-icn f-thide s-fc0" href=".*?/artist\?id=(.*?)" title="(.*?)">(.*?)</a>',
        str(bsre))
    return [("https://music.163.com/artist?id=" + artist_id, anchor_text)
            for artist_id, _title, anchor_text in mc_re]
def music_pyer_data_list():
    """Placeholder — never implemented and not called anywhere in this file."""
    pass
def top_down():
    """Download every chart listed on a hard-coded toplist index page.

    Prints a numbered list of the charts found, then downloads each chart's
    songs via :func:`mu_download` (one directory per chart).
    """
    url_top = "https://music.163.com/discover/toplist?id=5453912201"
    charts = top_data_list(url_top)
    # Reuse the already-fetched list; the original called top_data_list a
    # second time (one extra HTTP round-trip) just to print its length.
    print(len(charts))
    for index, (_chart_url, chart_name) in enumerate(charts, start=1):
        print(index, chart_name)
    for chart_url, chart_name in charts:
        mu_download(chart_url, chart_name)
def pyer_down():
    """Crawl every artist category / initial / artist and download all songs.

    Three nested levels: category pages -> per-initial sub-pages -> artist
    pages.  Artists on one sub-page are downloaded concurrently, one
    :class:`threading.Thread` per artist, joined before moving on.
    """
    mc_list = mc_data_list("https://music.163.com/discover/artist")
    print(mc_list)
    try:
        for cat_url, cat_name in mc_list:
            az_list = az_data_list(cat_url)
            mu_path = mk_filename(fixname(cat_name))
            try:
                for initial_url in az_list:
                    play_list = player_data_list(initial_url)
                    print(play_list)
                    try:
                        # One worker per artist on this sub-page.
                        threads = [
                            threading.Thread(target=mu_player_download,
                                             args=(artist_url, mu_path, artist_name))
                            for artist_url, artist_name in play_list
                        ]
                        for thread in threads:
                            thread.start()
                        for thread in threads:
                            thread.join()
                        print(threads)
                    # All three handlers narrowed from bare ``except:`` so
                    # KeyboardInterrupt/SystemExit can still stop the crawl.
                    except Exception:
                        print("三级出错")
            except Exception:
                print("二级出错")
    except Exception:
        print("顶级出错")
def main():
    """Entry point: run the full artist crawl/download (see pyer_down)."""
    pyer_down()
if __name__ == '__main__':
    main()
# Commented-out manual test calls kept from development:
#top_down()
#www = mc_data_list("https://music.163.com/discover/artist")
#az_data_list("https://music.163.com/discover/artist/cat?id=1001")
#for i in www:
#qqq = az_data_list(i[0])
#print(qqq)
# player_data_list("https://music.163.com/discover/artist/cat?id=1001&initial=67")
Python
1
https://gitee.com/huaixue/pc_music.git
git@gitee.com:huaixue/pc_music.git
huaixue
pc_music
爬虫音乐酷我网易云
master

搜索帮助

53164aa7 5694891 3bd8fe86 5694891