import re

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class main(object):
    """Scrape a QQ Music chart page and write per-song details to a file.

    ``query_music`` opens a chart page and collects the song-page links on
    it; ``list_music`` visits every one of those links and appends the song
    name, singer, album, language, genre, company and release time to
    ``./sing.json``.  Despite the file extension the output is plain text:
    one field per line, records separated by a blank line.
    """

    # Recognises a song-page URL; the group captures the text between
    # ``song/`` and ``.html``.  ``.com`` is optional so that both the full
    # host name and a shortened spelling of it are accepted.
    _SONG_URL = re.compile(r"y\.qq(?:\.com)?/n/yqq/song/(\S+)\.html")

    # CSS class names read from each ``data__info`` element, in output
    # order: album, language, genre, record company, release time.
    _DETAIL_CLASSES = (
        "js_album",
        "js_lan",
        "js_genre",
        "js_company",
        "js_public_time",
    )

    def __init__(self, chrome_driver=None):
        """Prepare headless Chrome options.

        Args:
            chrome_driver: Optional path to the chromedriver executable.
                When omitted, Selenium is left to locate the driver itself.
        """
        self.chrome_driver = chrome_driver
        self.chrome_options = Options()
        self.chrome_options.add_argument('--headless')
        self.chrome_options.add_argument('--disable-gpu')
        # NOTE: these headers are only stored on the instance; nothing in
        # this class hands them to the browser.
        self.header = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
            "accept-language": "zh-CN,zh;q=0.9",
            "referer": "https://y.qq.com/n/yqq/toplist/27.html",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
        }

    def _new_driver(self):
        """Start a Chrome session configured with ``self.chrome_options``."""
        if self.chrome_driver is None:
            return webdriver.Chrome(options=self.chrome_options)
        # Same call shape the original code used (driver path passed
        # positionally).  NOTE(review): recent Selenium releases expect the
        # path via a Service object instead - confirm against the installed
        # version.
        return webdriver.Chrome(self.chrome_driver, options=self.chrome_options)

    @classmethod
    def _song_links(cls, hrefs):
        """Return the entries of *hrefs* that look like song-page URLs.

        ``None`` / empty hrefs and links that do not contain the song-page
        pattern are dropped instead of raising.
        """
        # search(), not match(): hrefs are absolute URLs, so the pattern
        # never sits at the very start of the string.
        return [href for href in hrefs if href and cls._SONG_URL.search(href)]

    @staticmethod
    def _format_record(fields):
        """Join *fields* one per line and terminate with a blank line."""
        return "\n".join(fields) + "\n\n"

    def query_music(self, url):
        """Collect the song-page links shown on the chart page at *url*.

        Args:
            url: Address of the chart page to open.

        Returns:
            A list of song-page URLs, in page order.

        Raises:
            selenium.common.exceptions.TimeoutException: if no song entry
                appears within 10 seconds.
        """
        driver = self.driver = self._new_driver()
        try:
            driver.get(url)
            entries = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located(
                    (By.CLASS_NAME, "songlist__songname_txt")))
            hrefs = [
                entry.find_element(By.CLASS_NAME, "js_song").get_attribute("href")
                for entry in entries
            ]
        finally:
            # quit() (not close()) so the chromedriver process is released
            # even when the page fails to load.
            driver.quit()
        return self._song_links(hrefs)

    def list_music(self, url):
        """Write the details of every song on the chart at *url* to disk.

        The output file ``./sing.json`` is overwritten.  For each song one
        record is written per ``data__info`` element found on its page.

        Args:
            url: Address of the chart page to scrape.
        """
        # Gather the links first: query_music() runs its own browser
        # session, so it must finish before the session used below starts.
        song_urls = self.query_music(url)
        with open("./sing.json", "w", encoding="utf-8") as fw:
            driver = self.driver = self._new_driver()
            try:
                wait = WebDriverWait(driver, 10)
                for song_url in song_urls:
                    driver.get(song_url)
                    # Wait for the detail panel before reading any field.
                    infos = wait.until(
                        EC.presence_of_all_elements_located(
                            (By.CLASS_NAME, "data__info")))
                    name = driver.find_element(By.CLASS_NAME, "data__name_txt").text
                    singer = driver.find_element(By.CLASS_NAME, "data__singer").text
                    for info in infos:
                        details = [
                            info.find_element(By.CLASS_NAME, css_class).text
                            for css_class in self._DETAIL_CLASSES
                        ]
                        fw.write(self._format_record([name, singer] + details))
            finally:
                driver.quit()
if __name__ == '__main__':
    # Chart pages on QQ Music.  The addresses must be absolute URLs
    # (scheme + full host), otherwise the browser cannot navigate to them.
    url = "https://y.qq.com/n/yqq/toplist/27.html#stat=u.27"  # new-songs chart
    url_re = "https://y.qq.com/n/yqq/toplist/26.html#stat=u.26"  # hot-songs chart
    url_bs = "https://y.qq.com/n/yqq/toplist/62.html#stat=u.62"  # fastest-rising chart
    main().list_music(url_bs)
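# "Post a comment" - leftover text from the web page this script was copied from; not part of the program.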