from selenium import webdriver
import timeurl = 'https://www.douyu.com/directory/all'# 動態html網頁加載可能出現的問題:element is not attached to the page document
# 標簽沒有及時的加載顯示出來,如果加載時間不夠,可能報錯
# try exceptclass Douyu(object):def __init__(self):self.driver = webdriver.Chrome()def start(self):self.driver.get(url)time.sleep(2)self.driver.find_element_by_class_name('pop-zoom-close').click()fp = open('douyu.txt', 'w', encoding='utf-8')with open('douyu.html','w',encoding='utf-8') as file:file.write(self.driver.page_source)while False:titles = self.driver.find_elements_by_css_selector('h3.ellipsis')nums = self.driver.find_elements_by_xpath('//span[@class="dy-num fr"]')print(len(titles),len(nums))for title,num in zip(titles[10:],nums):print('標題是:%s,觀戰人數:%s'%(title.text.strip(),num.text.strip()))fp.write('標題是:%s,觀戰人數:%s'%(title.text.strip(),num.text.strip())+'\n')'''<a href="#" class="shark-pager-next shark-pager-disable shark-pager-disable-next">下一頁</a><a href="#" class="shark-pager-next">下一頁</a>'''# 如果沒有找到flag = -1# 找到了,返回索引值flag = self.driver.page_source.find('shark-pager-disable-next')if flag != -1:break# 選中的當前頁碼'''<a href="#" class="shark-pager-item current">1</a>'''current_page = self.driver.find_element_by_xpath('//a[@class="shark-pager-item current"]').textprint('當前頁碼是:',current_page)try:self.driver.find_element_by_class_name('shark-pager-next').click()time.sleep(2)except:time.sleep(2)# 點擊進入最后一頁# xpath多重條件查詢,直接后面添加[][]self.driver.find_element_by_xpath('//a[contains(@class,"shark-pager-item")][last()]').click()fp.close()time.sleep(10)self.driver.quit()if '__main__' == __name__:douyu = Douyu()douyu.start()
?