selenium進階設置

1、無頭瀏覽設置和規避爬蟲檢測

問題一：在有界面模式下可以正常操作的元素，在無頭模式下報錯 element not interactable

解決方法：通過錯誤截圖發現，頁面上有該元素，但是無頭模式下頁面不夠大，沒有顯示想定位的元素，因此需要顯式設置瀏覽器窗口大小（見下方的 --window-size 參數）。

# Build a headless Chrome session with options that reduce the chance of the
# browser being flagged as automated.
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver import Chrome

chrome_options = Options()
chrome_options.add_argument("--headless")  # run without a visible browser window
chrome_options.add_argument("--disable-gpu")  # headless mode: do not use the GPU
# Give the headless browser an explicit, large viewport (1920x1200). Without it
# the page is too small and the target element is not displayed, which is what
# triggers the "element not interactable" error described above.
chrome_options.add_argument("--window-size=1920,1200")
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36")
chrome_options.add_experimental_option('useAutomationExtension', False)  # anti-detection
chrome_options.add_argument("--mute-audio")  # mute sound
# Anti-detection ('enable-automation') and suppress console log output ('enable-logging').
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
chrome_options.add_argument('--ignore-certificate-errors')  # ignore certificate errors
chrome_options.add_argument('--ignore-ssl-errors')  # ignore SSL errors
# The original text started this switch with an en dash ("–") instead of "--",
# so it was not a well-formed command-line switch.
chrome_options.add_argument('--log-level=3')
chrome_options.add_argument("--start-maximized")
# NOTE(review): "detach" is expected to keep the browser open after the script
# ends — confirm against the ChromeDriver capabilities documentation.
chrome_options.add_experimental_option("detach", True)
browser = Chrome(options=chrome_options)

2、多個chromedriver程序同時運行

使用多進程，確保每個 chromedriver.exe 都能單獨執行（多線程是不行的）

import configparser
import os
from concurrent.futures import ThreadPoolExecutor  # thread pool
from concurrent.futures import ProcessPoolExecutor  # process pool

# NOTE(review): the original snippet used Flask, request and pywsgi without
# importing them; these imports assume Flask and gevent — confirm.
from flask import Flask, request
from gevent import pywsgi

executor = ThreadPoolExecutor(5)  # thread-pool executor (only used by the commented-out call below)
pool = ProcessPoolExecutor(5)  # process-pool executor that runs the automation jobs
app = Flask(__name__)


@app.route('/aut/wsla_api', methods=['post'])
def wsla_api():
    """Accept a JSON job request and hand it to the process pool.

    The request body is parsed as JSON and passed as the single argument to
    ``zdls_start``, which is submitted to ``pool``. The response is returned
    immediately; the submitted job's result is not awaited or inspected.
    """
    form_data = request.get_json()  # incoming JSON payload
    # Imported inside the handler, as in the original snippet.
    from flask_code.wsla_automation import zdls_start  # function to execute

    # zdls_start is the worker function; form_data is its argument.
    # executor.submit(zdls_start, form_data)  # thread-pool alternative (disabled)
    pool.submit(zdls_start, form_data)  # queue the job on the process pool
    return '成功'


if __name__ == '__main__':
    # Read the listening port from config.ini located next to this file.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    config = configparser.ConfigParser()
    config.read(os.path.join(BASE_DIR, 'config.ini'), encoding="utf-8")
    port = config.get('flask_api', 'port')
    print('Flask后臺已開啟~')
    server = pywsgi.WSGIServer(('0.0.0.0', int(port)), app)
    server.serve_forever()

3、get直接返回，不再等待界面加載完成

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Make get() return immediately instead of waiting for the page to finish loading.
# Copy the capabilities first: DesiredCapabilities.CHROME is a shared class-level
# dict, and mutating it in place would affect every later user of it.
desired_capabilities = DesiredCapabilities.CHROME.copy()
desired_capabilities["pageLoadStrategy"] = "none"

# Create the Chrome options object.
options = webdriver.ChromeOptions()
options.add_argument('--no-sandbox')  # works around the "DevToolsActivePort file doesn't exist" error
# Browser resolution. The original passed 'window-size=1600x900'; Chrome's
# documented form for this switch is "width,height".
options.add_argument('--window-size=1600,900')
options.add_argument('--disable-gpu')  # the original author cites Google's docs: needed to avoid a bug
# options.add_argument('--hide-scrollbars')  # hide scrollbars, for some special pages
# Original author's note for the next line: "enable developer mode".
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# options.add_argument('blink-settings=imagesEnabled=false')  # skip images to speed up loading
# Disable image and CSS loading.
prefs = {"profile.managed_default_content_settings.images": 2, 'permissions.default.stylesheet': 2}
options.add_experimental_option("prefs", prefs)
# options.add_argument('--headless')  # no visible window; on Linux without a display, start-up fails without this

# Example timeout in seconds. The original snippet used `timeout` without
# defining it — adjust to the value you need.
timeout = 30

# The driver must exist before its timeouts can be configured; in the original
# text this constructor call had been swallowed by the preceding comment.
# NOTE(review): executable_path / chrome_options / desired_capabilities are
# Selenium 3 keyword arguments; newer Selenium 4 releases no longer accept
# them — confirm against the installed Selenium version.
browser = webdriver.Chrome(executable_path='/root/桌面/youlv/driver/chromedriver', chrome_options=options, desired_capabilities=desired_capabilities)
# Original author's note: both timeouts must be set for the limit to take effect.
browser.set_page_load_timeout(timeout)
browser.set_script_timeout(timeout)
# browser = webdriver.Firefox(executable_path='/usr/bin/geckodriver')

本文來自互聯網用戶投稿，該文觀點僅代表作者本人，不代表本站立場。本站僅提供信息存儲空間服務，不擁有所有權，不承擔相關法律責任。
如若轉載，請注明出處：http://www.pswp.cn/news/713250.shtml
繁體地址，請注明出處：http://hk.pswp.cn/news/713250.shtml
英文地址，請注明出處：http://en.pswp.cn/news/713250.shtml

如若內容造成侵權/違法違規/事實不符，請聯繫多彩編程網進行投訴反饋email:809451989@qq.com，一經查實，立即刪除！