Selenium 防檢測策略匯總:
合理設置延遲:請求間添加隨機延遲 (2-10秒)
限制爬取頻率:控制每小時/每天的請求量
輪換用戶代理:準備至少10個不同的User-Agent
使用住宅代理:優先選擇高質量的住宅代理IP
處理驗證碼:集成2Captcha或Anti-Captcha服務
定期更新工具:保持 Selenium 和瀏覽器驅動為最新版本
1. 基礎防檢測配置
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


def get_stealth_driver():
    """Create a Chrome WebDriver configured to hide common automation signals.

    The driver is started with flags that remove the usual "controlled by
    automated software" markers, a user agent picked at random from a small
    built-in list, and a CDP script that makes ``navigator.webdriver`` read
    as ``undefined``.

    Returns:
        The started ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()``.
    """
    import random

    options = Options()

    # Basic anti-detection settings: drop the Blink automation feature and
    # the switches/extension that Chrome adds when driven by automation.
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    # Suppress automation-related UI and sandbox/shared-memory restrictions.
    options.add_argument("--disable-infobars")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--no-sandbox")

    # Random user agent. The entries below are truncated placeholders and
    # should be replaced with complete User-Agent strings.
    user_agents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)...",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)...",
    ]
    options.add_argument(f"user-agent={random.choice(user_agents)}")

    driver = webdriver.Chrome(options=options)

    # Override navigator.webdriver. The script is registered through CDP so
    # it is evaluated on each new document rather than only the current page.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {
            "source": (
                "Object.defineProperty(navigator, 'webdriver', "
                "{get: () => undefined})"
            )
        },
    )
    return driver
2. 高級防檢測技術
2.1 使用 undetected-chromedriver
import undetected_chromedriver as uc


def get_undetected_driver(version_main=114):
    """Create an undetected-chromedriver Chrome instance with a random window size.

    Args:
        version_main: Major version of the locally installed Chrome that the
            driver should match. Defaults to 114, the value this snippet
            originally hard-coded; pass your own Chrome major version.

    Returns:
        The started ``uc.Chrome`` instance (non-headless, launched with
        ``use_subprocess=True``). The caller is responsible for ``quit()``.
    """
    import random

    options = uc.ChromeOptions()

    # Browser options: allow pop-ups and silence notification prompts.
    options.add_argument("--disable-popup-blocking")
    options.add_argument("--disable-notifications")

    # Random window size so every session does not share the same dimensions.
    width = random.randint(1000, 1400)
    height = random.randint(700, 900)
    options.add_argument(f"--window-size={width},{height}")

    driver = uc.Chrome(
        options=options,
        version_main=version_main,  # must match the installed Chrome version
        headless=False,
        use_subprocess=True,
    )
    return driver
2.2 模擬人類行為模式
from selenium.webdriver.common.action_chains import ActionChains
import time
import random


def human_like_behavior(driver, element=None):
    """Simulate human-like interaction: a pointer move, pauses and a scroll.

    Args:
        driver: WebDriver instance to act on.
        element: Optional element to move the pointer onto. When omitted (or
            falsy), the pointer is moved by a random offset of 0-500 px on
            each axis instead.

    Note:
        The pointer move is only queued on the action chain at first; it is
        actually executed by the final ``perform()`` call, i.e. after the
        scroll and both pauses.
    """
    actions = ActionChains(driver)

    # Random mouse movement (queued, not yet performed).
    if element:
        actions.move_to_element(element)
    else:
        x = random.randint(0, 500)
        y = random.randint(0, 500)
        actions.move_by_offset(x, y)

    # Random delay before scrolling.
    time.sleep(random.uniform(0.5, 2.5))

    # Random downward scroll.
    scroll_amount = random.randint(200, 800)
    driver.execute_script(f"window.scrollBy(0, {scroll_amount})")
    time.sleep(random.uniform(0.3, 1.8))

    # Execute the queued pointer action.
    actions.perform()
3. 完整防檢測爬取流程
def stealth_scrape(url):
    """Run the full stealth scraping flow against a single URL.

    Starts an undetected-chromedriver session, loads ``url``, waits a random
    2-5 seconds, performs human-like interaction and prints the page title.
    Any exception is reported on stdout rather than propagated, and the
    browser is always shut down if it was started.

    Args:
        url: Address of the page to load.

    Returns:
        None.
    """
    # Bind the name before the try block: if driver creation itself fails,
    # the finally clause must not raise UnboundLocalError on ``driver``.
    driver = None
    try:
        # Use undetected-chromedriver.
        driver = get_undetected_driver()

        # Visit the target URL.
        driver.get(url)

        # Random wait.
        time.sleep(random.uniform(2, 5))

        # Simulate human browsing behaviour.
        human_like_behavior(driver)

        # Perform the actual scraping. Example: read the page title.
        title = driver.title
        print(f"成功獲取頁面標題: {title}")

        # More scraping logic goes here...
    except Exception as e:
        # Top-level boundary of the example: report the failure and continue.
        print(f"爬取過程中發生錯誤: {str(e)}")
    finally:
        if driver is not None:
            driver.quit()
# Usage example: run the stealth scraping flow against a single page.
stealth_scrape("https://example.com")
4. 額外防護措施
4.1 代理IP輪換
# Pool of proxy endpoints in "host:port" form. These are sample values;
# replace them with your own proxies.
proxies = [
    "123.45.67.89:8080",
    "98.76.54.32:3128",
]


def get_proxy_driver():
    """Create an undetected-chromedriver instance routed through a random proxy.

    One entry is picked at random from the module-level ``proxies`` list and
    passed to Chrome as an HTTP ``--proxy-server``.

    Returns:
        The started ``uc.Chrome`` instance. The caller is responsible for
        calling ``quit()``.
    """
    options = uc.ChromeOptions()
    proxy = random.choice(proxies)
    options.add_argument(f"--proxy-server=http://{proxy}")
    return uc.Chrome(options=options)
4.2 指紋混淆
def modify_fingerprint(driver):
    """Override a few browser fingerprint values on the given driver.

    Issues three calls, in order: a script that makes ``screen.width`` and
    ``screen.height`` report 1920x1080, a CDP timezone override to
    ``America/New_York``, and a script that wraps
    ``WebGLRenderingContext.prototype.getParameter`` so that parameter 37445
    returns ``'NVIDIA Corporation'``.

    Args:
        driver: WebDriver exposing ``execute_script`` and ``execute_cdp_cmd``.

    Note:
        The two overrides injected with ``execute_script`` are applied to the
        document loaded at call time; presumably they must be re-applied
        after each navigation (verify against the target flow). Only
        ``WebGLRenderingContext`` is patched, not ``WebGL2RenderingContext``.
    """
    # Spoof the screen resolution.
    driver.execute_script(
        "Object.defineProperty(screen, 'width', {get: () => 1920});"
        "Object.defineProperty(screen, 'height', {get: () => 1080});"
    )

    # Spoof the timezone.
    driver.execute_cdp_cmd(
        "Emulation.setTimezoneOverride",
        {"timezoneId": "America/New_York"},
    )

    # Spoof the WebGL fingerprint: 37445 is UNMASKED_VENDOR_WEBGL; every
    # other parameter is delegated to the original getParameter.
    driver.execute_script(
        "const getParameter = WebGLRenderingContext.prototype.getParameter;"
        "WebGLRenderingContext.prototype.getParameter = function(parameter) {"
        " if (parameter === 37445) { return 'NVIDIA Corporation'; }"
        " return getParameter.call(this, parameter);"
        "};"
    )
5. 檢測與驗證
def test_stealth(driver):
    """Load bot-detection test pages and save a screenshot of each result.

    For every test URL the page is opened, given 3 seconds to run its
    checks, and captured to ``stealth_test_<last URL path segment>.png`` in
    the current working directory.

    Args:
        driver: WebDriver instance to evaluate.

    Note:
        NOTE(review): the ``test_`` prefix means pytest would try to collect
        this helper if the module is picked up as a test file. The name is
        kept for compatibility with existing callers.
    """
    test_urls = [
        "https://bot.sannysoft.com",
        "https://arh.antoinevastel.com/bots/areyouheadless",
    ]

    for url in test_urls:
        driver.get(url)
        # Give the detection page time to finish its checks.
        time.sleep(3)
        # Build the file name once so the saved file and the message agree.
        screenshot_name = f"stealth_test_{url.split('/')[-1]}.png"
        driver.save_screenshot(screenshot_name)
        print(f"測試結果已保存: {screenshot_name}")