1. Selenium:瀏覽器自動化之王
核心定位:
跨平臺、跨語言的瀏覽器操控框架,通過驅動真實瀏覽器實現像素級用戶行為模擬。
技術架構:
核心特性:
-
支持所有主流瀏覽器(含移動端模擬)
-
精確的DOM元素定位(XPath/CSS/ID)
-
屏幕截圖與視頻錄制功能
-
分布式測試能力(Selenium Grid)
2. HtmlUnit:無頭瀏覽器輕騎兵
核心定位:
純Java實現的無界面瀏覽器引擎,專為服務端自動化場景優化。
技術架構:
核心特性:
-
毫秒級頁面加載速度
-
線程安全設計
-
內置基礎AJAX支持
-
Cookie自動管理
3. BrowserMobProxy:網絡流量手術刀
核心定位:
基于Netty開發的HTTP代理服務器,專為Web流量監控與操控設計。
技術架構:
核心特性:
-
實時流量鏡像
-
請求/響應內容篡改
-
性能指標采集(TTFB等)
-
支持HTTPS中間人攻擊
能力對比矩陣
維度 | Selenium | HtmlUnit | BrowserMobProxy |
---|---|---|---|
執行環境 | 真實瀏覽器進程 | 純JVM環境 | 獨立代理服務 |
JS支持 | 完整ES6+ | ES5(Rhino引擎) | 不涉及 |
網絡延遲模擬 | 需擴展 | 原生支持 | 精確到毫秒級控制 |
跨域請求處理 | 受同源策略限制 | 自動繞過 | 全流量穿透 |
移動端調試 | 完整設備模擬 | 僅UA偽裝 | 流量分析 |
典型應用場景 | 自動化測試 | 服務端爬蟲 | 接口監控 |
安裝瀏覽器:以 Google Chrome 為例

4、selenium和BrowserMobProxy捕獲網絡請求實例
驅動下載:135 之後的版本請至「Chrome for Testing availability」頁面下載;舊版本驅動請至 chromedriver.storage.googleapis.com/index.html 下載。
安裝需記住安裝位置,啟動時需要設置驅動路徑
代碼實現
getDynamicCrawlersDocument方法為htmlunit的請求監控使用
getParamsByNodeUrl方法為selenium的請求實現,selenium需要驅動支持,可以獲取到復雜的請求接口
依賴:
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-java</artifactId>
    <version>4.10.0</version>
</dependency>
<!-- BrowserMob Proxy -->
<dependency>
    <groupId>net.lightbody.bmp</groupId>
    <artifactId>browsermob-core</artifactId>
    <version>2.1.5</version>
</dependency>
<!-- ChromeDriver -->
<dependency>
    <groupId>org.seleniumhq.selenium</groupId>
    <artifactId>selenium-chrome-driver</artifactId>
    <version>4.1.0</version>
</dependency>
代碼工具類:
package com.zzkj.zei.utils;import com.zzkj.zei.component.ServerConfig;
import lombok.extern.slf4j.Slf4j;
import net.lightbody.bmp.BrowserMobProxy;
import net.lightbody.bmp.BrowserMobProxyServer;
import net.lightbody.bmp.client.ClientUtil;
import net.lightbody.bmp.core.har.Har;
import net.lightbody.bmp.core.har.HarEntry;
import net.lightbody.bmp.core.har.HarRequest;
import net.lightbody.bmp.mitm.manager.ImpersonatingMitmManager;
import net.lightbody.bmp.proxy.CaptureType;
import org.apache.commons.lang3.ObjectUtils;
import org.htmlunit.BrowserVersion;
import org.htmlunit.FailingHttpStatusCodeException;
import org.htmlunit.ScriptException;
import org.htmlunit.WebClient;
import org.htmlunit.html.HtmlPage;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.Proxy;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.htmlunit.ProxyConfig;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;import javax.annotation.PostConstruct;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.util.concurrent.TimeUnit;
import java.time.Duration;
import java.util.*;/*** FileName: SeleniumUtils* Author: wzk* Date:2025/4/29 11:31*/
@Component
@Slf4j
public class SeleniumUtils {private static String SELENIUM_PATH;@Value("${selenium.chromedriver_path}")private String seleniumPath; // 非靜態變量接收注入@PostConstructpublic void init() {SELENIUM_PATH = this.seleniumPath;}private final static List<String> UA_LIST = Arrays.asList("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.0.0 Safari/537.36","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.0.0 Safari/537.36","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.0.0 Safari/537.36","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.0.0 Safari/537.36","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.0.0 Safari/537.36","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.0.0 Safari/537.36");public static void main(String[] args) {
// List<interfaceNodeData> interfaceNodeDataList = getParamsByNodeUrl(ce,true);List<interfaceNodeData> interfaceNodeDataList = getDynamicCrawlersDocument(gfwj1, 1000, true);for (interfaceNodeData interfaceNodeData : interfaceNodeDataList) {log.info("方法:{} 鏈接:{} 請求參數:{}" ,interfaceNodeData.getMethod(),interfaceNodeData.getUrl(), interfaceNodeData.getParams());}}public static List<interfaceNodeData> getDynamicCrawlersDocument(String url, Integer waitTime, boolean javaScriptEnabled) {List<interfaceNodeData> interfaceNodeDatas = new ArrayList<>();// 1. 啟動BrowserMob代理BrowserMobProxy proxy = new BrowserMobProxyServer();proxy.start(0); // 自動分配端口int proxyPort = proxy.getPort();try {// 2. 配置HtmlUnit使用代理WebClient browser = new WebClient(BrowserVersion.CHROME);browser.getOptions().setProxyConfig(new ProxyConfig("localhost",proxyPort,"http"));// 啟用HTTPS支持(忽略證書驗證)browser.getOptions().setSSLInsecureProtocol("ssl");//解決動態頁面抓取不到信息問題browser.getOptions().setCssEnabled(false);browser.getOptions().setJavaScriptEnabled(javaScriptEnabled);browser.getOptions().setThrowExceptionOnScriptError(false);browser.getOptions().setUseInsecureSSL(true);// 設置自定義的錯誤處理類browser.setJavaScriptErrorListener(new JsoupHtmlUintUtils.MyJSErrorListener());// 開始捕獲請求proxy.newHar("zzjk");HtmlPage page = null;page = browser.getPage(url);// 等待后臺腳本執行時間browser.waitForBackgroundJavaScript(waitTime);// String pageAsXml = page.asXml();
// document = Jsoup.parse(pageAsXml.replaceAll("\\<\\?xml.*?\\?>", ""));
// document.setBaseUri(url);// 5. 獲取并分析HAR數據Har har = proxy.getHar();processHarEntries(har, url, interfaceNodeDatas);} catch (ScriptException e) {log.error("getDynamicCrawlersDocument頁面:{} JavaScript 異常:{}", url, e.getMessage());} catch (UnknownHostException e) {log.error("getDynamicCrawlersDocument頁面:{} 無法解析或找到指定的主機名:{}", url, e.getMessage());} catch (FailingHttpStatusCodeException e) {log.error("getDynamicCrawlersDocument頁面:{} HTTP 狀態異常:{}", url, e.getStatusCode());} catch (Exception e) {log.error("getDynamicCrawlersDocument頁面:{} 獲取頁面異常:{}", url, e.getMessage());} finally {// 6. 清理資源proxy.stop();}return interfaceNodeDatas;}public static List<interfaceNodeData> getParamsByNodeUrl(String url,Boolean isTime){long stat = new Date().getTime();System.setProperty("webdriver.chrome.driver", SELENIUM_PATH); //設置chrome驅動程序的路徑BrowserMobProxy proxy = new BrowserMobProxyServer();proxy.start(0); // 自動選擇端口// 獲取Selenium的Proxy對象Proxy seleniumProxy = ClientUtil.createSeleniumProxy(proxy);ChromeOptions opt = new ChromeOptions();opt.addArguments();opt.addArguments("--headless", // 開啟無界面模式"--disable-gpu", // 禁用gpu"--remote-allow-origins=*", // 允許所有源訪問"--ignore-certificate-errors", // 忽略證書錯誤"--user-agent=" + UA_LIST.get(0), // 設置請求頭"--no-sandbox", // 禁用沙盒,減少權限檢查"--disable-dev-shm-usage", // 避免共享內存問題"--log-level=3", // 禁用 Chrome 日志"--blink-settings=imagesEnabled=false", // 禁止圖片加載"--disable-extensions", // 禁用擴展"--disable-javascript", // 禁用 JavaScript(如果目標頁面不需要 JS)"--disable-css", // 禁用 CSS 渲染(按需)"--disable-fonts", // 禁用字體加載"--dns-prefetch-disable", // 禁用 DNS 預解析"--disk-cache-size=0", // 禁用 緩存"--disable-cache" // 禁用 緩存);opt.setCapability(CapabilityType.PROXY, seleniumProxy);WebDriver driver = new ChromeDriver(opt); //初始化一個chrome驅動實例,保存到driver中try {driver.manage().window().maximize();driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS); // 啟用隱式等待return seleniumGetDocument(driver,proxy,url,isTime);} catch (Exception e) {log.info("錯誤URL: " + 
driver.getCurrentUrl());e.printStackTrace();} finally {driver.quit(); // 自動清理Cookies和會話proxy.stop();long end = new Date().getTime();log.info("selenium參數獲取時間" + (end - stat));}return null;}public static List<interfaceNodeData> seleniumGetDocument(WebDriver driver, BrowserMobProxy proxy, String url,Boolean isTime) {List<interfaceNodeData> dataList = new ArrayList<>();try {// 啟用MITM抓取HTTPSproxy.setMitmManager(new ImpersonatingMitmManager.Builder().trustAllServers(true).build());proxy.setHarCaptureTypes(CaptureType.REQUEST_CONTENT, CaptureType.RESPONSE_CONTENT);proxy.newHar("zzkj");// 訪問頁面并等待driver.get(url);// 等待頁面加載完成new WebDriverWait(driver, Duration.ofSeconds(30)).until(webDriver -> ((JavascriptExecutor) webDriver).executeScript("return document.readyState").equals("complete"));// 驗證請求是否穩定validationAll(proxy);if (isTime){Thread.sleep(10000); // 休眠10秒}// 處理HAR數據Har har = proxy.getHar();processHarEntries(har, url, dataList);} catch (Exception e) {e.printStackTrace();}return dataList;}private static void validationAll(BrowserMobProxy proxy) throws InterruptedException {int retries = 0;int stableCount = 0;int lastEntrySize = 0;while (retries < 30 && stableCount < 3) { // 最多等30秒,穩定3次Thread.sleep(1000); // 每秒檢查一次int currentSize = proxy.getHar().getLog().getEntries().size();if (currentSize == lastEntrySize) {stableCount++;} else {stableCount = 0;lastEntrySize = currentSize;}retries++;}if (retries >= 30) {log.info("----------------------- 請求驗證穩定超時 -----------------------");}}// 處理 HAR 條目并過濾private static void processHarEntries(Har har, String baseUrl, List<interfaceNodeData> interfaceNodeDatas) {har.getLog().getEntries().forEach(entry -> {HarRequest request = entry.getRequest();String method = request.getMethod();String toUrl = request.getUrl();log.info("檢測鏈接:{}",toUrl);// 過濾boolean isStaticResource = toUrl.matches(".*\\.(css|js|png|jpg|jpeg|gif|ico|woff|woff2|svg|mp4|mp3)(\\?.*)?$");boolean isStaticPath = toUrl.contains("/material/") ||toUrl.contains("/fonts/") 
||toUrl.contains("/script/") ||toUrl.contains("/login/") ||toUrl.contains("/images/");if (("POST".equalsIgnoreCase(method) || "GET".equalsIgnoreCase(method)) &&
// !filterOutsideUrl(baseUrl, toUrl) &&isCurrentNodeUrl(baseUrl,toUrl) &&!isStaticResource &&!isStaticPath) {interfaceNodeData interfaceNodeData = new interfaceNodeData();interfaceNodeData.setUrl(toUrl);interfaceNodeData.setMethod(method);interfaceNodeData.setData(entry.getResponse().getContent().getText());interfaceNodeData.setParams(ObjectUtils.isEmpty(request.getPostData()) ? "" : request.getPostData().getText());interfaceNodeDatas.add(interfaceNodeData);}});}private static boolean isCurrentNodeUrl(String sourceUrl, String targetUrl) {// 移除協議、轉為小寫、處理末尾斜杠和index.htmlString normalizedSource = normalizeUrl(sourceUrl);String normalizedTarget = normalizeUrl(targetUrl);// 判斷目標URL是否以當前節點URL開頭return !normalizedSource.contains(normalizedTarget);}/*** 標準化URL處理*/private static String normalizeUrl(String url) {// 移除協議頭并轉為小寫String normalized = url.replaceAll("^(http://|https://)", "").toLowerCase();// 移除末尾的 "/" 和 "index.html"normalized = normalized.replaceAll("/+$", "").replaceAll("/index\\.html$", "");return normalized;}}
package com.zzkj.zei.utils;import lombok.Data;/*** FileName: interfaceNodeData* Author: wzk* Date:2025/4/30 17:00*/
/**
 * One captured HTTP exchange: the request URL and method, the request body and the response
 * body. Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class interfaceNodeData {

    /** Request URL. */
    String url;

    /** Response body text. */
    String data;

    /** HTTP method of the request. */
    String method;

    /** Request body text. */
    String params;

    /** Creates an empty instance whose fields are filled in afterwards. */
    interfaceNodeData() {
    }

    /**
     * Creates a fully populated instance.
     *
     * @param url    request URL
     * @param method HTTP method
     * @param data   response body text
     * @param params request body text
     */
    interfaceNodeData(String url, String method, String data, String params) {
        this.url = url;
        this.data = data;
        this.method = method;
        this.params = params;
    }
}
?