爬蟲Hook技術常用字段和鉤子函數
目錄
- Hook技術概述
- 網絡請求相關Hook
- 瀏覽器環境Hook
- JavaScript引擎Hook
- 加密算法Hook
- 反爬蟲檢測Hook
- 實際應用示例
- Hook工具和框架
Hook技術概述
Hook(鉤子)技術是一種在程序運行時攔截和修改函數調用的技術。在爬蟲中,Hook技術主要用於:
- 繞過反爬蟲檢測
- 獲取加密參數
- 模擬真實瀏覽器行為
- 動態修改請求參數
網絡請求相關Hook
1. XMLHttpRequest Hook
// Intercept XMLHttpRequest.prototype.open and .send to log (and optionally
// rewrite) every request made through XHR.
(function() {
  // Nothing to hook in environments that do not provide XMLHttpRequest.
  if (typeof XMLHttpRequest === 'undefined') {
    return;
  }

  const originalOpen = XMLHttpRequest.prototype.open;
  XMLHttpRequest.prototype.open = function(method, url, ...rest) {
    const [async, user, password] = rest;
    console.log('XHR Request:', {
      method: method,
      url: url,
      async: async,
      user: user,
      password: password
    });

    // Request parameters can be modified here.
    // String(url) is used because callers may pass a URL object, which has
    // no includes()/replace() methods of its own.
    let targetUrl = url;
    if (String(url).includes('api.example.com')) {
      targetUrl = String(url).replace('api.example.com', 'api.hooked.com');
    }

    // Forward only the optional arguments the caller actually supplied.
    // Passing an explicit `undefined` for `async` is coerced to false and
    // would silently turn a normal 2-argument open() into a synchronous one.
    return originalOpen.call(this, method, targetUrl, ...rest);
  };

  // Intercept send()
  const originalSend = XMLHttpRequest.prototype.send;
  XMLHttpRequest.prototype.send = function(data) {
    console.log('XHR Send Data:', data);

    // The outgoing payload can be modified here (string bodies only).
    let payload = data;
    if (typeof data === 'string') {
      payload = data.replace('old_value', 'new_value');
    }
    return originalSend.call(this, payload);
  };
})();
2. Fetch API Hook
// Intercept window.fetch to log requests/responses and tag requests that
// carry headers with an extra X-Hooked header.
(function() {
  // Nothing to hook where there is no window.fetch.
  if (typeof window === 'undefined' || typeof window.fetch !== 'function') {
    return;
  }

  const originalFetch = window.fetch;
  window.fetch = function(url, options) {
    console.log('Fetch Request:', {
      url: url,
      options: options
    });

    // Modify request headers.
    // `headers` may be a Headers instance, an array of pairs or a plain
    // object; new Headers(...) accepts all three. Working on a copy also
    // keeps the caller's options object untouched.
    let finalOptions = options;
    if (options && options.headers) {
      const headers = new Headers(options.headers);
      headers.set('X-Hooked', 'true');
      finalOptions = Object.assign({}, options, {headers: headers});
    }

    return originalFetch.call(this, url, finalOptions).then(response => {
      console.log('Fetch Response:', response);
      return response;
    });
  };
})();
3. Axios Hook
// Intercept axios requests and responses through its interceptor API.
(function() {
  // axios is optional: only register interceptors when the page loaded it.
  if (typeof window === 'undefined' || !window.axios) {
    return;
  }

  // True only for plain `{...}` objects; strings, FormData, ArrayBuffer,
  // URLSearchParams etc. must not receive an extra property.
  function isPlainObject(value) {
    return Object.prototype.toString.call(value) === '[object Object]';
  }

  // Request interceptor
  window.axios.interceptors.request.use(function(config) {
    console.log('Axios Request Config:', config);

    // Modify request headers
    config.headers = config.headers || {};
    config.headers['X-Hooked'] = 'true';

    // Modify request data (only when it is a plain object payload)
    if (isPlainObject(config.data)) {
      config.data.hooked = true;
    }
    return config;
  }, function(error) {
    return Promise.reject(error);
  });

  // Response interceptor
  window.axios.interceptors.response.use(function(response) {
    console.log('Axios Response:', response);
    return response;
  }, function(error) {
    console.log('Axios Error:', error);
    return Promise.reject(error);
  });
})();
瀏覽器環境Hook
1. Navigator對象Hook
// Hook navigator.userAgent, navigator.platform and navigator.language:
// each access is logged with the real value, then a fixed value is returned.
(function() {
  // Nothing to hook outside a browser-like environment.
  if (typeof Navigator === 'undefined') {
    return;
  }

  // Replace the getter of Navigator.prototype[name]. `label` is the prefix
  // used in the log line; `fakeValue` is what callers will see.
  function spoofNavigatorGetter(name, label, fakeValue) {
    const original = Object.getOwnPropertyDescriptor(Navigator.prototype, name);
    // Skip properties this environment does not expose as an accessor.
    if (!original || typeof original.get !== 'function') {
      return;
    }
    Object.defineProperty(Navigator.prototype, name, {
      get: function() {
        const realValue = original.get.call(this);
        console.log(label + ' accessed:', realValue);
        // Return the spoofed value instead of the real one.
        return fakeValue;
      }
    });
  }

  spoofNavigatorGetter(
    'userAgent',
    'UserAgent',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
  );
  spoofNavigatorGetter('platform', 'Platform', 'Win32');
  spoofNavigatorGetter('language', 'Language', 'zh-CN');
})();
2. Screen對象Hook
// Hook screen.width and screen.height: log the real value on every read and
// report a fixed 1920x1080 screen instead.
(function() {
  const screenProto = Screen.prototype;
  const nativeWidth = Object.getOwnPropertyDescriptor(screenProto, 'width');
  const nativeHeight = Object.getOwnPropertyDescriptor(screenProto, 'height');

  const widthHook = {
    get: function() {
      const realWidth = nativeWidth.get.call(this);
      console.log('Screen width accessed:', realWidth);
      return 1920;
    }
  };
  const heightHook = {
    get: function() {
      const realHeight = nativeHeight.get.call(this);
      console.log('Screen height accessed:', realHeight);
      return 1080;
    }
  };

  Object.defineProperty(screenProto, 'width', widthHook);
  Object.defineProperty(screenProto, 'height', heightHook);
})();
3. Window對象Hook
// Hook window.innerWidth and window.innerHeight: log the real value on every
// read and report a fixed 1366x768 viewport instead.
(function() {
  // Nothing to hook outside a browser-like environment.
  if (typeof window === 'undefined') {
    return;
  }

  // Find the object that actually owns the accessor. In browsers these
  // attributes live on the global object itself rather than on
  // Window.prototype, so walk from `window` up its prototype chain.
  function findOwner(name) {
    for (let holder = window; holder; holder = Object.getPrototypeOf(holder)) {
      const descriptor = Object.getOwnPropertyDescriptor(holder, name);
      if (descriptor) {
        return {holder: holder, descriptor: descriptor};
      }
    }
    return null;
  }

  // Redefine the getter in place on whichever object owns it.
  function spoofSize(name, message, fakeValue) {
    const found = findOwner(name);
    if (!found || typeof found.descriptor.get !== 'function') {
      return;
    }
    const originalGet = found.descriptor.get;
    Object.defineProperty(found.holder, name, {
      get: function() {
        const realValue = originalGet.call(this);
        console.log(message, realValue);
        return fakeValue;
      }
    });
  }

  spoofSize('innerWidth', 'Inner width accessed:', 1366);
  spoofSize('innerHeight', 'Inner height accessed:', 768);
})();
4. Document對象Hook
// Hook document.cookie: log every read and write, then defer to the native
// accessor so the cookie jar itself is unchanged.
(function() {
  const documentProto = Document.prototype;
  const nativeCookie = Object.getOwnPropertyDescriptor(documentProto, 'cookie');

  const cookieHook = {
    get: function() {
      const current = nativeCookie.get.call(this);
      console.log('Cookie accessed:', current);
      return current;
    },
    set: function(value) {
      console.log('Cookie set:', value);
      return nativeCookie.set.call(this, value);
    }
  };

  Object.defineProperty(documentProto, 'cookie', cookieHook);
})();

// Hook document.referrer: log the real referrer and report a fixed one.
(function() {
  const documentProto = Document.prototype;
  const nativeReferrer = Object.getOwnPropertyDescriptor(documentProto, 'referrer');

  const referrerHook = {
    get: function() {
      const realReferrer = nativeReferrer.get.call(this);
      console.log('Referrer accessed:', realReferrer);
      return 'https://www.google.com/';
    }
  };

  Object.defineProperty(documentProto, 'referrer', referrerHook);
})();
JavaScript引擎Hook
1. Date對象Hook
// Hook the Date constructor: `new Date()` with no arguments yields a fixed
// instant; any other argument list is passed through to the native Date.
(function() {
  const originalDate = Date;
  const FIXED_ISO_TIME = '2023-01-01T00:00:00.000Z';

  Date = function(...args) {
    console.log('Date constructor called with:', args);

    // Native Date called WITHOUT `new` ignores its arguments and returns a
    // string; mirror that, using the fixed time as "now".
    if (!new.target) {
      return new originalDate(FIXED_ISO_TIME).toString();
    }
    if (args.length === 0) {
      // Return the fixed time
      return new originalDate(FIXED_ISO_TIME);
    }
    return new originalDate(...args);
  };

  // Share the native prototype so `x instanceof Date` and
  // `x.constructor === Date` keep working for objects created either way.
  Date.prototype = originalDate.prototype;
  Date.prototype.constructor = Date;

  // Copy the static methods
  Date.now = originalDate.now;
  Date.parse = originalDate.parse;
  Date.UTC = originalDate.UTC;
})();

// Hook Date.now(): log the real timestamp and return a fixed one.
(function() {
  const originalNow = Date.now;
  Date.now = function() {
    const now = originalNow();
    console.log('Date.now() called:', now);
    // Return a fixed timestamp: 2023-01-01T00:00:00Z
    return 1672531200000;
  };
})();
2. Math對象Hook
// Hook Math.random(): log the real random number and return a fixed value.
(function() {
  const originalRandom = Math.random;
  Math.random = function() {
    const random = originalRandom();
    console.log('Math.random() called:', random);
    // Return a fixed (or otherwise adjusted) value
    return 0.5;
  };
})();

// Hook Math.floor(): log the argument and result, return the real result.
(function() {
  const originalFloor = Math.floor;
  Math.floor = function(x) {
    const result = originalFloor(x);
    console.log('Math.floor() called with:', x, 'result:', result);
    return result;
  };
})();
3. JSON對象Hook
// Hook JSON.stringify() and JSON.parse(): log the input and the output of
// every call, returning exactly what the native implementation returns.
(function() {
  const nativeStringify = JSON.stringify;
  const nativeParse = JSON.parse;

  JSON.stringify = function(value, replacer, space) {
    console.log('JSON.stringify() called with:', value);
    const serialized = nativeStringify(value, replacer, space);
    console.log('JSON.stringify() result:', serialized);
    return serialized;
  };

  JSON.parse = function(text, reviver) {
    console.log('JSON.parse() called with:', text);
    const parsed = nativeParse(text, reviver);
    console.log('JSON.parse() result:', parsed);
    return parsed;
  };
})();
加密算法Hook
1. Crypto API Hook
// Hook crypto.getRandomValues(): log the typed array before and after it is
// filled, returning what the native call returns.
(function() {
  // Nothing to hook where the Web Crypto API is missing.
  if (typeof crypto === 'undefined' || typeof crypto.getRandomValues !== 'function') {
    return;
  }

  const originalGetRandomValues = crypto.getRandomValues;
  crypto.getRandomValues = function(array) {
    console.log('crypto.getRandomValues() called with:', array);
    const result = originalGetRandomValues.call(this, array);
    console.log('crypto.getRandomValues() result:', result);
    return result;
  };
})();

// Hook crypto.subtle.digest(): log the algorithm/data and the resolved digest.
(function() {
  // crypto.subtle is only exposed in secure contexts; on a plain http page
  // it is undefined and there is nothing to hook.
  if (typeof crypto === 'undefined' || !crypto.subtle ||
      typeof crypto.subtle.digest !== 'function') {
    return;
  }

  const originalDigest = crypto.subtle.digest;
  crypto.subtle.digest = function(algorithm, data) {
    console.log('crypto.subtle.digest() called with:', {
      algorithm: algorithm,
      data: data
    });
    return originalDigest.call(this, algorithm, data).then(result => {
      console.log('crypto.subtle.digest() result:', result);
      return result;
    });
  };
})();
2. 常見加密庫Hook
// Hook CryptoJS (when the page loaded it): log inputs and outputs of MD5,
// SHA256 and AES encrypt/decrypt.
(function() {
  if (typeof window === 'undefined' || !window.CryptoJS) {
    return;
  }
  const lib = window.CryptoJS;

  // Wrap a hash helper such as lib.MD5 so that every call is logged.
  function hookHash(name) {
    const original = lib[name];
    if (typeof original !== 'function') {
      return;
    }
    lib[name] = function(message, options) {
      console.log('CryptoJS.' + name + '() called with:', message);
      const result = original(message, options);
      console.log('CryptoJS.' + name + '() result:', result.toString());
      return result;
    };
  }

  // Hook MD5
  hookHash('MD5');
  // Hook SHA256
  hookHash('SHA256');

  // Hook AES. encrypt/decrypt are patched in place on the existing AES
  // object; replacing the whole object would drop its other members.
  const aes = lib.AES;
  if (aes) {
    const originalEncrypt = aes.encrypt;
    const originalDecrypt = aes.decrypt;

    aes.encrypt = function(message, key, options) {
      console.log('CryptoJS.AES.encrypt() called with:', {
        message: message,
        key: key,
        options: options
      });
      const result = originalEncrypt.call(aes, message, key, options);
      console.log('CryptoJS.AES.encrypt() result:', result.toString());
      return result;
    };

    aes.decrypt = function(ciphertext, key, options) {
      console.log('CryptoJS.AES.decrypt() called with:', {
        ciphertext: ciphertext,
        key: key,
        options: options
      });
      const result = originalDecrypt.call(aes, ciphertext, key, options);
      console.log('CryptoJS.AES.decrypt() result:', result.toString());
      return result;
    };
  }
})();
反爬蟲檢測Hook
1. WebDriver檢測Hook
// Hook the navigator.webdriver property so it always reads as false.
(function() {
  if (typeof navigator === 'undefined') {
    return;
  }
  Object.defineProperty(navigator, 'webdriver', {
    // Keep the property configurable so another script (or a second run of
    // this one) can redefine it without a "Cannot redefine property" error.
    configurable: true,
    get: function() {
      console.log('webdriver property accessed');
      // false means "not driven by WebDriver"
      return false;
    }
  });
})();

// Provide a window.chrome object when the page does not have one.
(function() {
  if (typeof window === 'undefined' || window.chrome) {
    return;
  }
  window.chrome = {
    runtime: {},
    loadTimes: function() {
      return {
        commitLoadTime: 0,
        connectionInfo: 'h2',
        finishDocumentLoadTime: 0,
        finishLoadTime: 0,
        firstPaintAfterLoadTime: 0,
        navigationType: 'Other',
        npnNegotiatedProtocol: 'h2',
        requestTime: 0,
        startLoadTime: 0,
        wasAlternateProtocolAvailable: false,
        wasFetchedViaSpdy: true,
        wasNpnNegotiated: true
      };
    }
  };
})();
2. 指紋檢測Hook
// Hook the canvas fingerprint: PNG exports return a fixed value, every other
// format returns the real data URL.
(function() {
  if (typeof HTMLCanvasElement === 'undefined') {
    return;
  }

  const originalToDataURL = HTMLCanvasElement.prototype.toDataURL;
  HTMLCanvasElement.prototype.toDataURL = function(type, quality) {
    console.log('Canvas toDataURL() called');
    const result = originalToDataURL.call(this, type, quality);

    // The canvas fingerprint can be modified here.
    // toDataURL() defaults to PNG when no type is given, so an omitted type
    // has to be spoofed as well.
    if (type === undefined || type === 'image/png') {
      // Return the fixed canvas fingerprint.
      // NOTE(review): the fixed value is an empty string, which is not a
      // valid data URL — it looks like a placeholder; replace it with a real
      // `data:image/png;base64,...` value before use.
      return '';
    }
    return result;
  };
})();

// Hook the WebGL fingerprint: report a fixed vendor and renderer.
(function() {
  // Hook both WebGL 1 and WebGL 2 contexts when they exist.
  const contexts = [];
  if (typeof WebGLRenderingContext !== 'undefined') {
    contexts.push(WebGLRenderingContext);
  }
  if (typeof WebGL2RenderingContext !== 'undefined') {
    contexts.push(WebGL2RenderingContext);
  }

  contexts.forEach(function(Context) {
    const originalGetParameter = Context.prototype.getParameter;
    Context.prototype.getParameter = function(parameter) {
      console.log('WebGL getParameter() called with:', parameter);
      const result = originalGetParameter.call(this, parameter);

      // Override selected WebGL parameters
      if (parameter === 37445) { // UNMASKED_VENDOR_WEBGL
        return 'Intel Inc.';
      }
      if (parameter === 37446) { // UNMASKED_RENDERER_WEBGL
        return 'Intel Iris OpenGL Engine';
      }
      return result;
    };
  });
})();
3. 行為檢測Hook
// Hook mouse events: log every addEventListener() call and wrap listeners
// registered for `mouse*` events so each dispatch is logged too.
(function() {
  if (typeof EventTarget === 'undefined') {
    return;
  }

  const originalAddEventListener = EventTarget.prototype.addEventListener;
  const originalRemoveEventListener = EventTarget.prototype.removeEventListener;
  // original listener -> wrapper, so removeEventListener() can find the
  // function that was really registered.
  const wrappers = new WeakMap();

  function isMouseType(type) {
    return typeof type === 'string' && type.startsWith('mouse');
  }

  // Listeners may be functions or objects with a handleEvent() method;
  // null/undefined listeners are passed through untouched.
  function isWrappable(listener) {
    return typeof listener === 'function' ||
      (listener !== null && typeof listener === 'object');
  }

  function wrapperFor(listener) {
    let wrapped = wrappers.get(listener);
    if (!wrapped) {
      wrapped = function(event) {
        // A random delay could be added here for mouse events.
        console.log('Mouse event triggered:', event.type, event);
        if (typeof listener === 'function') {
          return listener.call(this, event);
        }
        return listener.handleEvent(event);
      };
      wrappers.set(listener, wrapped);
    }
    return wrapped;
  }

  EventTarget.prototype.addEventListener = function(type, listener, options) {
    console.log('addEventListener() called with:', {
      type: type,
      listener: listener,
      options: options
    });
    const effective = isMouseType(type) && isWrappable(listener)
      ? wrapperFor(listener)
      : listener;
    return originalAddEventListener.call(this, type, effective, options);
  };

  // Without this, a wrapped mouse listener could never be removed because
  // the caller only knows the unwrapped function.
  EventTarget.prototype.removeEventListener = function(type, listener, options) {
    const effective = isMouseType(type) && isWrappable(listener) && wrappers.has(listener)
      ? wrappers.get(listener)
      : listener;
    return originalRemoveEventListener.call(this, type, effective, options);
  };
})();

// Hook keyboard events: log every read of KeyboardEvent#key and #keyCode.
(function() {
  if (typeof KeyboardEvent === 'undefined') {
    return;
  }
  const keyboardProto = KeyboardEvent.prototype;

  // Wrap one accessor. The descriptor is fetched explicitly: reading
  // `keyboardProto[name]` would run the getter against the prototype instead
  // of returning the getter itself.
  function logAccess(name, message) {
    const original = Object.getOwnPropertyDescriptor(keyboardProto, name);
    if (!original || typeof original.get !== 'function') {
      return;
    }
    Object.defineProperty(keyboardProto, name, {
      get: function() {
        const value = original.get.call(this);
        console.log(message, value);
        return value;
      }
    });
  }

  logAccess('key', 'Keyboard key accessed:');
  logAccess('keyCode', 'Keyboard keyCode accessed:');
})();
實際應用示例
1. 完整的反檢測Hook腳本
// Complete anti-detection hook script: hides webdriver, fixes the user
// agent, adds window.chrome, and hooks the canvas/WebGL fingerprints plus
// Math.random and Date.now.
(function() {
  'use strict';
  console.log('Anti-detection hooks loaded');

  if (typeof navigator !== 'undefined') {
    // 1. Hide webdriver. `configurable: true` lets the property be redefined
    //    if another hook already defined it or defines it later.
    Object.defineProperty(navigator, 'webdriver', {
      configurable: true,
      get: () => false
    });

    // 2. Override userAgent
    Object.defineProperty(navigator, 'userAgent', {
      configurable: true,
      get: () => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    });
  }

  // 3. Add the chrome object
  if (typeof window !== 'undefined' && !window.chrome) {
    window.chrome = {
      runtime: {},
      loadTimes: () => ({
        commitLoadTime: 0,
        connectionInfo: 'h2',
        finishDocumentLoadTime: 0,
        finishLoadTime: 0,
        firstPaintAfterLoadTime: 0,
        navigationType: 'Other',
        npnNegotiatedProtocol: 'h2',
        requestTime: 0,
        startLoadTime: 0,
        wasAlternateProtocolAvailable: false,
        wasFetchedViaSpdy: true,
        wasNpnNegotiated: true
      })
    };
  }

  // 4. Hook the canvas fingerprint
  if (typeof HTMLCanvasElement !== 'undefined') {
    const originalToDataURL = HTMLCanvasElement.prototype.toDataURL;
    HTMLCanvasElement.prototype.toDataURL = function(type, quality) {
      // An omitted type means PNG, so it is spoofed as well.
      // NOTE(review): the fixed value is an empty string, which is not a
      // valid data URL — looks like a placeholder to be filled in.
      if (type === undefined || type === 'image/png') {
        return '';
      }
      return originalToDataURL.call(this, type, quality);
    };
  }

  // 5. Hook the WebGL fingerprint
  if (typeof WebGLRenderingContext !== 'undefined') {
    const originalGetParameter = WebGLRenderingContext.prototype.getParameter;
    WebGLRenderingContext.prototype.getParameter = function(parameter) {
      if (parameter === 37445) return 'Intel Inc.'; // UNMASKED_VENDOR_WEBGL
      if (parameter === 37446) return 'Intel Iris OpenGL Engine'; // UNMASKED_RENDERER_WEBGL
      return originalGetParameter.call(this, parameter);
    };
  }

  // 6. Hook Math.random
  const originalRandom = Math.random;
  Math.random = function() {
    const random = originalRandom();
    // Extra randomness can be added here
    return random;
  };

  // 7. Hook Date.now
  const originalNow = Date.now;
  Date.now = function() {
    const now = originalNow();
    // A time offset can be added here
    return now;
  };

  console.log('Anti-detection hooks completed');
})();
2. 加密參數Hook腳本
// Encryption-parameter hook script: logs inputs and outputs of CryptoJS
// MD5/SHA256, crypto.getRandomValues and JSON.stringify.
(function() {
  'use strict';
  console.log('Encryption parameter hooks loaded');

  // Hook CryptoJS (only when the page loaded it)
  if (typeof window !== 'undefined' && window.CryptoJS) {
    const lib = window.CryptoJS;

    const originalMD5 = lib.MD5;
    lib.MD5 = function(message, options) {
      console.log('MD5 input:', message);
      const result = originalMD5(message, options);
      console.log('MD5 output:', result.toString());
      return result;
    };

    const originalSHA256 = lib.SHA256;
    lib.SHA256 = function(message, options) {
      console.log('SHA256 input:', message);
      const result = originalSHA256(message, options);
      console.log('SHA256 output:', result.toString());
      return result;
    };
  }

  // Hook the crypto API (skipped where it is not available)
  if (typeof crypto !== 'undefined' && typeof crypto.getRandomValues === 'function') {
    const originalGetRandomValues = crypto.getRandomValues;
    crypto.getRandomValues = function(array) {
      console.log('getRandomValues input:', array);
      const result = originalGetRandomValues.call(this, array);
      console.log('getRandomValues output:', result);
      return result;
    };
  }

  // Hook JSON.stringify
  const originalStringify = JSON.stringify;
  JSON.stringify = function(value, replacer, space) {
    console.log('JSON.stringify input:', value);
    const result = originalStringify(value, replacer, space);
    console.log('JSON.stringify output:', result);
    return result;
  };

  console.log('Encryption parameter hooks completed');
})();
3. 網絡請求Hook腳本
// Network-request hook script: logs every XMLHttpRequest, fetch and axios
// request/response without modifying them.
(function() {
  'use strict';
  console.log('Network request hooks loaded');

  // Hook XMLHttpRequest
  if (typeof XMLHttpRequest !== 'undefined') {
    const originalOpen = XMLHttpRequest.prototype.open;
    const originalSend = XMLHttpRequest.prototype.send;

    XMLHttpRequest.prototype.open = function(method, url, ...rest) {
      const [async, user, password] = rest;
      console.log('XHR Open:', {method, url, async, user, password});
      // Forward only the arguments the caller supplied: an explicit
      // `undefined` for `async` is coerced to false and would make a normal
      // 2-argument open() synchronous.
      return originalOpen.call(this, method, url, ...rest);
    };

    XMLHttpRequest.prototype.send = function(data) {
      console.log('XHR Send:', data);
      return originalSend.call(this, data);
    };
  }

  if (typeof window !== 'undefined') {
    // Hook fetch
    if (typeof window.fetch === 'function') {
      const originalFetch = window.fetch;
      window.fetch = function(url, options) {
        console.log('Fetch Request:', {url, options});
        return originalFetch.call(this, url, options).then(response => {
          console.log('Fetch Response:', response);
          return response;
        });
      };
    }

    // Hook axios (only when the page loaded it)
    if (window.axios) {
      window.axios.interceptors.request.use(function(config) {
        console.log('Axios Request:', config);
        return config;
      });
      window.axios.interceptors.response.use(function(response) {
        console.log('Axios Response:', response);
        return response;
      });
    }
  }

  console.log('Network request hooks completed');
})();
Hook工具和框架
1. 瀏覽器擴展
- Tampermonkey: 用戶腳本管理器
- Greasemonkey: Firefox用戶腳本管理器
- Violentmonkey: 現代化的用戶腳本管理器
2. 代理工具
- Fiddler: 網絡調試代理
- Charles: 網絡代理工具
- Burp Suite: Web應用安全測試工具
3. 瀏覽器自動化
- Puppeteer: Node.js瀏覽器自動化
- Selenium: 瀏覽器自動化框架
- Playwright: 現代化的瀏覽器自動化
4. 移動端Hook
- Frida: 動態插樁工具
- Xposed: Android框架Hook
- Substrate: iOS越獄Hook框架
本文檔提供了爬蟲中常用的Hook技術和鉤子函數,建議在實際使用中根據具體需求進行調整和優化。請注意遵守相關法律法規和網站使用條款。