一、入口定位
入口定位
-- 關鍵字搜索
   -- 方法關鍵字 -- 最簡單,最高效的 排第一
      -- encrypt 加密
      -- decrypt 解密
      -- JSON.stringify 把一個JS對象轉換為Json字符串
         JSON.stringify({a:'1',b:"2"})
         '{"a":"1","b":"2"}'
      -- JSON.parse 把一個Json字符串轉換為JS的對象
         JSON.parse('{"a":"1","b":"2"}')
         {a: '1', b: '2'}
備注:當我們要搜索的key放到headers中時,我們可以搜索headers;如果搜索的key放到了請求體中,我們就需要搜索JSON.stringify,因為結構化數據沒辦法直接傳輸,只能以字符串的形式傳輸。當我們發送完請求,服務器給我們返回數據以后,返回的數據基本上都是Json字符串,所以我們需要把Json字符串轉換為結構化對象,這時就要用到JSON.parse。
   -- key關鍵字 -- 最高頻,用的最多 排第二 例如:portal_sign
   -- 攔截器關鍵字 -- 比較有利的補充 排第三
      interceptors.request.use(func)
      interceptors.response.use(func)
      t.headers["p"+"o"+"r"+"t"+"a"+"l" +"-" +"s" +"i" +"g" +"n"] = f.getxxx(e),
   -- headers關鍵字 -- 偶爾會用 排第四
   -- 路徑關鍵字 -- 偶爾會用 排第四(請求入口定位,與響應無關)
-- 請求堆棧(請求入口定位,與響應無關)
二、攔截器關鍵字
因為很多接口可能都需要相同的解密操作,所以前端開發人員就把相同功能的代碼抽取出來,集中放到了攔截器中,降低了代碼冗余。
很多中小網站都不會針對每個接口寫一套獨立的加密 / 解密代碼,通常會讓很多接口使用相同的加密 / 解密方式,并且為了減少代碼冗余,實現代碼的高內聚、低耦合,都會把共用的代碼放到攔截器中。在攔截器關鍵字搜索中,加密的情況下我們通常搜索【interceptors.request.use】,解密的情況下搜索【interceptors.response.use】,通過搜索可以得到下圖所示的結果。
三、路徑關鍵字
3.1、根據路徑關鍵字定位參數
正則找單詞邊界進行定位
3.2、一層一層,深入定位請求頭
四、請求堆棧
補充內容:斷點擴展
普通斷點:我們平時使用的斷點,只要代碼執行到這一行就會卡住
條件斷點:點擊修改斷點,修改為條件斷點
日志斷點
XHR斷點
4.1、條件斷點:設定在什么條件下才卡住
4.2、XHR斷點,等價于條件斷點中的includes,建議用這個
五、Day13&Day14作業案例二
破解網站:https://www.swhysc.com/swhysc/news/company
5.1、JS文件:01 swhy.js
const cryptoJs = require("crypto-js");

/**
 * Decrypt an AES-encrypted response body.
 *
 * The ciphertext is decrypted with AES in ECB mode and PKCS#7 padding, using
 * the fixed key "rewin-swhysc1234" (parsed as UTF-8), and the plaintext is
 * returned decoded as UTF-8.
 *
 * @param {string} e - Ciphertext in a form accepted by `cryptoJs.AES.decrypt`.
 * @returns {string} The decrypted plaintext.
 */
function decrypt_data(e) {
    const key = cryptoJs.enc.Utf8.parse("rewin-swhysc1234");
    // ECB mode needs no IV, only the key.
    const plaintext = cryptoJs.AES.decrypt(e, key, {
        mode: cryptoJs.mode.ECB,
        padding: cryptoJs.pad.Pkcs7,
    });
    return cryptoJs.enc.Utf8.stringify(plaintext);
}

// Test
/*
Step 1: copy the JS code out of the site and run it once as a test.
Step 2: it will fail, reporting that `u.a` cannot be found.
Step 3: npm-install the third-party library crypto-js next to this file and
        import it, then use Ctrl+R to replace every `u.a` with `cryptoJs`.
Note:   ECB mode does not need an IV, only a key. The key appears to be fixed;
        clicking through several pages and re-testing confirms the mode is
        not CBC.
*/
const data = "";
console.log(decrypt_data(data));
5.2、Python文件:02 swhy.py
import execjs
import requests

# Request data (cookies, headers, params) as generated by curlconverter.
cookies = {
    'Hm_lvt_553ce4fa7b2bd3ea6d85c1fb6b901c6c': '1755603688',
    'HMACCOUNT': '1D88C5C5B0786DD8',
    'zh_choose': 's',
    'sajssdk_2015_cross_new_user': '1',
    'sensorsdata2015jssdkcross': '%7B%22distinct_id%22%3A%22198c2226013cf0-0c2b796ee5f188-26001151-2073600-198c2226014af4%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTk4YzIyMjYwMTNjZjAtMGMyYjc5NmVlNWYxODgtMjYwMDExNTEtMjA3MzYwMC0xOThjMjIyNjAxNGFmNCJ9%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%22%2C%22value%22%3A%22%22%7D%2C%22%24device_id%22%3A%22198c2226013cf0-0c2b796ee5f188-26001151-2073600-198c2226014af4%22%7D',
    'Hm_lpvt_553ce4fa7b2bd3ea6d85c1fb6b901c6c': '1755603826',
}

# No 'Cookie' header here: the cookies are sent through the `cookies` argument.
headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Referer': 'https://www.swhysc.com/swhysc/news/company',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
    'Xdemeter': '{"DeviceType":"PW"}',
    'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

params = {
    'topFlag': '3',
    'pageSize': '10',
    'status': '2',
    'pageNum': '2',
    'channelId': '00010002000100030001',
}

# 1. The basic request code was generated with https://curlconverter.com/.
#    A timeout is set so a stalled connection cannot hang the script forever.
response = requests.get(
    'https://www.swhysc.com/swhy/service/wscms/v1/cms/infobaselist',
    params=params,
    cookies=cookies,
    headers=headers,
    timeout=10,
)
# Fail early on an HTTP error instead of feeding an error page to the decryptor.
response.raise_for_status()

# 2. Print the returned data to check that it is the encrypted payload.
# print(response.text)  # returns the ciphertext, so the generated request works

# Read the whole JS source file.
with open("01 swhy.js", encoding="utf-8") as f:
    js_code = f.read()

# Compile the JS source into an execjs context.
js_compile = execjs.compile(js_code)

# Call decrypt_data from the compiled JS with the encrypted response body.
data = js_compile.call("decrypt_data", response.text)
print(data)
六、案例三:清華大學排名
地址鏈接:https://www.shanghairanking.cn/institution/tsinghua-university 爬取數據地址鏈接:https://www.shanghairanking.cn/api/v2010/univ_comm/univ/tsinghua-university
現在已經把加密的authorization獲取到了,所以我們就需要在Python代碼中調用該JS代碼,把生成的加密authorization放到headers中即可
代碼升級:可以獲取到任意學校的數據
并且時間戳也完全符合JS代碼中的邏輯進行替換
6.1、JS代碼文件:04 清華大學排名.js
const cryptoJs = require("crypto-js");

// API prefix that is stripped from the request path before signing.
const v = "/api";

// Byte table that m() XORs against its argument.
const f = {
    arr0: [161, 65, 7, 6, 94, 210, 25, 42, 44, 89, 27, 57, 139, 56, 189, 28, 73, 107, 165, 33, 137, 63, 177, 185, 161, 91, 82, 130, 147, 159, 62, 45, 62, 141, 0, 60],
};

/**
 * Decode a string by XOR-ing `f.arr0` with `t` read in reverse order.
 *
 * @param {number[]} t - Values XOR-ed against `f.arr0`; element `i` of
 *   `f.arr0` is paired with element `length - 1 - i` of `t`.
 * @returns {string} The string built from the resulting code points.
 */
const m = function (t) {
    const e = f.arr0.length;
    const codePoints = f.arr0.map((value, i) => value ^ t[e - 1 - i]);
    return String.fromCodePoint(...codePoints);
};

/**
 * Build the Base64-encoded authorization value for one request.
 *
 * The signed text is "3#<decoded key>#<METHOD> <path> <query>#<timestamp>";
 * the result is Base64 of "3:<SHA-256 hex of that text>:<timestamp>".
 *
 * @param {string} t - HTTP method; it is upper-cased before signing.
 * @param {string} e - Request path; `base` and "/api" are removed from it.
 * @param {Object} n - Query parameters, serialized as sorted "key=value"
 *   pairs joined with "&" (empty string when `n` is not an object).
 * @param {number} o - Offset in milliseconds added to the local clock.
 * @param {string} base - Prefix to strip from `e`.
 * @returns {string} The Base64-encoded authorization value.
 */
const x = function (t, e, n, o, base) {
    const decodedKey = m([10, 52, 187, 12, 28, 14, 168, 164, 183, 51, 56, 145, 148, 134, 12, 190, 64, 136, 88, 112, 36, 137, 21, 191, 13, 42, 96, 1, 78, 46, 183, 111, 55, 49, 118, 151]);
    const query = n && n instanceof Object
        ? Object.keys(n).sort().map((key) => `${key}=${n[key]}`).join("&")
        : "";
    const path = e.replace(base, "").replace("/api", "");
    const requestLine = `${t.toUpperCase()} ${path} ${query}`;
    const timestamp = o + Date.now();
    const digest = cryptoJs.SHA256(`3#${decodedKey}#${requestLine}#${timestamp}`);
    const token = `3:${digest}:${timestamp}`;
    return cryptoJs.enc.Base64.stringify(cryptoJs.enc.Utf8.parse(token));
};

/**
 * Compute the `authorization` header value for a school's detail endpoint.
 *
 * @param {number} timer - Reference timestamp in milliseconds. Only its
 *   offset from the local clock is passed on; x() adds the local clock back,
 *   so the signed timestamp is `timer` plus the time elapsed in between.
 * @param {string} school - School slug appended to
 *   "/api/v2010/univ_comm/univ/".
 * @returns {string} The Base64-encoded authorization value.
 */
function get_authorization(timer, school) {
    const method = "get";
    // The signed path depends on the school, so each school gets its own value.
    const url = "/api/v2010/univ_comm/univ/" + school;
    const params = {};
    const offset = timer - Date.now();
    return x(method, url, params, offset, v);
}

// Demo call with real arguments; calling it with none signs a NaN timestamp
// and an ".../undefined" path.
console.log(get_authorization(Date.now(), "tsinghua-university"));
6.2、Python代碼文件:04 清華大學排名.py
import requests
import execjs

# Request data (cookies, headers) as generated for the ranking page.
cookies = {
    'Hm_lvt_af1fda4748dacbd3ee2e3a69c3496570': '1755675277',
    'HMACCOUNT': '0AEF3215315FCD60',
    '_clck': '1tsp5b3%5E2%5Efym%5E0%5E2058',
    'Hm_lpvt_af1fda4748dacbd3ee2e3a69c3496570': '1755675292',
    '_clsk': 'f3dj7u%5E1755675292420%5E2%5E1%5Ej.clarity.ms%2Fcollect',
}

# 'authorization' is computed per request in main(); cookies are sent through
# the `cookies` argument, so neither header is listed here.
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'priority': 'u=1, i',
    'referer': 'https://www.shanghairanking.cn/institution/tsinghua-university',
    'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
}

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def get_timestamp():
    """Request the timestamp endpoint and return the `data` field of its JSON reply."""
    response = requests.get(
        'https://www.shanghairanking.cn/api/pub/v1/ms',
        cookies=cookies,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail early on an HTTP error instead of parsing an error page as JSON.
    response.raise_for_status()
    print("time:::", response.text)  # e.g. time::: {"code":200,"msg":"success","data":1755684227463}
    return response.json().get("data")


def main(school="university-of-jinan"):
    """Fetch and print the detail data for one school.

    Args:
        school: School slug used both in the signed path and in the request URL.
    """
    # Get the timestamp that the JS signing code expects as its first argument.
    timer = get_timestamp()

    # Load the JS file and compute the encrypted authorization value.
    with open("04 清華大學排名.js", encoding="utf-8") as f:
        js_code = f.read()
    authorization = execjs.compile(js_code).call("get_authorization", timer, school)
    print("authorization:::", authorization)

    # Build per-request headers so the module-level dict is left untouched.
    request_headers = {**headers, "authorization": authorization}
    response = requests.get(
        f'https://www.shanghairanking.cn/api/v2010/univ_comm/univ/{school}',
        cookies=cookies,
        headers=request_headers,
        timeout=REQUEST_TIMEOUT,
    )
    print(response.text)


if __name__ == "__main__":
    main()