一、TP5.0通過行為(Behavior)攔截爬蟲并避免生成 [ error ] NULL 錯誤日志
1. 創建行為類(攔截爬蟲)
在 application/common/behavior 目錄下新建 BlockBot.php,用於識別並攔截爬蟲請求:
<?php
namespace app\common\behavior;

use think\Response;

/**
 * Behavior that rejects requests whose User-Agent looks like an unwanted
 * crawler, while letting a short allow-list of search-engine crawlers through.
 *
 * It is meant to be bound to a hook in application/tags.php; the hook
 * dispatcher calls run().
 */
class BlockBot
{
    /**
     * Case-insensitive regular expressions that mark a User-Agent as a crawler.
     *
     * @var string[]
     */
    protected $botPatterns = [
        '/bot/i',
        '/spider/i',
        '/curl/i',
        '/wget/i',
        '/python/i',
        '/scrapy/i',
        '/crawl/i',
        '/httpclient/i',
        // Site-specific additions. Dots are escaped so they match a literal
        // "." instead of any character.
        '/toutiao/i',
        '/zhanzhang\.toutiao\.com/i',
        '/dataforseo/i',
        '/dataforseo\.com/i',
        '/dataforseo-bot/i',
        '/semrush/i',
        '/www\.semrush\.com/i',
        '/YisouSpider/i',
    ];

    /**
     * Allow-list of legitimate search-engine crawlers. It is checked before
     * the block-list, so these win even though they also contain "bot" or
     * "spider".
     *
     * @var string[]
     */
    protected $allowPatterns = [
        '/googlebot/i',
        '/bingbot/i',
        '/baiduspider/i',
        '/Sogou web spider/i',
    ];

    /**
     * Hook entry point: inspect the current request and block it when the
     * User-Agent matches the block-list and not the allow-list.
     *
     * @return void
     * @throws \think\exception\HttpResponseException when the request is blocked
     */
    public function run()
    {
        $request = request();
        // Cast so preg_match() always receives a string, even if the header
        // lookup yields null.
        $userAgent = (string) $request->header('user-agent', '');
        $path      = $request->path();

        // Allow-listed crawlers pass straight through.
        $allowed = $this->firstMatch($this->allowPatterns, $userAgent);
        if ($allowed !== null) {
            trace("放行{$allowed}爬蟲: UA={$userAgent}, Path={$path}", 'info');
            return;
        }

        // Anything on the block-list is logged at info level and rejected.
        if ($this->firstMatch($this->botPatterns, $userAgent) !== null) {
            trace("Blocked Bot: UA={$userAgent}, Path={$path}", 'info');
            $this->silentBlock();
        }
    }

    /**
     * Return the first pattern in $patterns that matches $userAgent.
     *
     * @param string[] $patterns  PCRE patterns to try, in order
     * @param string   $userAgent User-Agent header value
     * @return string|null the matching pattern, or null when none match
     */
    private function firstMatch(array $patterns, $userAgent)
    {
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $userAgent)) {
                return $pattern;
            }
        }

        return null;
    }

    /**
     * Abort the request with a plain-text 404 response.
     *
     * The response is delivered by throwing HttpResponseException, which
     * stops any further code in this request from running here.
     *
     * @return void
     * @throws \think\exception\HttpResponseException always
     */
    private function silentBlock()
    {
        $response = Response::create()
            ->code(404)
            ->data('Access Denied')
            ->header(['Content-Type' => 'text/plain']);

        throw new \think\exception\HttpResponseException($response);
    }
}
2. 注冊行為到請求事件
在 application/tags.php 中綁定行為到 app_init事件(應用初始化):
return [
    // Application initialisation hook: behaviors listed here run on app_init.
    'app_init' => [
        'app\\common\\behavior\\BlockBot', // crawler blocking
    ],
];
3. 自定義異常處理(防止錯誤日志)
(1) 創建異常處理類
在 application/common/exception 下新建 ExceptionHandler.php,覆蓋默認錯誤處理:
<?php
namespace app\common\exception;

use think\exception\Handle;
use think\exception\RouteNotFoundException;
use think\exception\ValidateException;
use think\Response;

/**
 * Custom exception handler that answers route-not-found and validation
 * failures with an empty response instead of the default error page.
 */
class ExceptionHandler extends Handle
{
    /**
     * Exception classes that should not be written to the log.
     *
     * render() only builds the response; it does not decide what is logged.
     * ValidateException is listed here so the "silent" handling below is
     * silent in the log as well.
     * NOTE(review): relies on the parent Handle::report() consulting
     * $ignoreReport - confirm against the installed framework version.
     *
     * @var string[]
     */
    protected $ignoreReport = [
        '\\think\\exception\\HttpException',
        '\\think\\exception\\ValidateException',
    ];

    /**
     * Convert an exception into an HTTP response.
     *
     * @param \Exception $e the exception being handled
     * @return \think\Response
     */
    public function render(\Exception $e)
    {
        // Unknown routes (typical of crawler probing) get an empty 404.
        if ($e instanceof RouteNotFoundException) {
            return $this->silentResponse(404);
        }

        // Validation failures (e.g. an oversized paging parameter) get an empty 400.
        if ($e instanceof ValidateException) {
            return $this->silentResponse(400);
        }

        // Everything else is recorded at error level and then rendered by
        // the default handler. This step is optional.
        trace("Silent Error: " . $e->getMessage(), 'error');

        return parent::render($e);
    }

    /**
     * Build an empty plain-text response with the given status code.
     *
     * @param int $code HTTP status code
     * @return \think\Response
     */
    private function silentResponse($code)
    {
        return Response::create()
            ->code($code)
            ->data('')
            ->header(['Content-Type' => 'text/plain']);
    }
}
(2) 配置異常處理
在 application/config.php 中指定自定義異常處理器:
// Exception handling: class name of the custom exception handler
'exception_handle' => 'app\common\exception\ExceptionHandler',
4. Nginx層優化(可選)
在服務器配置中攔截部分爬蟲并靜默處理:
server {
    listen 80;
    server_name yourdomain.com;

    # Flag requests whose User-Agent looks like a crawler...
    set $block_bot 0;
    if ($http_user_agent ~* "(bot|spider|python|curl|wget)") {
        set $block_bot 1;
    }
    # ...but keep the search-engine crawlers that the PHP allow-list permits;
    # otherwise "bot|spider" would drop them before PHP ever sees them.
    if ($http_user_agent ~* "(googlebot|bingbot|baiduspider|sogou web spider)") {
        set $block_bot 0;
    }
    # Close the connection without sending a response.
    # access_log is not allowed inside a server-level "if". To keep blocked
    # requests out of the access log, add the "if=" parameter to this
    # server's access_log directive, using a variable that is "0" for
    # blocked requests.
    if ($block_bot) {
        return 444;
    }

    # Serve 404s from an internal location and do not log them.
    error_page 404 = /404;
    location = /404 {
        internal;
        access_log off;
    }

    # ThinkPHP URL rewrite rule.
    location / {
        if (!-e $request_filename) {
            rewrite ^(.*)$ /index.php?s=$1 last;
            break;
        }
    }
}
二、TP5.1或TP6.0通過中間件(middleware)攔截爬蟲并避免生成 [ error ] NULL 錯誤日志
以TP5.1演示:
1. 創建攔截中間件(核心邏輯)
在 application/common/middleware 下新建 BlockBot.php,實現 雙重防護:
<?php
namespace app\common\middleware;

use think\Response;

/**
 * Middleware that answers crawler requests with an empty 404 before the rest
 * of the pipeline runs, while letting allow-listed search engines through.
 */
class BlockBot
{
    /**
     * Case-insensitive regular expressions that mark a User-Agent as a crawler.
     *
     * @var string[]
     */
    protected $botPatterns = [
        '/bot/i',
        '/spider/i',
        '/curl/i',
        '/wget/i',
        '/python/i',
        '/scrapy/i',
        '/crawl/i',
        '/httpclient/i',
    ];

    /**
     * Allow-list of legitimate search-engine crawlers; checked before the
     * block-list.
     *
     * @var string[]
     */
    protected $allowPatterns = [
        '/googlebot/i',
        '/bingbot/i',
        '/baiduspider/i',
    ];

    /**
     * Middleware entry point.
     *
     * @param \think\Request $request current request
     * @param \Closure       $next    next handler in the pipeline
     * @return mixed the downstream response, or an empty 404 for blocked crawlers
     */
    public function handle($request, \Closure $next)
    {
        // Cast so preg_match() always receives a string.
        $userAgent = (string) $request->header('user-agent', '');
        $path      = $request->pathinfo();

        // Allow-listed crawlers continue down the pipeline untouched.
        if ($this->matchesAny($this->allowPatterns, $userAgent)) {
            return $next($request);
        }

        // Block-listed crawlers are logged at info level and answered
        // directly, so no later logic runs for them.
        if ($this->matchesAny($this->botPatterns, $userAgent)) {
            trace("Blocked Bot: UA={$userAgent}, Path={$path}", 'info');

            return response('', 404)->header(['Content-Type' => 'text/html; charset=utf-8']);
        }

        return $next($request);
    }

    /**
     * Tell whether $userAgent matches at least one of $patterns.
     *
     * @param string[] $patterns  PCRE patterns to try, in order
     * @param string   $userAgent User-Agent header value
     * @return bool
     */
    private function matchesAny(array $patterns, $userAgent)
    {
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $userAgent)) {
                return true;
            }
        }

        return false;
    }
}
2. 注冊中間件(全局生效)
修改 application/config.php 配置,確保中間件在最優先執行:
// Middleware configuration
'middleware' => [
    'app\common\middleware\BlockBot', // add this line first so it runs before the others
    // ...other middleware
],
3. 防止生成 [ error ] NULL 日志
(1) 自定義錯誤處理(覆蓋ThinkPHP5默認行為)
在 application/config.php 中配置:
// Error handling: class name of the custom exception handler
'exception_handle' => 'app\common\exception\ExceptionHandler',
創建 application/common/exception/ExceptionHandler.php:
<?php
namespace app\common\exception;

use think\exception\Handle;
use think\exception\RouteNotFoundException;
use think\exception\ValidateException;
use think\Response;

/**
 * Custom exception handler that replaces the default error page with an
 * empty response for route-not-found and validation errors.
 */
class ExceptionHandler extends Handle
{
    /**
     * Exception classes that should be kept out of the log.
     *
     * render() only produces the response and has no say over logging, so
     * ValidateException is listed here to make its handling silent in the
     * log too.
     * NOTE(review): relies on the parent Handle::report() consulting
     * $ignoreReport - confirm against the installed framework version.
     *
     * @var string[]
     */
    protected $ignoreReport = [
        '\\think\\exception\\HttpException',
        '\\think\\exception\\ValidateException',
    ];

    /**
     * Turn an exception into an HTTP response.
     *
     * @param \Exception $e the exception being handled
     * @return \think\Response
     */
    public function render(\Exception $e)
    {
        // Route not found (commonly caused by crawler probing): empty 404.
        if ($e instanceof RouteNotFoundException) {
            return $this->silentResponse(404);
        }

        // Parameter validation error (e.g. page number too large): empty 400.
        if ($e instanceof ValidateException) {
            return $this->silentResponse(400);
        }

        // Other errors are handled as needed; here they are recorded at
        // error level and passed to the default renderer.
        trace("Silent Error: " . $e->getMessage(), 'error');

        return parent::render($e);
    }

    /**
     * Create an empty plain-text response carrying the given status code.
     *
     * @param int $code HTTP status code
     * @return \think\Response
     */
    private function silentResponse($code)
    {
        return Response::create()
            ->code($code)
            ->data('')
            ->header(['Content-Type' => 'text/plain']);
    }
}
(2) 配置日志過濾
修改 application/config.php 忽略部分錯誤類型:
// Log configuration
'log' => [
    'type'        => 'File',
    'level'       => ['error', 'sql'],
    'apart_level' => ['error', 'sql'],
    // Intended to skip route-not-found errors so that no "[ error ] NULL"
    // entries are written.
    // NOTE(review): "ignore_error" does not appear to be a documented option
    // of the File log driver - confirm it has any effect before relying on it.
    'ignore_error' => [
        'think\exception\RouteNotFoundException',
    ],
],
4. Nginx層優化(雙重防護)
server {
    listen 80;
    server_name yourdomain.com;

    # Flag requests whose User-Agent looks like a crawler...
    set $block_bot 0;
    if ($http_user_agent ~* "(bot|spider|python|curl|wget)") {
        set $block_bot 1;
    }
    # ...but keep the search-engine crawlers that the middleware allow-list
    # permits; otherwise "bot|spider" would drop them before PHP sees them.
    if ($http_user_agent ~* "(googlebot|bingbot|baiduspider)") {
        set $block_bot 0;
    }
    # Close the connection without sending a response.
    # access_log is not allowed inside a server-level "if". To keep blocked
    # requests out of the access log, add the "if=" parameter to this
    # server's access_log directive, using a variable that is "0" for
    # blocked requests.
    if ($block_bot) {
        return 444;
    }

    # FastAdmin URL rewrite rule.
    location / {
        if (!-e $request_filename) {
            rewrite ^(.*)$ /index.php?s=$1 last;
            break;
        }
    }

    # Serve 404s from an internal location and do not log them.
    error_page 404 = /404.html;
    location = /404.html {
        internal;
        access_log off;
    }
}
注意:經排查,網站持續寫入 [ error ] NULL 日誌的真正原因,是框架文件 thinkphp\library\think\App.php 的代碼被惡意篡改。
![在這裡插入圖片描述](https://i-blog.csdnimg.cn/direct/3ad9271f561e4ce9ac4d5c9f2867de63.png)