保留 HTML/XML 標籤結構,僅翻譯文本內容,避免破壞富文本格式。採用「HTML 解析 → 文本提取 → 批量翻譯 → 回填」的流程。
百度翻譯集成方案:富文本內容翻譯系統
HTML 解析 + 百度翻譯 API 集成
文件結構
app/
├── Controller/
│   └── TranslationController.php
├── Service/
│   ├── BaiduTranslator.php
│   └── HtmlParser.php
├── Job/
│   └── TranslateContentJob.php
└── Model/
    └── Article.php
config/
└── autoload/
    └── translation.php
代碼實現
- 配置文件 config/autoload/translation.php
<?php

declare(strict_types=1);

/**
 * Settings for the Baidu Translate integration.
 *
 * Read through config('translation.baidu.*') by the HTML parser and the
 * translator service.
 */
return [
    'baidu' => [
        // Credentials come from the environment so they never live in the repository.
        'appid' => env('BAIDU_TRANSLATE_APPID', ''),
        'secret' => env('BAIDU_TRANSLATE_SECRET', ''),

        // General text translation endpoint. The translator signs requests as
        // md5(appid + q + salt + secret); the "fieldtranslate" endpoint expects an
        // extra `domain` parameter inside the signature, which is never sent.
        'api_url' => 'https://fanyi-api.baidu.com/api/trans/vip/translate',

        // Number of text segments sent per request.
        'chunk_size' => 30,

        // Maximum number of bytes sent per request.
        'max_length' => 5000,

        // Comma-separated tag lists forwarded to the API / used by the HTML parser.
        'preserve_tags' => 'p,div,span,h1,h2,h3,h4,h5,h6,ul,ol,li,table,tr,td,th,img,video,a,strong,em,b,i,u',
        'ignore_tags' => 'code,pre,script,style',
    ],
];
- 模型 app/Model/Article.php
<?php

declare(strict_types=1);

namespace App\Model;

use Hyperf\DbConnection\Model\Model;

/**
 * Article whose HTML content can be machine-translated.
 *
 * @property int $id
 * @property string $title
 * @property string $content Source HTML.
 * @property string $en_content Translated HTML written by the translation job.
 * @property int $translation_status 0 = pending, 1 = processing, 2 = completed, 3 = failed
 * @property \Carbon\Carbon $created_at
 * @property \Carbon\Carbon $updated_at
 */
class Article extends Model
{
    /** Translation has not started yet. */
    public const STATUS_PENDING = 0;

    /** A translation job is currently running. */
    public const STATUS_PROCESSING = 1;

    /** Translation finished and `en_content` was stored. */
    public const STATUS_COMPLETED = 2;

    /** The translation job threw an exception. */
    public const STATUS_FAILED = 3;

    protected ?string $table = 'articles';

    protected array $fillable = [
        'title',
        'content',
        'en_content',
        'translation_status',
    ];

    protected array $casts = [
        'id' => 'integer',
        'translation_status' => 'integer',
        'created_at' => 'datetime',
        'updated_at' => 'datetime',
    ];
}
- HTML 解析器 app/Service/HtmlParser.php
<?php

declare(strict_types=1);

namespace App\Service;

use voku\helper\HtmlDomParser;

/**
 * Extracts translatable text from an HTML fragment and writes translations
 * back into the same DOM, so the surrounding markup is preserved.
 */
class HtmlParser
{
    /**
     * Collect the translatable pieces of an HTML string.
     *
     * Three kinds of entries are produced:
     *  - the text of every element that has no child elements,
     *  - the `alt` attribute of `<img>` elements,
     *  - the `title` attribute of `<video>` elements.
     *
     * Elements listed in `translation.baidu.ignore_tags`, and everything nested
     * inside them, are skipped.
     *
     * NOTE(review): text that sits directly inside an element which also has
     * child elements (e.g. "Hello <b>world</b>") is not extracted; only the
     * leaf element's text is.
     *
     * @param string $html HTML to scan.
     *
     * @return array<int, array<string, mixed>> One entry per translatable piece with the keys
     *                                          `node`, `text`, `outer_html`, `dom` and, for
     *                                          attribute entries, `is_attr` (attribute name).
     */
    public function extractTextNodes(string $html): array
    {
        $dom = HtmlDomParser::str_get_html($html);

        // Parse the ignore list once instead of once per visited element.
        $ignoreTags = array_values(array_filter(array_map(
            static fn (string $tag): string => strtolower(trim($tag)),
            explode(',', (string) config('translation.baidu.ignore_tags', 'code,pre,script,style'))
        )));

        $textNodes = [];

        foreach ($dom->findMulti('*') as $node) {
            $domNode = $node->getNode();

            if ($this->isInsideIgnoredTag($domNode, $ignoreTags)) {
                continue;
            }

            $tag = strtolower($domNode->nodeName);

            // Leaf elements only: replacing the inner HTML of an element that has
            // child elements would destroy those children.
            $text = (string) $node->text();
            if (trim($text) !== '' && !$this->hasChildElements($domNode)) {
                $textNodes[] = [
                    'node' => $node,
                    'text' => $text,
                    'outer_html' => $node->outerhtml,
                    'dom' => $dom,
                ];
            }

            // Image alternative text.
            if ($tag === 'img') {
                $alt = (string) $node->getAttribute('alt');
                if (trim($alt) !== '') {
                    $textNodes[] = [
                        'node' => $node,
                        'text' => $alt,
                        'is_attr' => 'alt',
                        'outer_html' => $node->outerhtml,
                        'dom' => $dom,
                    ];
                }
            }

            // Video title.
            if ($tag === 'video') {
                $title = (string) $node->getAttribute('title');
                if (trim($title) !== '') {
                    $textNodes[] = [
                        'node' => $node,
                        'text' => $title,
                        'is_attr' => 'title',
                        'outer_html' => $node->outerhtml,
                        'dom' => $dom,
                    ];
                }
            }
        }

        return $textNodes;
    }

    /**
     * Write translations back into the nodes returned by extractTextNodes().
     *
     * `$translatedTexts[$i]` belongs to the i-th entry of `$originalNodes`; a
     * missing or empty translation leaves the original text in place.
     *
     * @param array<int, array<string, mixed>> $originalNodes Entries from extractTextNodes().
     * @param array<int, mixed> $translatedTexts Translations aligned by position.
     *
     * @return string The complete document with translations applied when the entries
     *                carry their `dom`; otherwise the concatenated outer HTML of the
     *                touched nodes (legacy behaviour).
     */
    public function replaceTranslatedText(array $originalNodes, array $translatedTexts): string
    {
        $translatedTexts = array_values($translatedTexts);
        $dom = null;
        $fragments = '';

        foreach (array_values($originalNodes) as $index => $nodeInfo) {
            $translated = $translatedTexts[$index] ?? null;
            if (!is_string($translated) || $translated === '') {
                $translated = $nodeInfo['text'];
            }

            if (isset($nodeInfo['is_attr'])) {
                $nodeInfo['node']->setAttribute($nodeInfo['is_attr'], $translated);
            } else {
                // The translation is plain text, so escape it before it becomes markup.
                $nodeInfo['node']->innertext = htmlspecialchars($translated, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            }

            $dom ??= $nodeInfo['dom'] ?? null;
            $fragments .= $nodeInfo['node']->outerhtml;
        }

        // The nodes were modified in place, so serialising the owning document
        // keeps parents, siblings and ignored blocks that were never extracted.
        if ($dom instanceof HtmlDomParser) {
            return $dom->html();
        }

        return $fragments;
    }

    /**
     * Tell whether the node itself or any of its ancestors is an ignored tag.
     *
     * @param string[] $ignoreTags Lower-cased tag names.
     */
    private function isInsideIgnoredTag(\DOMNode $node, array $ignoreTags): bool
    {
        for ($current = $node; $current !== null; $current = $current->parentNode) {
            if (in_array(strtolower($current->nodeName), $ignoreTags, true)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Tell whether the node has at least one direct child that is an element.
     */
    private function hasChildElements(\DOMNode $node): bool
    {
        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_ELEMENT_NODE) {
                return true;
            }
        }

        return false;
    }
}
- 百度翻譯服務 app/Service/BaiduTranslator.php
<?php

declare(strict_types=1);

namespace App\Service;

use Hyperf\Di\Annotation\Inject;
use Hyperf\Guzzle\ClientFactory;
use Hyperf\HttpServer\Contract\RequestInterface;
use Hyperf\Utils\Arr;
use Hyperf\Utils\Codec\Json;

/**
 * Translates the text content of an HTML fragment from Chinese to English
 * through the Baidu Translate HTTP API while leaving the markup untouched.
 */
class BaiduTranslator
{
    #[Inject]
    private ClientFactory $clientFactory;

    #[Inject]
    private HtmlParser $htmlParser;

    /**
     * Translate every text piece found by the HTML parser and put the results back.
     *
     * @param string $html Source HTML.
     *
     * @return string HTML produced by HtmlParser::replaceTranslatedText(); the input is
     *                returned unchanged when nothing translatable was found.
     */
    public function translate(string $html): string
    {
        // 1. Find the pieces that need translating.
        $textNodes = $this->htmlParser->extractTextNodes($html);
        if ($textNodes === []) {
            return $html;
        }

        // 2. Keep only the plain text of each piece.
        $texts = array_map(
            static fn ($text): string => (string) $text,
            array_column($textNodes, 'text')
        );

        // 3. Translate in batches. translateChunk() always returns exactly one
        //    entry per input, so positions stay aligned with $textNodes.
        $translatedTexts = [];
        foreach ($this->buildChunks($texts) as $chunk) {
            array_push($translatedTexts, ...$this->translateChunk($chunk));
        }

        // 4. Write the translations back into the HTML.
        return $this->htmlParser->replaceTranslatedText($textNodes, $translatedTexts);
    }

    /**
     * Split the texts into batches that respect both configured limits:
     * `chunk_size` (entries per request) and `max_length` (bytes per request).
     *
     * A single text larger than `max_length` is sent in a batch of its own,
     * because it cannot be split here without breaking sentences.
     *
     * @param string[] $texts
     *
     * @return array<int, string[]>
     */
    private function buildChunks(array $texts): array
    {
        $maxItems = max(1, (int) config('translation.baidu.chunk_size', 30));
        $maxBytes = max(1, (int) config('translation.baidu.max_length', 5000));

        $chunks = [];
        $current = [];
        $currentBytes = 0;

        foreach ($texts as $text) {
            // strlen() on purpose: the limit is expressed in bytes. +1 for the "\n" separator.
            $bytes = strlen($text) + 1;

            if ($current !== [] && (count($current) >= $maxItems || $currentBytes + $bytes > $maxBytes)) {
                $chunks[] = $current;
                $current = [];
                $currentBytes = 0;
            }

            $current[] = $text;
            $currentBytes += $bytes;
        }

        if ($current !== []) {
            $chunks[] = $current;
        }

        return $chunks;
    }

    /**
     * Translate one batch of texts with a single API request.
     *
     * The texts are sent newline-separated and one result per line is expected
     * back. On any failure (HTTP error, API error code, unexpected result count)
     * the error is logged and the original texts are returned as a fallback.
     *
     * @param string[] $texts
     *
     * @return string[] Same length and order as `$texts`.
     */
    private function translateChunk(array $texts): array
    {
        $originals = array_values($texts);

        // One text must occupy exactly one line of the query, otherwise results
        // would no longer line up with their inputs.
        $lines = array_map(fn (string $text): string => $this->toSingleLine($text), $originals);

        // Blank lines would not produce a result of their own; leave them out and
        // remember the original positions through the preserved keys.
        $pending = array_filter($lines, static fn (string $line): bool => $line !== '');
        if ($pending === []) {
            return $originals;
        }

        $client = $this->clientFactory->create();
        $appId = config('translation.baidu.appid');
        $secret = config('translation.baidu.secret');

        // Request signature. MD5 is what the API's signing scheme prescribes.
        $salt = time();
        $combinedText = implode("\n", $pending);
        $sign = md5($appId . $combinedText . $salt . $secret);

        $params = [
            'q' => $combinedText,
            'from' => 'zh',
            'to' => 'en',
            'appid' => $appId,
            'salt' => $salt,
            'sign' => $sign,
            // NOTE(review): the three parameters below are kept from the original
            // request; confirm that the endpoint actually honours them.
            'tag_handling' => '1',
            'ignore_tags' => config('translation.baidu.ignore_tags', 'code,pre,script,style'),
            'preserve_tags' => config('translation.baidu.preserve_tags', 'p,div,span'),
        ];

        try {
            $response = $client->post(config('translation.baidu.api_url'), [
                'form_params' => $params,
                'timeout' => 15,
            ]);

            $result = Json::decode($response->getBody()->getContents());

            // 52000 is the API's explicit "success" code; anything else is an error.
            $errorCode = $result['error_code'] ?? null;
            if ($errorCode !== null && (string) $errorCode !== '52000') {
                $errorMsg = $result['error_msg'] ?? '';
                throw new \RuntimeException("百度翻譯錯誤: {$errorCode} - {$errorMsg}");
            }

            $translated = $result['trans_result'] ?? [];
            if (!is_array($translated) || count($translated) !== count($pending)) {
                throw new \RuntimeException(sprintf(
                    '百度翻譯結果數量不符: 期望 %d 段, 實際 %d 段',
                    count($pending),
                    is_array($translated) ? count($translated) : 0
                ));
            }
            $translated = array_values($translated);

            // Put each result back at the position of the text it belongs to.
            $output = $originals;
            foreach (array_keys($pending) as $position => $index) {
                $dst = $translated[$position]['dst'] ?? '';
                if (is_string($dst) && $dst !== '') {
                    $output[$index] = $dst;
                }
            }

            return $output;
        } catch (\Throwable $e) {
            // Deliberate degradation: log the failure and fall back to the source text.
            logger()->error('百度翻譯失敗: ' . $e->getMessage(), [
                'texts' => $texts,
                'exception' => $e,
            ]);

            return $originals;
        }
    }

    /**
     * Collapse line breaks (and the whitespace around them) into single spaces.
     */
    private function toSingleLine(string $text): string
    {
        // preg_replace() yields null on invalid UTF-8; fall back to a byte-wise replace.
        $line = preg_replace('/\s*[\r\n]+\s*/u', ' ', $text)
            ?? str_replace(["\r\n", "\r", "\n"], ' ', $text);

        return trim($line);
    }
}
- 異步任務 app/Job/TranslateContentJob.php
<?php

declare(strict_types=1);

namespace App\Job;

use App\Model\Article;
use App\Service\BaiduTranslator;
use Hyperf\AsyncQueue\Job;

/**
 * Queue job that translates one article's content and stores the result
 * in `en_content`, tracking progress in `translation_status`.
 */
class TranslateContentJob extends Job
{
    /** Primary key of the article to translate. */
    public int $articleId;

    /**
     * @param int $articleId Primary key of the article to translate.
     */
    public function __construct(int $articleId)
    {
        $this->articleId = $articleId;
    }

    /**
     * Run the translation.
     *
     * Logs a warning and returns when the article no longer exists. Any exception
     * raised while translating or saving is logged and the article is marked as failed.
     *
     * @return void
     */
    public function handle()
    {
        $article = Article::find($this->articleId);
        if (!$article) {
            logger()->warning("文章不存在: {$this->articleId}");

            return;
        }

        try {
            // Mark as in progress so status polling reflects the running job.
            $article->translation_status = Article::STATUS_PROCESSING;
            $article->save();

            $translator = make(BaiduTranslator::class);
            $enContent = $translator->translate($article->content);

            $article->en_content = $enContent;
            $article->translation_status = Article::STATUS_COMPLETED;
            $article->save();
        } catch (\Throwable $e) {
            // Log first: if the status update below fails as well, the original
            // cause has already been recorded.
            logger()->error("文章翻譯失敗: {$e->getMessage()}", [
                'article_id' => $this->articleId,
                'exception' => $e,
            ]);

            $article->translation_status = Article::STATUS_FAILED;
            $article->save();
        }
    }
}
- 控制器 app/Controller/TranslationController.php
<?php

declare(strict_types=1);

namespace App\Controller;

use App\Job\TranslateContentJob;
use App\Model\Article;
use App\Service\BaiduTranslator;
use Hyperf\AsyncQueue\Driver\DriverFactory;
use Hyperf\Di\Annotation\Inject;
use Hyperf\HttpServer\Annotation\AutoController;
use Hyperf\HttpServer\Annotation\Controller;
use Hyperf\HttpServer\Annotation\GetMapping;
use Hyperf\HttpServer\Annotation\PostMapping;
use Hyperf\HttpServer\Contract\RequestInterface;
use Hyperf\HttpServer\Contract\ResponseInterface;
use Hyperf\Utils\ApplicationContext;

/**
 * HTTP endpoints for on-demand translation and for articles that are
 * translated asynchronously through the queue.
 *
 * Uses #[Controller] rather than AutoController because every action declares
 * its own route with a mapping attribute.
 */
#[Controller]
class TranslationController
{
    #[Inject]
    private BaiduTranslator $translator;

    #[Inject]
    protected RequestInterface $request;

    #[Inject]
    protected ResponseInterface $response;

    /**
     * Translate the posted `content` synchronously.
     *
     * Responds with `code` 0 plus the translated content and elapsed time,
     * `code` 400 when `content` is missing, or `code` 500 on failure.
     */
    #[PostMapping(path: '/translate')]
    public function translate()
    {
        $html = $this->request->post('content', '');
        // Strict check: empty() would also reject the valid content "0".
        if (!is_string($html) || $html === '') {
            return $this->response->json([
                'code' => 400,
                'message' => '內容不能為空',
            ]);
        }

        try {
            $start = microtime(true);
            $translatedContent = $this->translator->translate($html);
            $time = round(microtime(true) - $start, 3);

            return $this->response->json([
                'code' => 0,
                'message' => '翻譯成功',
                'data' => [
                    'content' => $translatedContent,
                    'time' => $time . 's',
                ],
            ]);
        } catch (\Throwable $e) {
            return $this->response->json([
                'code' => 500,
                'message' => '翻譯失敗: ' . $e->getMessage(),
            ]);
        }
    }

    /**
     * Create an article from the posted `title` and `content` and queue its translation.
     *
     * Responds with `code` 0 plus the new id and status, `code` 400 when a field
     * is missing, or `code` 500 on failure.
     */
    #[PostMapping(path: '/article')]
    public function createArticle()
    {
        $title = $this->request->post('title', '');
        $content = $this->request->post('content', '');

        if (!is_string($title) || $title === '' || !is_string($content) || $content === '') {
            return $this->response->json([
                'code' => 400,
                'message' => '標題和內容不能為空',
            ]);
        }

        try {
            $article = Article::create([
                'title' => $title,
                'content' => $content,
                'translation_status' => Article::STATUS_PENDING,
            ]);

            // Hand the translation over to the "default" async queue.
            $queue = ApplicationContext::getContainer()->get(DriverFactory::class)->get('default');
            $queue->push(new TranslateContentJob($article->id));

            return $this->response->json([
                'code' => 0,
                'message' => '文章創建成功,翻譯任務已提交',
                'data' => [
                    'id' => $article->id,
                    'translation_status' => $article->translation_status,
                ],
            ]);
        } catch (\Throwable $e) {
            return $this->response->json([
                'code' => 500,
                'message' => '文章創建失敗: ' . $e->getMessage(),
            ]);
        }
    }

    /**
     * Report the translation status of an article.
     *
     * Responds with `code` 0 plus the status, its label and `en_content`,
     * or `code` 404 when the article does not exist.
     *
     * @param int $id Article id taken from the route.
     */
    #[GetMapping(path: '/article/status/{id}')]
    public function getTranslationStatus(int $id)
    {
        $article = Article::find($id);
        if (!$article) {
            return $this->response->json([
                'code' => 404,
                'message' => '文章不存在',
            ]);
        }

        $statusMap = [
            Article::STATUS_PENDING => '等待翻譯',
            Article::STATUS_PROCESSING => '翻譯中',
            Article::STATUS_COMPLETED => '翻譯完成',
            Article::STATUS_FAILED => '翻譯失敗',
        ];

        return $this->response->json([
            'code' => 0,
            'data' => [
                'id' => $article->id,
                'status' => $article->translation_status,
                'status_text' => $statusMap[$article->translation_status] ?? '未知狀態',
                'en_content' => $article->en_content,
            ],
        ]);
    }
}
- 環境配置 .env
BAIDU_TRANSLATE_APPID=your_app_id
BAIDU_TRANSLATE_SECRET=your_secret_key
完結!