在軟件開發中,代碼的迭代優化往往從提升可維護性、可追蹤性入手。本文將詳細解析新增的日志系統改進,以及這些改進如何提升系統的實用性和可調試性。
一、代碼整體背景
代碼實現了一個基于 TF-IDF 算法的問答系統,核心功能包括:
- 加載訓練數據(`training_data.txt`)構建問答庫
- 提取中英文關鍵詞(支持 GBK 編碼中文處理)
- 通過精確匹配和 TF-IDF 相似度計算返回最佳答案
- 支持基礎交互命令(`help`/`topics`/`exit` 等)
其中,改進版在原版本的基礎上,重點新增了日志記錄功能,下面詳細解析具體改進點。
二、核心改進點:新增日志系統
1. 日志相關頭文件與常量定義
代碼新增了日志功能所需的頭文件和常量:
#include <ctime> // 用于日志時間戳
// Name of the log file
const string LOG_FILE = "chat_log.txt";
- 引入 `<ctime>` 庫用于獲取當前時間,為日志添加時間戳
- 定義 `LOG_FILE` 常量指定日志文件名(`chat_log.txt`),便于統一管理日志存儲路徑
2. 時間戳生成函數:getCurrentTime()
為了讓日志具備時間維度的可追溯性,改進版新增了時間戳生成函數:
// Returns the current local time formatted as "YYYY-MM-DD HH:MM:SS".
// If the time cannot be converted to local time, or the formatted text does
// not fit the buffer, the placeholder "0000-00-00 00:00:00" is returned so
// that log lines keep their fixed-width prefix.
string getCurrentTime() {
    const time_t now = time(NULL);
    const struct tm* localTime = localtime(&now);
    if (localTime == NULL) {
        return "0000-00-00 00:00:00";
    }

    // 19 visible characters plus the terminating NUL.
    char timeStr[20];
    // strftime never writes past the given size and returns 0 when the
    // result does not fit, unlike an unbounded sprintf.
    if (strftime(timeStr, sizeof(timeStr), "%Y-%m-%d %H:%M:%S", localTime) == 0) {
        return "0000-00-00 00:00:00";
    }
    return string(timeStr);
}
- 功能:生成 `YYYY-MM-DD HH:MM:SS` 格式的時間字符串,確保日志記錄的時間精確到秒
- 優勢:統一的時間格式便于后續日志分析(如按時間篩選用戶交互記錄)
3. 日志寫入函數:writeLog()
新增了日志寫入核心函數,負責將信息追加到日志文件:
// 寫入日志信息
void writeLog(const string& type, const string& content) {ofstream logFile(LOG_FILE.c_str(), ios::app); // 追加模式打開if (logFile.is_open()) {logFile << "[" << getCurrentTime() << "] [" << type << "] " << content << endl;logFile.close();} else {cerr << "警告: 無法打開日志文件 " << LOG_FILE << endl;}
}
- 關鍵參數:
  - `type`:日志類型(如 "系統"/"用戶命令"/"用戶輸入"/"系統響應"),用于分類日志
  - `content`:日志具體內容
- 實現細節:
  - 使用 `ios::app` 模式打開文件,確保新日志追加到文件末尾(不覆蓋歷史記錄)
  - 日志格式:`[時間戳] [類型] 內容`,結構清晰,便于閱讀和解析
4. 關鍵節點日志記錄
改進版在程序運行的關鍵節點添加了日志記錄,覆蓋系統生命周期和用戶交互的全流程:
場景 | 日志記錄代碼 | 作用 |
---|---|---|
程序啟動 | writeLog("系統", "程序啟動"); | 記錄系統初始化時間,用于排查啟動故障 |
訓練數據加載完成 | sprintf(logMsg, "加載訓練數據完成,共%d條記錄", exactAnswers.size()); writeLog("系統", logMsg); | 記錄數據加載結果,驗證數據是否正確加載 |
用戶輸入命令(help) | writeLog("用戶命令", "輸入help,查看幫助信息"); | 追蹤用戶使用幫助命令的行為 |
用戶輸入命令(topics) | writeLog("用戶命令", "輸入topics,查看可回答話題"); | 分析用戶對話題的關注度 |
用戶輸入空內容 | writeLog("用戶輸入", "空輸入"); | 統計無效輸入情況,優化交互提示 |
用戶輸入問題 | writeLog("用戶輸入", "問題: " + input); | 記錄用戶原始問題,用于后續優化問答庫 |
系統返回答案 | writeLog("系統響應", "精確匹配回答: " + it->second); 或 writeLog("系統響應", "TF-IDF匹配回答: " + bestAnswer); | 關聯用戶問題與系統答案,分析匹配準確性 |
程序退出 | writeLog("系統", "用戶輸入exit,程序退出"); | 記錄系統終止時間和原因 |
三、改進帶來的核心價值
- 可追溯性提升:日志記錄了系統從啟動到退出的全流程狀態,以及用戶的每一次交互(輸入內容、執行命令),當系統出現異常時,可通過日志快速定位問題節點(如數據加載失敗、匹配邏輯錯誤等)。
- 用戶行為分析:通過用戶輸入日志(問題、命令),可以統計高頻問題、用戶關注的話題等,為優化問答庫(補充熱門問題答案)提供數據支持。
- 系統調試效率提升:無需通過 `cout` 打印臨時調試信息,日志文件可永久保存,便于復現問題和對比不同版本的運行差異。
- 審計與合規:對于需要留存交互記錄的場景(如簡單的客服系統),日志可作為合規審計的依據。
完整代碼
#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstdio>
#include <ctime> // timestamps for log entries
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
using namespace std;// Next line: name of the log file
const string LOG_FILE = "chat_log.txt";// Next function: get the current time as a string (format: YYYY-MM-DD HH:MM:SS)
// Returns the current local time formatted as "YYYY-MM-DD HH:MM:SS".
// If the time cannot be converted to local time, or the formatted text does
// not fit the buffer, the placeholder "0000-00-00 00:00:00" is returned so
// that log lines keep their fixed-width prefix.
string getCurrentTime() {
    const time_t now = time(NULL);
    const struct tm* localTime = localtime(&now);
    if (localTime == NULL) {
        return "0000-00-00 00:00:00";
    }

    // 19 visible characters plus the terminating NUL.
    char timeStr[20];
    // strftime never writes past the given size and returns 0 when the
    // result does not fit, unlike an unbounded sprintf.
    if (strftime(timeStr, sizeof(timeStr), "%Y-%m-%d %H:%M:%S", localTime) == 0) {
        return "0000-00-00 00:00:00";
    }
    return string(timeStr);
}
// Write a log entry.
void writeLog(const string& type, const string& content) {ofstream logFile(LOG_FILE.c_str(), ios::app); // 追加模式打開if (logFile.is_open()) {logFile << "[" << getCurrentTime() << "] [" << type << "] " << content << endl;logFile.close();} else {cerr << "警告: 無法打開日志文件 " << LOG_FILE << endl;}
}// 判斷是否為中文標點符號(GBK編碼)
// Reports whether the two-byte GBK sequence (c1, c2) is a punctuation mark or
// separator rather than a word character.
//   c1 - lead byte
//   c2 - trail byte
// Recognised ranges (GB 2312 rows that GBK shares):
//   0xA1A1-0xA1BF  ideographic space, CJK punctuation, quotes and brackets
//   0xA3xx         full-width forms of ASCII; (xx - 0x80) is the ASCII
//                  counterpart, so punctuation is detected with ispunct()
// Lead byte 0xBC is intentionally not handled: 0xBC 0x80-0x8F is the tail of
// UTF-8 full-width punctuation, but in GBK those byte pairs are ordinary
// Chinese characters and must not be dropped.
bool isChinesePunctuation(unsigned char c1, unsigned char c2) {
    if (c1 == 0xA1) {
        return c2 >= 0xA1 && c2 <= 0xBF;
    }
    if (c1 == 0xA3) {
        return c2 >= 0xA1 && c2 <= 0xFE && ispunct(c2 - 0x80) != 0;
    }
    return false;
}
// Convert a string to lower case (ASCII characters only).
// Returns a copy of str with every byte passed through tolower().
// Each byte is converted to unsigned char first so that bytes above 127
// (e.g. halves of GBK characters) are valid arguments to tolower().
string toLower(const string& str) {
    string lowered;
    lowered.reserve(str.size());
    for (string::const_iterator ch = str.begin(); ch != str.end(); ++ch) {
        const unsigned char raw = static_cast<unsigned char>(*ch);
        lowered.push_back(static_cast<char>(tolower(raw)));
    }
    return lowered;
}
// Extract keywords from a string (with fixed Chinese handling).
vector<string> extractKeywords(const string& text) {vector<string> keywords;string asciiWord; // 存儲英文/數字詞for (size_t i = 0; i < text.length(); ) {unsigned char c = static_cast<unsigned char>(text[i]);// 處理ASCII字符(0-127)if (c <= 127) {if (isalnum(c)) { // 字母或數字asciiWord += text[i];++i;} else { // ASCII標點或空格,作為分隔符if (!asciiWord.empty()) {keywords.push_back(toLower(asciiWord));asciiWord.clear();}++i;}} // 處理中文字符(GBK編碼,2字節)else {if (i + 1 >= text.length()) {++i;continue;}unsigned char c2 = static_cast<unsigned char>(text[i+1]);// 過濾中文標點if (isChinesePunctuation(c, c2)) {if (!asciiWord.empty()) {keywords.push_back(toLower(asciiWord));asciiWord.clear();}i += 2;continue;}// 提取單個漢字作為關鍵詞string chineseChar;chineseChar += text[i];chineseChar += text[i+1];keywords.push_back(chineseChar);i += 2;}}// 處理剩余的ASCII詞if (!asciiWord.empty()) {keywords.push_back(toLower(asciiWord));}return keywords;
}// 顯示幫助信息
// Prints the usage instructions (available commands) to standard output,
// one line at a time, each followed by a flush.
void showHelp() {
    static const char* const helpLines[] = {
        "\n===== 使用幫助 =====",
        "1. 直接輸入您的問題,我會盡力為您解答",
        "2. 輸入 'exit' 或 'quit' 結束對話",
        "3. 輸入 'help' 查看幫助信息",
        "4. 輸入 'topics' 查看我能回答的問題類型",
        "====================\n"
    };
    const size_t lineCount = sizeof(helpLines) / sizeof(helpLines[0]);
    for (size_t idx = 0; idx < lineCount; ++idx) {
        cout << helpLines[idx] << endl;
    }
}
// Show the kinds of topics that can be answered.
// Shortens text to at most maxBytes bytes followed by "...", never cutting a
// two-byte (GBK) character in half. Text that already fits is returned as is.
static string truncateOnCharBoundary(const string& text, size_t maxBytes) {
    if (text.length() <= maxBytes) {
        return text;
    }
    size_t cut = 0;
    while (cut < text.length()) {
        const bool twoByte =
            static_cast<unsigned char>(text[cut]) > 127 && cut + 1 < text.length();
        const size_t width = twoByte ? 2 : 1;
        if (cut + width > maxBytes) {
            break;
        }
        cut += width;
    }
    return text.substr(0, cut) + "...";
}

// Prints up to five sample questions (the keys of exactAnswers, in key order)
// so the user can see what can be asked, followed by a count of the remaining
// ones. Each sample is shortened to at most 30 bytes.
//   exactAnswers - question -> answer table; only the questions are shown
void showTopics(const map<string, string>& exactAnswers) {
    const size_t kMaxSamples = 5;
    const size_t kMaxSampleBytes = 30;

    if (exactAnswers.empty()) {
        cout << "暫無可用的話題信息" << endl;
        return;
    }

    cout << "\n===== 我可以回答這些類型的問題 =====" << endl;
    size_t shown = 0;
    for (map<string, string>::const_iterator it = exactAnswers.begin();
         it != exactAnswers.end() && shown < kMaxSamples; ++it, ++shown) {
        // A plain substr(0, 30) could split a two-byte character and print
        // a stray lead byte, so truncate on a character boundary instead.
        cout << "- " << truncateOnCharBoundary(it->first, kMaxSampleBytes) << endl;
    }
    if (exactAnswers.size() > kMaxSamples) {
        cout << "... 還有 " << (exactAnswers.size() - kMaxSamples) << " 個其他話題" << endl;
    }
    cout << "=================================\n" << endl;
}
// Compute TF-IDF and return the best matching answer.
string getBestAnswerByTFIDF(const vector<string>& userKeywords,const map<string, vector<string> >& qas,const map<string, vector<string> >& questionKeywords,const map<string, double>& idfValues) {map<string, double> userTFIDF;for (vector<string>::const_iterator kit = userKeywords.begin(); kit != userKeywords.end(); ++kit) {const string& keyword = *kit;double tf = 0.0;for (vector<string>::const_iterator it = userKeywords.begin(); it != userKeywords.end(); ++it) {if (*it == keyword) tf++;}tf /= userKeywords.size();double idf = 0.0;map<string, double>::const_iterator idfIt = idfValues.find(keyword);if (idfIt != idfValues.end()) {idf = idfIt->second;}userTFIDF[keyword] = tf * idf;}map<string, double> similarityScores;for (map<string, vector<string> >::const_iterator pit = questionKeywords.begin(); pit != questionKeywords.end(); ++pit) {const string& question = pit->first;const vector<string>& keywords = pit->second;map<string, double> questionTFIDF;for (vector<string>::const_iterator kit = keywords.begin(); kit != keywords.end(); ++kit) {const string& keyword = *kit;double tf = 0.0;for (vector<string>::const_iterator it = keywords.begin(); it != keywords.end(); ++it) {if (*it == keyword) tf++;}tf /= keywords.size();double idf = 0.0;map<string, double>::const_iterator idfIt = idfValues.find(keyword);if (idfIt != idfValues.end()) {idf = idfIt->second;}questionTFIDF[keyword] = tf * idf;}double dotProduct = 0.0;double userNorm = 0.0;double questionNorm = 0.0;for (map<string, double>::const_iterator uit = userTFIDF.begin(); uit != userTFIDF.end(); ++uit) {const string& keyword = uit->first;double userWeight = uit->second;userNorm += userWeight * userWeight;map<string, double>::const_iterator qit = questionTFIDF.find(keyword);if (qit != questionTFIDF.end()) {dotProduct += userWeight * qit->second;}}for (map<string, double>::const_iterator qit = questionTFIDF.begin(); qit != questionTFIDF.end(); ++qit) {questionNorm += qit->second * qit->second;}userNorm = 
sqrt(userNorm);questionNorm = sqrt(questionNorm);double similarity = 0.0;if (userNorm > 0 && questionNorm > 0) {similarity = dotProduct / (userNorm * questionNorm);}similarityScores[question] = similarity;}string bestQuestion;double maxSimilarity = 0.0;for (map<string, double>::const_iterator it = similarityScores.begin(); it != similarityScores.end(); ++it) {if (it->second > maxSimilarity) {maxSimilarity = it->second;bestQuestion = it->first;}}if (maxSimilarity >= 0.15) { map<string, vector<string> >::const_iterator ansIt = qas.find(bestQuestion);if (ansIt != qas.end() && !ansIt->second.empty()) {return ansIt->second[0];}}return "";
}int main() {// 初始化日志writeLog("系統", "程序啟動");map<string, string> exactAnswers;map<string, vector<string> > qas;map<string, vector<string> > questionKeywords;map<string, int> documentFrequency;// 打開訓練文件ifstream trainingFile("training_data.txt");if (trainingFile.is_open()) {string line;string question = "";bool readingAnswer = false;int totalDocuments = 0;while (getline(trainingFile, line)) {if (line.empty()) {question = "";readingAnswer = false;continue;}if (line.size() >= 2 && line.substr(0, 2) == "Q:") {question = line.substr(2);readingAnswer = false;totalDocuments++;}else if (line.size() >= 2 && line.substr(0, 2) == "A:") {if (!question.empty()) {string answer = line.substr(2);exactAnswers[question] = answer;qas[question].push_back(answer);vector<string> keywords = extractKeywords(question);questionKeywords[question] = keywords;set<string> uniqueKeywords;for (vector<string>::const_iterator it = keywords.begin(); it != keywords.end(); ++it) {uniqueKeywords.insert(*it);}for (set<string>::const_iterator it = uniqueKeywords.begin(); it != uniqueKeywords.end(); ++it) {documentFrequency[*it]++;}}readingAnswer = true;}else if (readingAnswer && !question.empty()) {exactAnswers[question] += "\n" + line;qas[question].back() += "\n" + line;}}trainingFile.close();cout << "已加載 " << exactAnswers.size() << " 條訓練數據" << endl;// 記錄訓練數據加載情況char logMsg[100];sprintf(logMsg, "加載訓練數據完成,共%d條記錄", exactAnswers.size());writeLog("系統", logMsg);map<string, double> idfValues;for (map<string, int>::const_iterator it = documentFrequency.begin(); it != documentFrequency.end(); ++it) {const string& keyword = it->first;int df = it->second;double idf = log(static_cast<double>(totalDocuments) / (df + 1)) + 1;idfValues[keyword] = idf;}cout << "\n=================================" << endl;cout << "歡迎使用問答系統!我可以回答您的問題" << endl;cout << "輸入 'help' 查看可用命令,'exit' 退出程序" << endl;cout << "=================================\n" << endl;string input;while (true) {cout << "請輸入您的問題: ";getline(cin, input);if (input == 
"exit" || input == "quit") {cout << "機器人: 再見!感謝使用!" << endl;writeLog("系統", "用戶輸入exit,程序退出");break;}else if (input == "help") {showHelp();writeLog("用戶命令", "輸入help,查看幫助信息");continue;}else if (input == "topics") {showTopics(exactAnswers);writeLog("用戶命令", "輸入topics,查看可回答話題");continue;}else if (input.empty()) {cout << "機器人: 您的輸入為空,請重新輸入" << endl;writeLog("用戶輸入", "空輸入");continue;}// 記錄用戶輸入writeLog("用戶輸入", "問題: " + input);// 精確匹配嘗試string inputClean = input;vector<string> inputKeywords = extractKeywords(input);inputClean = "";for (vector<string>::const_iterator it = inputKeywords.begin(); it != inputKeywords.end(); ++it) {inputClean += *it;}bool exactFound = false;for (map<string, string>::const_iterator it = exactAnswers.begin(); it != exactAnswers.end(); ++it) {string questionClean = "";vector<string> qKeywords = extractKeywords(it->first);for (vector<string>::const_iterator qit = qKeywords.begin(); qit != qKeywords.end(); ++qit) {questionClean += *qit;}if (questionClean == inputClean) {cout << "機器人: " << it->second << endl;writeLog("系統響應", "精確匹配回答: " + it->second);exactFound = true;break;}}if (exactFound) {continue;}// 關鍵詞匹配string bestAnswer = getBestAnswerByTFIDF(inputKeywords, qas, questionKeywords, idfValues);if (!bestAnswer.empty()) {cout << "機器人: " << bestAnswer << endl;writeLog("系統響應", "TF-IDF匹配回答: " + bestAnswer);continue;}cout << "機器人: 抱歉,我不太理解這個問題。" << endl;cout << "您可以嘗試輸入 'topics' 查看我能回答的問題類型" << endl;writeLog("系統響應", "無法匹配到合適回答");}} else {cout << "無法打開訓練文件 training_data.txt,請確保文件存在且路徑正確" << endl;writeLog("錯誤", "無法打開訓練文件 training_data.txt");return 1;}return 0;
}
四、總結
本次改進的核心是新增了結構化日志系統,通過在關鍵節點記錄時間戳、事件類型和具體內容,顯著提升了問答系統的可維護性和可分析性。這種改進思路具有通用性 —— 對于任何需要長期運行或涉及用戶交互的程序,添加日志系統都是低成本高收益的優化手段。
后續可基于此日志系統進一步擴展,例如:添加日志級別(INFO/WARN/ERROR)、實現日志文件按日期分割(避免單文件過大)、或通過日志分析自動優化 TF-IDF 的匹配閾值等。
注:本文使用豆包輔助編寫