需求場景
獲取在網站上點擊下載的 PDF,並把 PDF 重命名後再上傳到騰訊雲 COS 上
技術使用
"puppeteer": "^19.7.2",
"egg": "^3.15.0", // 服務器用 egg 搭的
文件服務使用騰訊雲 COS
核心思路
獲取瀏覽器下載事件,並把文件保存到本地
// Open a raw DevTools Protocol session on the page so the browser's download
// behaviour can be configured.
const session = await substitutePage.target().createCDPSession();
await session.send('Page.setDownloadBehavior', {
  behavior: 'allow',
  downloadPath, // directory the browser saves downloaded files into
});
在文件保存到本地之前先監聽此文件夾,一旦出現新文件就獲取並上傳
加 timer 做防抖,是為了防止在文件寫入以及重命名時多次觸發 watch 回調,導致出現 0KB 的源文件髒數據
// Excerpt from downloadPdfHandler: `resolve` is the resolver of the enclosing Promise.
// Debounce handle: every watch event restarts a 500 ms quiet period, so the
// check below only runs once the directory has stopped changing.
let timer: ReturnType<typeof setTimeout> | null = null;
fs.watch(downloadPath, (_eventType, filename) => {
  if (timer !== null) {
    clearTimeout(timer);
  }
  timer = setTimeout(() => {
    // Ignore the browser's temporary download files; only a `.pdf` name counts.
    // `filename` is not guaranteed to be provided, so guard before using it.
    if (typeof filename === 'string' && filename.endsWith('.pdf')) {
      resolve({ filename });
    }
  }, 500);
});
完整代碼
// Let the browser save downloads into `downloadPath` instead of prompting.
const session = await substitutePage.target().createCDPSession();
await session.send('Page.setDownloadBehavior', {
  behavior: 'allow',
  downloadPath, // directory the browser saves downloaded files into
});

// downloadPdfHandler resolves with a single object ({ pdfSize, fileName }),
// not an array, so it must not be array-destructured.
const res = await this.downloadPdfHandler(substitutePage, downloadPath);

// Absolute path of the downloaded file on the local disk.
const filePath = `${downloadPath}/${res.fileName}`;

// uploadFile is the COS upload implementation; it is omitted here because it
// contains private keys.
const pdfUriCode = await this.uploadFile(filePath, filePath);
const pdfUri = decodeURIComponent(pdfUriCode);

this.domainList = {
  pdfSize: res.pdfSize,
  // Keep the URI from the first occurrence of 'root' onwards.
  pdfUri: pdfUri.substring(pdfUri.indexOf('root')),
};
downloadPdfHandler函數實現
/**
 * Waits for a download started on `page` to land in `downloadPath`, renames it
 * to `<uuid>.pdf`, and reports the most recently modified entry of that directory.
 *
 * Two signals must both arrive before the rename happens:
 *  - a response whose `content-type` starts with `application/octet-stream`;
 *  - a debounced `fs.watch` event whose file name ends in `.pdf`.
 *
 * NOTE(review): there is no timeout; if either signal never arrives the
 * returned promise stays pending.
 *
 * @param page Puppeteer page on which the download is triggered.
 * @param downloadPath Directory the browser saves downloads into.
 * @returns Resolves with `{ pdfSize, fileName }` of the newest entry in `downloadPath`.
 * @throws Rejects when watching or reading the directory fails, or when the
 *   directory is empty after the download.
 */
async downloadPdfHandler(page, downloadPath): Promise<any> {
  const uuidName = uuidv4();
  const debounceMs = 500;

  let timer: ReturnType<typeof setTimeout> | null = null;
  let watcher: fs.FSWatcher | undefined;
  let onResponse: ((response: { headers(): Record<string, string> }) => void) | undefined;

  // Releases everything registered below so repeated calls do not pile up
  // directory watchers, pending timers, or page listeners.
  const releaseListeners = () => {
    if (timer !== null) {
      clearTimeout(timer);
      timer = null;
    }
    watcher?.close();
    watcher = undefined;
    if (onResponse !== undefined) {
      page.off('response', onResponse);
      onResponse = undefined;
    }
  };

  // Resolves once the directory has been quiet for `debounceMs` and the last
  // reported name is a `.pdf`. The debounce avoids reacting while the file is
  // still being written, which would otherwise produce a 0 KB dirty file.
  const fsWatchApi = () =>
    new Promise<{ filename: string }>((resolve, reject) => {
      watcher = fs.watch(downloadPath, (_eventType, filename) => {
        if (timer !== null) {
          clearTimeout(timer);
        }
        timer = setTimeout(() => {
          timer = null;
          // Skip the browser's temporary download files; `filename` is not
          // guaranteed to be provided, so guard before using it.
          if (typeof filename === 'string' && filename.endsWith('.pdf')) {
            resolve({ filename });
          }
        }, debounceMs);
      });
      // Surface watcher failures as a rejection instead of an unhandled 'error' event.
      watcher.on('error', reject);
    });

  // Resolves on the first response served as application/octet-stream.
  const responseWatchApi = () =>
    new Promise<void>(resolve => {
      onResponse = response => {
        // Responses without a content-type header must not throw here.
        const contentType = response.headers()['content-type'] ?? '';
        if (contentType.startsWith('application/octet-stream')) {
          resolve();
        }
      };
      page.on('response', onResponse);
    });

  let downloaded: { filename: string };
  try {
    const signals = await Promise.all([ responseWatchApi(), fsWatchApi() ]);
    downloaded = signals[1];
  } finally {
    releaseListeners();
  }

  const oldFilePath = path.join(downloadPath, downloaded.filename);
  const newFilePath = path.join(downloadPath, `${uuidName}.pdf`);
  try {
    fs.renameSync(oldFilePath, newFilePath);
    this.logger.info(`文件名已經被修改完成:${uuidName}`);
  } catch (error) {
    // Best effort: keep going with whatever is in the directory, but do not
    // report a failed rename as a success.
    this.logger.warn('文件重命名失敗(%s -> %s): %o', oldFilePath, newFilePath, error);
  }

  await this.sleep(5 * 1000);

  // Pair every directory entry with its mtime and size, then keep the most
  // recently modified one (the first one wins on ties).
  const entries = fs.readdirSync(downloadPath).map(file => {
    const stats = fs.statSync(path.join(downloadPath, file));
    return { fileName: file, mtime: stats.mtime, size: stats.size };
  });
  if (entries.length === 0) {
    throw new Error(`No files found in download directory: ${downloadPath}`);
  }
  const newestFile = entries.reduce((newest, entry) =>
    (entry.mtime.getTime() > newest.mtime.getTime() ? entry : newest));

  this.logger.info('newestFile: %o', {
    newestFile,
  });

  return {
    pdfSize: newestFile.size,
    fileName: newestFile.fileName,
  };
}