import fs from 'node:fs/promises';
import path from 'node:path';
import { chromium, type Browser } from 'playwright';
import * as XLSXNS from 'xlsx';
import { AppDataSource, PlatformAccount } from '../models/index.js';
import { BrowserManager } from '../automation/browser.js';
import { logger } from '../utils/logger.js';
import { UserDayStatisticsService } from './UserDayStatisticsService.js';
import { AccountService } from './AccountService.js';
import type { ProxyConfig } from '@media-manager/shared';
import { WS_EVENTS } from '@media-manager/shared';
import { wsManager } from '../websocket/index.js';

// Under ESM the xlsx API may hang off `default`; fall back to the namespace object otherwise.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const XLSX: any = (XLSXNS as any).default ?? (XLSXNS as any);

/** Cookie shape handed to Playwright's `BrowserContext.addCookies`. */
type PlaywrightCookie = {
  name: string;
  value: string;
  domain?: string;
  path?: string;
  url?: string;
  expires?: number;
  httpOnly?: boolean;
  secure?: boolean;
  sameSite?: 'Lax' | 'None' | 'Strict';
};

/** Per-day statistics patch: the record date plus whichever metric fields were parsed. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type DouyinDayPatch = { recordDate: Date } & Record<string, any>;

/** Creates the directory (and any missing parents); resolves if it already exists. */
function ensureDir(p: string) {
  return fs.mkdir(p, { recursive: true });
}

/**
 * Builds a local-midnight Date from calendar parts.
 *
 * @returns `null` when the parts do not denote a real calendar day
 *          (e.g. month 13 or February 31), instead of silently rolling over.
 */
function buildLocalDate(yyyy: number, mm: number, dd: number): Date | null {
  const d = new Date(yyyy, mm - 1, dd);
  d.setHours(0, 0, 0, 0);
  // Round-trip check: `new Date` normalises overflow, so a mismatch means invalid input.
  if (d.getFullYear() !== yyyy || d.getMonth() !== mm - 1 || d.getDate() !== dd) return null;
  return d;
}

/**
 * Normalises a date-like cell value to a Date at local midnight.
 *
 * Accepts a valid `Date`, a string containing `YYYY<sep>M<sep>D` (any single
 * non-digit separator, e.g. `2026-01-27` or `2026/01/27`), or a bare `M-D` /
 * `M/D` string, which is resolved against the current year.
 *
 * @returns the normalised date, or `null` when the input is empty, unparseable
 *          or not a real calendar day.
 */
function normalizeDateText(input: unknown): Date | null {
  if (!input) return null;
  if (input instanceof Date && !Number.isNaN(input.getTime())) {
    const d = new Date(input);
    d.setHours(0, 0, 0, 0);
    return d;
  }
  const s = String(input).trim();
  if (!s) return null;

  // 2026-01-27 / 2026/01/27
  const m1 = s.match(/(\d{4})\D(\d{1,2})\D(\d{1,2})/);
  if (m1) {
    const yyyy = Number(m1[1]);
    const mm = Number(m1[2]);
    const dd = Number(m1[3]);
    if (!yyyy || !mm || !dd) return null;
    return buildLocalDate(yyyy, mm, dd);
  }

  // 01-27 (fallback: assume the current year)
  const m2 = s.match(/^(\d{1,2})[-/](\d{1,2})$/);
  if (m2) {
    const yyyy = new Date().getFullYear();
    return buildLocalDate(yyyy, Number(m2[1]), Number(m2[2]));
  }

  return null;
}

/**
 * Parses a number as displayed by the Douyin UI / export into a rounded integer.
 *
 * Handles thousands separators (`8,077`), the `万` (x10^4) and `亿` (x10^8)
 * suffixes with an optional sign, and otherwise strips every character that is
 * not a digit, dot or minus before converting.
 *
 * @returns the rounded value, or `null` for null/undefined/blank input and for
 *          text that contains no digits at all or is not a finite number.
 */
function parseChineseNumberLike(input: unknown): number | null {
  if (input === null || input === undefined) return null;
  const s = String(input).trim();
  if (!s) return null;

  // 8,077
  const plain = s.replace(/,/g, '');

  // 4.8万 / -1.2万 — the sign must be captured here, otherwise the unit is lost below.
  const wan = plain.match(/^([+-]?\d+(\.\d+)?)\s*万$/);
  if (wan) return Math.round(Number(wan[1]) * 10000);
  const yi = plain.match(/^([+-]?\d+(\.\d+)?)\s*亿$/);
  if (yi) return Math.round(Number(yi[1]) * 100000000);

  const numeric = plain.replace(/[^\d.-]/g, '');
  // Number('') is 0, so digit-free text must be rejected explicitly.
  if (!/\d/.test(numeric)) return null;
  const n = Number(numeric);
  if (Number.isFinite(n)) return Math.round(n);
  return null;
}

/**
 * Converts an account's stored cookie string into Playwright cookies.
 *
 * Two formats are understood:
 *  1. JSON — either an array of cookie objects or an object with a `cookies` array.
 *  2. A `a=b; c=d` header-style string.
 *
 * Cookies without a domain are bound to `https://creator.douyin.com` via `url`.
 * If the JSON text fails to parse, the input is re-tried as format 2.
 *
 * @returns the parsed cookies; an empty array when nothing usable was found.
 */
function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
  if (!cookieData) return [];
  const raw = cookieData.trim();
  if (!raw) return [];

  // 1) JSON (most common: browser-extension export / saved by the frontend)
  if (raw.startsWith('[') || raw.startsWith('{')) {
    try {
      const parsed = JSON.parse(raw);
      const arr = Array.isArray(parsed) ? parsed : (parsed?.cookies ? parsed.cookies : []);
      if (!Array.isArray(arr)) return [];
      return arr
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        .map((c: any) => {
          const name = String(c?.name ?? '').trim();
          const value = String(c?.value ?? '').trim();
          if (!name) return null;
          const domain = c?.domain ? String(c.domain) : undefined;
          const pathVal = c?.path ? String(c.path) : '/';
          const url = !domain ? 'https://creator.douyin.com' : undefined;
          const sameSiteRaw = c?.sameSite;
          const sameSite =
            sameSiteRaw === 'Lax' || sameSiteRaw === 'None' || sameSiteRaw === 'Strict'
              ? sameSiteRaw
              : undefined;

          return {
            name,
            value,
            domain,
            // Playwright wants either `url` or a `domain`/`path` pair, so `path`
            // is only sent together with a domain.
            path: domain ? pathVal : undefined,
            url,
            expires: typeof c?.expires === 'number' ? c.expires : undefined,
            httpOnly: typeof c?.httpOnly === 'boolean' ? c.httpOnly : undefined,
            secure: typeof c?.secure === 'boolean' ? c.secure : undefined,
            sameSite,
          } satisfies PlaywrightCookie;
        })
        .filter(Boolean) as PlaywrightCookie[];
    } catch {
      // Not valid JSON: fall through to the header-style parser below.
    }
  }

  // 2) "a=b; c=d" concatenated format
  const pairs = raw.split(';').map((p) => p.trim()).filter(Boolean);
  const cookies: PlaywrightCookie[] = [];
  for (const p of pairs) {
    const idx = p.indexOf('=');
    if (idx <= 0) continue;
    const name = p.slice(0, idx).trim();
    const value = p.slice(idx + 1).trim();
    if (!name) continue;
    cookies.push({ name, value, url: 'https://creator.douyin.com' });
  }
  return cookies;
}

/**
 * Obtains a browser for one account.
 *
 * With an enabled proxy a dedicated Chromium instance is launched and the
 * caller must close it (`shouldClose: true`). Otherwise the browser comes from
 * `BrowserManager.getBrowser` and must not be closed by the caller.
 *
 * Runs headless unless both DY_STORAGE_STATE_BOOTSTRAP=1 and
 * DY_IMPORT_HEADLESS=0 are set (interactive login/verification bootstrap).
 */
async function createBrowserForAccount(
  proxy: ProxyConfig | null
): Promise<{ browser: Browser; shouldClose: boolean }> {
  const allowHeadfulForBootstrap =
    process.env.DY_STORAGE_STATE_BOOTSTRAP === '1' && process.env.DY_IMPORT_HEADLESS === '0';
  const headless = !allowHeadfulForBootstrap;

  if (proxy?.enabled) {
    const server = `${proxy.type}://${proxy.host}:${proxy.port}`;
    const browser = await chromium.launch({
      headless,
      proxy: {
        server,
        username: proxy.username,
        password: proxy.password,
      },
      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu', '--window-size=1920,1080'],
    });
    return { browser, shouldClose: true };
  }

  const browser = await BrowserManager.getBrowser({ headless });
  return { browser, shouldClose: false };
}

/**
 * Parses one exported Douyin Excel file into per-day metric patches.
 *
 * Every sheet is read. For each row the date column is located (header equal
 * to / containing "日期", or "date", else the first column) and the first
 * other column is treated as the metric; its header decides which field is
 * written. "主页访问" and "取关粉丝" columns are ignored.
 *
 * @returns a map keyed by `YYYY-MM-DD` holding `recordDate` plus any of
 *          `playCount`, `likeCount`, `commentCount`, `shareCount`,
 *          `fansIncrease`, `fansCount`, `coverClickRate`.
 */
function parseDouyinExcel(filePath: string): Map<string, DouyinDayPatch> {
  const wb = XLSX.readFile(filePath);
  const result = new Map<string, DouyinDayPatch>();

  logger.info(
    `[DY Import] Excel loaded. file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`
  );

  // Headers may carry a BOM or stray whitespace.
  const normalizeKey = (k: string) => k.replace(/^\uFEFF/, '').trim();

  for (const sheetName of wb.SheetNames) {
    const sheet = wb.Sheets[sheetName];
    // XLSX is typed `any`, so the row type is annotated here rather than passed as a type argument.
    const rows: Array<Record<string, unknown>> = XLSX.utils.sheet_to_json(sheet, { defval: '' });

    if (!rows.length) {
      logger.warn(`[DY Import] Sheet empty. name=${sheetName}`);
      continue;
    }

    const keys = Object.keys(rows[0] || {});
    logger.info(`[DY Import] Sheet parsed. name=${sheetName} rows=${rows.length} keys=${keys.join(',')}`);

    for (const row of rows) {
      const rawKeys = Object.keys(row || {});
      if (!rawKeys.length) continue;

      const keysNormalized = rawKeys.map((k) => ({ raw: k, norm: normalizeKey(k) }));

      // Prefer an exact "日期" header, then one containing it, then "date", else the first column.
      const dateKey =
        keysNormalized.find((k) => k.norm === '日期')?.raw ??
        keysNormalized.find((k) => k.norm.includes('日期'))?.raw ??
        keysNormalized.find((k) => k.norm.toLowerCase() === 'date')?.raw ??
        keysNormalized[0]!.raw;

      const d = normalizeDateText(row[dateKey]);
      if (!d) continue;

      const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}`;
      if (!result.has(key)) result.set(key, { recordDate: d });
      const obj = result.get(key)!;

      // Douyin exports are normally two columns (date + one metric), so the
      // first non-date header is used to decide which field the value maps to.
      const metricKeyRaw = keysNormalized.find((k) => k.raw !== dateKey)?.raw;
      if (!metricKeyRaw) continue;

      const metricKey = normalizeKey(metricKeyRaw);
      // Explicitly excluded metrics: profile visits / lost followers.
      if (metricKey.includes('主页访问') || metricKey.includes('取关粉丝')) continue;

      const rawVal = row[metricKeyRaw];
      if (rawVal === undefined || rawVal === null) continue;

      // 1) Cover click rate is stored verbatim as the percentage string.
      if (metricKey.includes('封面点击率')) {
        const s = String(rawVal).trim();
        if (s) obj.coverClickRate = s;
        continue;
      }

      // 2) Everything else is parsed as a number.
      const n = parseChineseNumberLike(rawVal);
      if (typeof n !== 'number') continue;

      if (metricKey.includes('播放')) obj.playCount = n;
      else if (metricKey.includes('点赞')) obj.likeCount = n;
      else if (metricKey.includes('评论')) obj.commentCount = n;
      else if (metricKey.includes('分享')) obj.shareCount = n;
      else if (metricKey.includes('净增粉丝') || metricKey.includes('新增粉丝')) obj.fansIncrease = n;
      // Total follower count is stored as fansCount.
      else if (metricKey.includes('总粉丝')) obj.fansCount = n;
    }
  }

  return result;
}
/**
 * Exports the Douyin creator-center "账号总览 → 短视频 → 近30天" metrics as Excel
 * files through a Playwright-driven browser and stores them as per-day
 * statistics via `UserDayStatisticsService`.
 */
export class DouyinAccountOverviewImportService {
  private accountRepository = AppDataSource.getRepository(PlatformAccount);
  private userDayStatisticsService = new UserDayStatisticsService();

  private downloadDir = path.resolve(process.cwd(), 'tmp', 'douyin-account-overview');
  private stateDir = path.resolve(process.cwd(), 'tmp', 'douyin-storage-state');

  /** Path of the persisted Playwright storageState file for an account. */
  private getStatePath(accountId: number) {
    return path.join(this.stateDir, `${accountId}.json`);
  }

  /**
   * Returns the path of a storageState file for the account, bootstrapping one
   * interactively when allowed.
   *
   * If the file already exists its path is returned. Otherwise, only when
   * DY_IMPORT_HEADLESS=0 and DY_STORAGE_STATE_BOOTSTRAP=1, a browser is opened
   * so the operator can finish login/verification (up to 5 minutes); the state
   * is saved once the data-center page text is detected.
   *
   * @returns the state file path, or `null` when no state exists and bootstrap
   *          is disabled or was not completed in time.
   */
  private async ensureStorageState(
    account: PlatformAccount,
    cookies: PlaywrightCookie[]
  ): Promise<string | null> {
    const statePath = this.getStatePath(account.id);
    try {
      await fs.access(statePath);
      return statePath;
    } catch {
      // No state file yet.
    }

    // Bootstrap needs a human in front of a visible browser.
    if (!(process.env.DY_IMPORT_HEADLESS === '0' && process.env.DY_STORAGE_STATE_BOOTSTRAP === '1')) {
      return null;
    }

    await ensureDir(this.stateDir);
    logger.warn(
      `[DY Import] No storageState for accountId=${account.id}. Bootstrapping... 请在弹出的浏览器中完成登录/验证。`
    );

    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    let context: Awaited<ReturnType<Browser['newContext']>> | undefined;
    try {
      context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
      });
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      await context.addCookies(cookies as any);
      const page = await context.newPage();
      await page.goto('https://creator.douyin.com/creator-micro/data-center/operation', {
        waitUntil: 'domcontentloaded',
      });

      // Wait up to 5 minutes for manual login / slider / SMS verification.
      let loggedIn = true;
      try {
        await page.waitForFunction(() => {
          const t = document.body?.innerText || '';
          return t.includes('数据中心') || t.includes('账号总览') || t.includes('短视频');
        }, { timeout: 5 * 60_000 });
      } catch {
        loggedIn = false;
      }

      if (!loggedIn) {
        // Saving now would persist an unauthenticated state that every later
        // run would pick up (the file would exist), so nothing is written.
        logger.warn(`[DY Import] storageState bootstrap not completed, nothing saved. accountId=${account.id}`);
        return null;
      }

      await context.storageState({ path: statePath });
      logger.info(`[DY Import] storageState saved: ${statePath}`);
      return statePath;
    } finally {
      // The context must be released even when the browser is shared.
      if (context) await context.close().catch(() => undefined);
      if (shouldClose) await browser.close().catch(() => undefined);
    }
  }

  /**
   * Single entry point used by both the scheduled job and the add-account flow:
   * runs the "账号总览-短视频-数据表现-近30天" import for every Douyin account.
   */
  static async runDailyImport(): Promise<void> {
    const svc = new DouyinAccountOverviewImportService();
    await svc.runDailyImportForAllDouyinAccounts();
  }

  /**
   * Imports the last-30-days overview for all Douyin accounts, one after
   * another. A failure for one account is logged and does not stop the rest.
   */
  async runDailyImportForAllDouyinAccounts(): Promise<void> {
    await ensureDir(this.downloadDir);

    const accounts = await this.accountRepository.find({
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      where: { platform: 'douyin' as any },
    });

    logger.info(`[DY Import] Start. total_accounts=${accounts.length}`);

    for (const account of accounts) {
      try {
        await this.importAccountLast30Days(account);
      } catch (e) {
        logger.error(
          `[DY Import] Account failed. accountId=${account.id} name=${account.accountName || ''}`,
          e
        );
      }
    }

    logger.info('[DY Import] Done.');
  }

  /**
   * Single account: export one Excel per metric → parse → merge per day →
   * persist → delete the files (unless KEEP_DY_XLSX=true).
   *
   * When the page redirects to login, the account is refreshed once through
   * `AccountService.refreshAccount` and the import is retried with
   * `isRetry = true`; a second redirect is not retried.
   *
   * @param isRetry set on the recursive call after a refresh; callers normally omit it.
   * @throws Error when cookies cannot be parsed, login is required, the
   *         dashboard reports missing data permission (the account is then
   *         marked `expired` and the user notified), or the page layout is
   *         not recognised.
   */
  async importAccountLast30Days(account: PlatformAccount, isRetry = false): Promise<void> {
    const cookies = parseCookiesFromAccount(account.cookieData);
    if (!cookies.length) {
      throw new Error('cookieData 为空或无法解析');
    }

    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    let context: Awaited<ReturnType<Browser['newContext']>> | undefined;
    try {
      const statePath = await this.ensureStorageState(account, cookies);
      context = await browser.newContext({
        acceptDownloads: true,
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        ...(statePath ? { storageState: statePath } : {}),
      });
      context.setDefaultTimeout(60_000);
      // Without a stored state fall back to cookies only (the export may then come back empty).
      if (!statePath) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        await context.addCookies(cookies as any);
      }

      const page = await context.newPage();
      logger.info(`[DY Import] accountId=${account.id} goto data-center...`);
      await page.goto('https://creator.douyin.com/creator-micro/data-center/operation', {
        waitUntil: 'domcontentloaded',
      });
      await page.waitForTimeout(1500);

      if (page.url().includes('login')) {
        // Already a retry: do not attempt another refresh.
        if (isRetry) {
          throw new Error('未登录/需要重新登录(跳转到 login)');
        }

        // First time the login is found expired: try to refresh the account.
        logger.info(`[DY Import] Login expired detected for account ${account.id}, attempting to refresh...`);
        await context.close();
        if (shouldClose) await browser.close();

        let refreshedAccount: PlatformAccount | null;
        try {
          const accountService = new AccountService();
          const refreshResult = await accountService.refreshAccount(account.userId, account.id);

          if (refreshResult.needReLogin) {
            logger.warn(`[DY Import] Account ${account.id} refresh failed, still needs re-login`);
            throw new Error('未登录/需要重新登录(跳转到 login)');
          }

          logger.info(`[DY Import] Account ${account.id} refreshed successfully, retrying import...`);
          refreshedAccount = await this.accountRepository.findOne({ where: { id: account.id } });
          if (!refreshedAccount) {
            throw new Error('账号刷新后未找到');
          }
        } catch (refreshError) {
          logger.error(`[DY Import] Account ${account.id} refresh failed:`, refreshError);
          throw new Error('未登录/需要重新登录(跳转到 login)');
        }

        // The retry runs outside the catch above so that its own failures are
        // reported as-is rather than relabelled as a login problem.
        return await this.importAccountLast30Days(refreshedAccount, true);
      }

      // "No access / permission pending / no data" banners: mark the account expired and notify the user.
      const bodyText = (await page.textContent('body').catch(() => '')) || '';
      if (
        bodyText.includes('暂无访问权限') ||
        bodyText.includes('权限申请中') ||
        bodyText.includes('暂无数据权限') ||
        bodyText.includes('暂无数据,请稍后再试')
      ) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        await this.accountRepository.update(account.id, { status: 'expired' as any });
        wsManager.sendToUser(account.userId, WS_EVENTS.ACCOUNT_UPDATED, {
          account: { id: account.id, status: 'expired', platform: 'douyin' },
        });
        wsManager.sendToUser(account.userId, WS_EVENTS.SYSTEM_MESSAGE, {
          level: 'warning',
          message: `抖音账号「${account.accountName || account.accountId || account.id}」暂无数据看板访问权限,请到抖音创作者中心申请数据权限(通过后一般次日生效)。`,
          platform: 'douyin',
          accountId: account.id,
        });
        throw new Error('抖音数据看板暂无访问权限/申请中,已标记 expired 并通知用户');
      }

      // The URL lands directly on the account overview page, so only the
      // "短视频" tab and the "近30天" range still need to be selected.
      await page.waitForTimeout(500);
      logger.info(`[DY Import] accountId=${account.id} on 账号总览页, click 短视频 tab (#semiTabaweme)...`);
      const shortVideoById = page.locator('#semiTabaweme');
      if ((await shortVideoById.count().catch(() => 0)) > 0) {
        await shortVideoById.first().click();
      } else {
        const shortVideoCandidates = ['短视频', '短视频数据'];
        let shortVideoClicked = false;
        for (const text of shortVideoCandidates) {
          const loc = page.getByText(text, { exact: false }).first();
          if ((await loc.count().catch(() => 0)) > 0) {
            await loc.click().catch(() => undefined);
            shortVideoClicked = true;
            break;
          }
        }
        if (!shortVideoClicked) {
          throw new Error('页面上未找到「短视频」入口,请确认抖音创作者后台是否改版');
        }
      }

      // Switch to "近30天" (element id first, visible text as fallback).
      await page.waitForTimeout(500);
      logger.info(`[DY Import] accountId=${account.id} click 近30天 (#addon-aoc08fi)...`);
      const last30DaysById = page.locator('#addon-aoc08fi');
      if ((await last30DaysById.count().catch(() => 0)) > 0) {
        await last30DaysById.first().click();
      } else {
        await page.getByText(/近\d+天?/).first().click().catch(() => undefined);
        await page.getByText('近30天', { exact: true }).click();
      }
      await page.waitForTimeout(1200);

      // Metrics are exported one at a time: each export holds "date + the
      // currently selected metric". Profile visits / lost followers are left out.
      // The total-followers label varies in the UI, hence several candidates.
      const metricsToExport: Array<{ name: string; candidates: string[] }> = [
        { name: '播放量', candidates: ['播放量'] },
        { name: '作品点赞', candidates: ['作品点赞', '点赞'] },
        { name: '作品评论', candidates: ['作品评论', '评论'] },
        { name: '作品分享', candidates: ['作品分享', '分享'] },
        { name: '封面点击率', candidates: ['封面点击率'] },
        { name: '净增粉丝', candidates: ['净增粉丝', '新增粉丝'] },
        { name: '总粉丝量', candidates: ['总粉丝量', '总粉丝数', '粉丝总量'] },
      ];

      let totalInserted = 0;
      let totalUpdated = 0;
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const mergedDays = new Map<string, { recordDate: Date } & Record<string, any>>();
      const savedExcelPaths: string[] = [];

      // Selects a metric card: exact text match first, then substring match.
      const clickMetric = async (metric: { name: string; candidates: string[] }) => {
        for (const c of metric.candidates) {
          const locatorExact = page.getByText(c, { exact: true }).first();
          const exactCount = await locatorExact.count().catch(() => 0);
          if (exactCount > 0) {
            await locatorExact.click().catch(() => undefined);
            await page.waitForTimeout(800);
            return c;
          }
        }
        for (const c of metric.candidates) {
          const locatorFuzzy = page.getByText(c, { exact: false }).first();
          const fuzzyCount = await locatorFuzzy.count().catch(() => 0);
          if (fuzzyCount > 0) {
            await locatorFuzzy.click().catch(() => undefined);
            await page.waitForTimeout(800);
            return c;
          }
        }
        logger.warn(`[DY Import] metric not found on page. accountId=${account.id} metric=${metric.name}`);
        return null;
      };

      for (const metric of metricsToExport) {
        logger.info(`[DY Import] accountId=${account.id} exporting metric: ${metric.name}...`);
        await clickMetric(metric);

        const [download] = await Promise.all([
          page.waitForEvent('download', { timeout: 60_000 }),
          page.getByText('导出数据', { exact: true }).first().click(),
        ]);

        // The suggested name comes from the remote side; keep only its base name.
        const filename = `${account.id}_${Date.now()}_${path.basename(download.suggestedFilename())}`;
        const filePath = path.join(this.downloadDir, filename);
        await download.saveAs(filePath);

        // Log the absolute path so the file can be inspected when it is kept.
        const absolutePath = path.resolve(filePath);
        savedExcelPaths.push(absolutePath);
        logger.info(
          `[DY Import] Excel saved (${metric.name}): ${absolutePath}`
        );

        try {
          const perDay = parseDouyinExcel(filePath);
          // Merge the different metrics into one patch per date.
          for (const [k, v] of perDay.entries()) {
            if (!mergedDays.has(k)) mergedDays.set(k, { recordDate: v.recordDate });
            const base = mergedDays.get(k)!;
            Object.assign(base, v);
          }
          logger.info(
            `[DY Import] metric exported & parsed. accountId=${account.id} metric=${metric.name} file=${path.basename(filePath)} days=${perDay.size}`
          );
        } finally {
          // Files are deleted after import to avoid piling up on disk; set
          // KEEP_DY_XLSX=true to keep them for debugging.
          if (process.env.KEEP_DY_XLSX === 'true') {
            logger.warn(`[DY Import] KEEP_DY_XLSX=true, keep file: ${filePath}`);
          } else {
            await fs.unlink(filePath).catch(() => undefined);
          }
        }
      }

      // Summary: one Excel per configured metric is expected.
      logger.info(
        `[DY Import] accountId=${account.id} 共 ${savedExcelPaths.length} 个 Excel 已解析`
      );
      if (savedExcelPaths.length !== metricsToExport.length) {
        logger.warn(`[DY Import] accountId=${account.id} 预期 ${metricsToExport.length} 个 Excel,实际 ${savedExcelPaths.length} 个`);
      }

      // Persist once after merging so each day is written a single time.
      for (const v of mergedDays.values()) {
        const { recordDate, ...patch } = v;
        const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
        totalInserted += r.inserted;
        totalUpdated += r.updated;
      }

      logger.info(
        `[DY Import] short-video imported. accountId=${account.id} days=${mergedDays.size} inserted=${totalInserted} updated=${totalUpdated}`
      );
    } finally {
      // Close the context on every path: with a shared browser
      // (shouldClose === false) nothing else would release it after an error.
      if (context) await context.close().catch(() => undefined);
      if (shouldClose) {
        await browser.close().catch(() => undefined);
      }
    }
  }
}