import fs from 'node:fs/promises';
import path from 'node:path';
import { chromium, type Browser, type Page, type BrowserContext } from 'playwright';
import * as XLSXNS from 'xlsx';
import { AppDataSource, PlatformAccount } from '../models/index.js';
import { BrowserManager } from '../automation/browser.js';
import { logger } from '../utils/logger.js';
import { UserDayStatisticsService } from './UserDayStatisticsService.js';
import { AccountService } from './AccountService.js';
import { getPythonServiceBaseUrl } from './PythonServiceConfigService.js';
import type { ProxyConfig } from '@media-manager/shared';
import { WS_EVENTS } from '@media-manager/shared';
import { wsManager } from '../websocket/index.js';

// Under ESM the xlsx API may be attached to `default`; fall back to the namespace object otherwise.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const XLSX: any = (XLSXNS as any).default ?? (XLSXNS as any);

/** Cookie shape handed to Playwright's `BrowserContext.addCookies`. */
type PlaywrightCookie = {
  name: string;
  value: string;
  domain?: string;
  path?: string;
  url?: string;
  expires?: number;
  httpOnly?: boolean;
  secure?: boolean;
  sameSite?: 'Lax' | 'None' | 'Strict';
};

/**
 * One day's worth of parsed statistics, keyed later by `YYYY-MM-DD`.
 * NOTE(review): the original generic arguments were lost from this file; this shape is a
 * reconstruction from how the values are used (recordDate plus a free-form patch) - confirm
 * against UserDayStatisticsService.saveStatisticsForDate.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type ExcelDayStats = { recordDate: Date } & Record<string, any>;

/** Origin used to scope cookies that carry no explicit domain. */
const BAIJIAHAO_COOKIE_URL = 'https://baijiahao.baidu.com';

/** Creates directory `p` (and any missing parents); resolves even if it already exists. */
function ensureDir(p: string) {
  return fs.mkdir(p, { recursive: true });
}

/**
 * Parses a date out of loosely formatted input.
 *
 * Accepted forms: compact `20260115`, or year/month/day separated by any single
 * non-digit character (`2026-01-15`, `2026/1/5`, also when embedded in longer text).
 *
 * @param input Raw cell / JSON value.
 * @returns A Date at local midnight, or null when the input is empty, unparseable,
 *          or names a calendar day that does not exist (e.g. month 13, Feb 31).
 */
function normalizeDateText(input: unknown): Date | null {
  if (!input) return null;
  const s = String(input).trim();
  if (!s) return null;

  // Try the compact 8-digit form first, then the separated form.
  const m = s.match(/^(\d{4})(\d{2})(\d{2})$/) ?? s.match(/(\d{4})\D(\d{1,2})\D(\d{1,2})/);
  if (!m) return null;

  const yyyy = Number(m[1]);
  const mm = Number(m[2]);
  const dd = Number(m[3]);
  if (!yyyy || !mm || !dd) return null;

  const d = new Date(yyyy, mm - 1, dd);
  d.setHours(0, 0, 0, 0);
  // `new Date` silently rolls invalid components over (month 13 -> next January);
  // reject those instead of storing statistics under the wrong day.
  if (d.getFullYear() !== yyyy || d.getMonth() !== mm - 1 || d.getDate() !== dd) return null;
  return d;
}

/**
 * Parses a number that may use Chinese magnitude suffixes or thousands separators.
 *
 * Examples: `"1,234"` -> 1234, `"1.2万"` -> 12000, `"3亿"` -> 300000000.
 *
 * @param input Raw value (string, number, ...).
 * @returns The rounded integer, or null when the input is nullish, blank,
 *          or contains no digits at all.
 */
function parseChineseNumberLike(input: unknown): number | null {
  if (input === null || input === undefined) return null;
  const s = String(input).trim();
  if (!s) return null;

  const plain = s.replace(/,/g, '');

  // 万 = 10^4; the optional sign keeps negative values scaled correctly.
  const wan = plain.match(/^(-?\d+(\.\d+)?)\s*万$/);
  if (wan) return Math.round(Number(wan[1]) * 10000);

  // 亿 = 10^8.
  const yi = plain.match(/^(-?\d+(\.\d+)?)\s*亿$/);
  if (yi) return Math.round(Number(yi[1]) * 100000000);

  const numericPart = plain.replace(/[^\d.-]/g, '');
  // Without this guard "N/A" strips down to "" and Number("") would report 0.
  if (!/\d/.test(numericPart)) return null;

  const n = Number(numericPart);
  return Number.isFinite(n) ? Math.round(n) : null;
}

/**
 * Converts the cookie data stored on an account into Playwright cookies.
 *
 * Two storage formats are understood:
 *  1. JSON: either an array of cookie objects or an object with a `cookies` array.
 *  2. A raw header string: `a=b; c=d`.
 *
 * Playwright requires each cookie to carry either `url` or a `domain`/`path` pair and
 * rejects cookies that combine `url` with `path`, so cookies without a domain are scoped
 * by URL only.
 *
 * @param cookieData Stored cookie string, possibly null.
 * @returns Parsed cookies; empty when nothing usable was found.
 */
function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
  if (!cookieData) return [];
  const raw = cookieData.trim();
  if (!raw) return [];

  // 1) JSON array / object
  if (raw.startsWith('[') || raw.startsWith('{')) {
    try {
      const parsed = JSON.parse(raw);
      const arr = Array.isArray(parsed) ? parsed : (parsed?.cookies ? parsed.cookies : []);
      if (!Array.isArray(arr)) return [];

      const fromJson: PlaywrightCookie[] = [];
      for (const c of arr) {
        const name = String(c?.name ?? '').trim();
        if (!name) continue;
        const value = String(c?.value ?? '').trim();
        const domain = c?.domain ? String(c.domain) : undefined;
        const sameSiteRaw = c?.sameSite;
        const sameSite =
          sameSiteRaw === 'Lax' || sameSiteRaw === 'None' || sameSiteRaw === 'Strict' ? sameSiteRaw : undefined;
        // Either domain + path, or url alone - never url together with path.
        const scope = domain
          ? { domain, path: c?.path ? String(c.path) : '/' }
          : { url: BAIJIAHAO_COOKIE_URL };

        fromJson.push({
          name,
          value,
          ...scope,
          expires: typeof c?.expires === 'number' ? c.expires : undefined,
          httpOnly: typeof c?.httpOnly === 'boolean' ? c.httpOnly : undefined,
          secure: typeof c?.secure === 'boolean' ? c.secure : undefined,
          sameSite,
        } satisfies PlaywrightCookie);
      }
      return fromJson;
    } catch {
      // Not valid JSON after all: fall through to the header-string format.
    }
  }

  // 2) "a=b; c=d"
  const cookies: PlaywrightCookie[] = [];
  for (const pair of raw.split(';')) {
    const p = pair.trim();
    if (!p) continue;
    const idx = p.indexOf('=');
    if (idx <= 0) continue;
    const name = p.slice(0, idx).trim();
    if (!name) continue;
    const value = p.slice(idx + 1).trim();
    cookies.push({ name, value, url: BAIJIAHAO_COOKIE_URL });
  }
  return cookies;
}

/**
 * Obtains a browser for an account.
 *
 * With an enabled proxy a dedicated Chromium instance is launched (the caller must close it,
 * signalled by `shouldClose: true`); otherwise the browser returned by BrowserManager is used
 * and must be left open.
 *
 * Silent sync runs headless by default. A visible window is only allowed while bootstrapping
 * a login (BJ_STORAGE_STATE_BOOTSTRAP=1 together with BJ_IMPORT_HEADLESS=0).
 */
async function createBrowserForAccount(
  proxy: ProxyConfig | null
): Promise<{ browser: Browser; shouldClose: boolean }> {
  const allowHeadfulForBootstrap =
    process.env.BJ_STORAGE_STATE_BOOTSTRAP === '1' && process.env.BJ_IMPORT_HEADLESS === '0';
  const headless = !allowHeadfulForBootstrap;

  if (proxy?.enabled) {
    const server = `${proxy.type}://${proxy.host}:${proxy.port}`;
    const browser = await chromium.launch({
      headless,
      proxy: {
        server,
        username: proxy.username,
        password: proxy.password,
      },
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
      ],
    });
    return { browser, shouldClose: true };
  }

  const browser = await BrowserManager.getBrowser({ headless });
  return { browser, shouldClose: false };
}

/**
 * Parses the Baijiahao "content analysis / basic data" Excel export.
 *
 * The export is GBK encoded, so header names arrive garbled in Node; the column ORDER is
 * stable, therefore values are read by position:
 *   0 date (e.g. 20260115) · 1 reads · 2 click rate · 3 interaction rate · 4 comments ·
 *   5 comment rate · 6 likes · 7 like rate · 8 collects · 9 collect rate · 10 shares ·
 *   11 share rate · 12 fans gained from works · 13 fans gain rate · 14 fans lost from works.
 * Columns not listed in the mapping below are not stored.
 *
 * @param filePath Path of the downloaded workbook.
 * @returns Map keyed by `YYYY-MM-DD`; rows from all sheets are merged per day.
 */
function parseBaijiahaoExcel(filePath: string): Map<string, ExcelDayStats> {
  const wb = XLSX.readFile(filePath);
  const result = new Map<string, ExcelDayStats>();

  logger.info(
    `[BJ Import] Excel loaded. file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`
  );

  // Column index -> statistics field for the plain counters.
  const numericColumns: ReadonlyArray<readonly [number, string]> = [
    [1, 'playCount'], // reads
    [6, 'likeCount'], // likes
    [4, 'commentCount'], // comments
    [8, 'collectCount'], // collects
    [10, 'shareCount'], // shares
    [12, 'fansIncrease'], // fans gained from works (fans lost are NOT subtracted)
  ];

  for (const sheetName of wb.SheetNames) {
    const sheet = wb.Sheets[sheetName];
    const rows: Array<Record<string, unknown>> = XLSX.utils.sheet_to_json(sheet, { defval: '' });
    if (!rows.length) {
      logger.warn(`[BJ Import] Sheet empty. name=${sheetName}`);
      continue;
    }

    const keys = Object.keys(rows[0] || {});
    logger.info(
      `[BJ Import] Sheet parsed. name=${sheetName} rows=${rows.length} keys=${keys.join(',')}`
    );

    for (const row of rows) {
      const cols = Object.keys(row || {});
      if (!cols.length) continue;

      const cell = (idx: number): unknown => (idx >= 0 && idx < cols.length ? row[cols[idx]!] : undefined);

      const d = normalizeDateText(cell(0));
      if (!d) continue;

      const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(
        d.getDate()
      ).padStart(2, '0')}`;
      let obj = result.get(key);
      if (!obj) {
        obj = { recordDate: d };
        result.set(key, obj);
      }

      for (const [idx, field] of numericColumns) {
        const n = parseChineseNumberLike(cell(idx));
        if (typeof n === 'number') obj[field] = n;
      }

      // Click rate -> coverClickRate; only stored when non-zero.
      const coverClickRate = formatRateWithPercent(cell(2));
      if (coverClickRate !== '0') obj.coverClickRate = coverClickRate;
    }
  }

  return result;
}

/** Rate formatter: appends % to a non-zero rate; returns '0' for zero or empty input. */
function formatRateWithPercent(v: unknown): string {
  if (v === null || v === undefined) return '0';
  const s = String(v).trim();
  if (!s) return '0';

  // Strip the percent sign before the numeric check: Number("4.23%") is NaN, which
  // previously made every already-formatted rate collapse to '0'.
  const hasPercent = s.includes('%');
  const n = Number(s.replace(/[,%]/g, '').trim());
  if (!Number.isFinite(n) || n === 0) return '0';

  if (hasPercent) return s;
  // Fractions in (0, 1] are treated as ratios: 0.0423 -> "4.23%".
  if (n > 0 && n <= 1) return `${(n * 100).toFixed(2)}%`;
  return `${Number(n.toFixed(2))}%`;
}

/**
 * Formats a rate value as a percent string.
 *
 * Values already containing `%` are returned unchanged; numbers in [0, 1] are treated as
 * ratios (0.0423 -> "4.23%"); other numbers are taken as percentages (4.23 -> "4.23%").
 *
 * @returns The formatted string, or null for nullish / blank / non-numeric input.
 */
function formatPercentString(input: unknown): string | null {
  if (input === null || input === undefined) return null;
  const s = String(input).trim();
  if (!s) return null;
  if (s.includes('%')) return s;

  const n = Number(s);
  if (!Number.isFinite(n)) return null;
  if (n >= 0 && n <= 1) return `${(n * 100).toFixed(2)}%`;
  return `${n.toFixed(2)}%`;
}

/**
 * Per-day statistics collected before saving.
 * NOTE(review): the original generic arguments were lost from this file; this shape is
 * reconstructed from usage (recordDate plus a free-form patch) - confirm against
 * UserDayStatisticsService.saveStatisticsForDate.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type DayStats = { recordDate: Date } & Record<string, any>;

/** Inclusive day range, both ends formatted as `YYYYMMDD`. */
type DayRange = { start: string; end: string };

/** One day of fans figures parsed from getFansBasicInfo. */
type FansDay = { recordDate: Date; fansCount: number; fansIncrease: number };

/**
 * Breadth-first searches a JSON value for the longest array whose items are objects with a
 * date-like field (8 digits, or Y-M-D / Y/M/D).
 *
 * Well-known date keys are preferred; any date-like field is accepted as a fallback. Objects
 * nested inside array items are not traversed. On equal lengths the array discovered first
 * wins.
 *
 * @returns The array and the name of its date field, or null when none is found.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function findArrayWithDateLikeField(root: any): { arr: any[]; dateKey: string } | null {
  const dateKeyCandidates = ['event_day', 'day', 'date', 'stat_day', 'statDay', 'dt', 'time', 'the_day'];

  const isDateLike = (v: unknown): boolean => {
    if (v === null || v === undefined) return false;
    const s = String(v).trim();
    return /^\d{8}$/.test(s) || /^\d{4}[-/]\d{1,2}[-/]\d{1,2}$/.test(s);
  };

  // Date key taken from the first item that has any date-like field.
  const detectDateKey = (items: unknown[]): string | null => {
    for (const item of items) {
      if (!item || typeof item !== 'object') continue;
      const record = item as Record<string, unknown>;
      const keys = Object.keys(record);
      const preferred = dateKeyCandidates.find((dk) => keys.includes(dk) && isDateLike(record[dk]));
      if (preferred !== undefined) return preferred;
      const fallback = keys.find((k) => isDateLike(record[k]));
      if (fallback !== undefined) return fallback;
    }
    return null;
  };

  const seen = new Set<object>();
  const queue: unknown[] = [root];
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let best: { arr: any[]; dateKey: string } | null = null;

  // Index-based walk: the queue only grows, so no O(n) shift() per step.
  for (let head = 0; head < queue.length; head++) {
    const cur = queue[head];
    if (!cur || typeof cur !== 'object') continue;
    if (seen.has(cur)) continue; // guards against cyclic / shared references
    seen.add(cur);

    if (Array.isArray(cur)) {
      const dateKey = detectDateKey(cur);
      // Strict ">" keeps the first-discovered array on ties.
      if (dateKey !== null && (best === null || cur.length > best.arr.length)) {
        best = { arr: cur, dateKey };
      }
      continue;
    }

    for (const v of Object.values(cur)) {
      if (v && typeof v === 'object') queue.push(v);
    }
  }

  return best;
}

/**
 * Extracts per-day statistics from an appStatisticV3 response.
 *
 * The day list is located structurally (see findArrayWithDateLikeField); each metric is read
 * from the first present alias in its key list, so differing field spellings are tolerated.
 *
 * @param json Parsed response body.
 * @returns Map keyed by `YYYY-MM-DD`; empty when no dated array is found.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function parseBaijiahaoAppStatisticV3(json: any): Map<string, DayStats> {
  const result = new Map<string, DayStats>();
  const found = findArrayWithDateLikeField(json);
  if (!found) return result;
  const { arr, dateKey } = found;

  // Target field -> accepted source keys, in priority order.
  const numericFields: ReadonlyArray<readonly [string, readonly string[]]> = [
    // reads (appStatisticV3 uses view_count)
    ['playCount', ['view_count', 'read_cnt', 'readCount', 'read', 'pv', 'view_cnt', 'viewCount', 'views']],
    // likes (likes_count)
    ['likeCount', ['likes_count', 'like_cnt', 'praise_cnt', 'praise', 'likeCount', 'likes']],
    // comments (comment_count)
    ['commentCount', ['comment_count', 'comment_cnt', 'commentCount', 'comments']],
    // collects (collect_count)
    ['collectCount', ['collect_count', 'collect_cnt', 'favorite_cnt', 'fav_cnt', 'collectCount', 'favorites']],
    // shares (share_count)
    ['shareCount', ['share_count', 'share_cnt', 'shareCount', 'shares']],
    // fans gained from works (fans_increase / fans_add_cnt)
    [
      'fansIncrease',
      ['fans_increase', 'fans_add_cnt', 'works_fans_inc', 'worksFansInc', 'content_fans_inc', 'fans_inc', 'fansIncrease'],
    ],
  ];
  const clickRateKeys = ['click_rate', 'ctr', 'clickRate'];

  const pickNumber = (obj: Record<string, unknown>, keys: readonly string[]): number | null => {
    for (const k of keys) {
      if (obj[k] === undefined || obj[k] === null) continue;
      const n = parseChineseNumberLike(obj[k]);
      if (typeof n === 'number') return n;
    }
    return null;
  };

  const pickString = (obj: Record<string, unknown>, keys: readonly string[]): string | null => {
    for (const k of keys) {
      if (obj[k] === undefined || obj[k] === null) continue;
      const s = String(obj[k]).trim();
      if (s) return s;
    }
    return null;
  };

  for (const item of arr) {
    if (!item || typeof item !== 'object') continue;
    const d = normalizeDateText(item[dateKey]);
    if (!d) continue;

    const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}`;
    let obj = result.get(key);
    if (!obj) {
      obj = { recordDate: d };
      result.set(key, obj);
    }

    for (const [field, keys] of numericFields) {
      const n = pickNumber(item, keys);
      if (typeof n === 'number') obj[field] = n;
    }

    // Click rate -> coverClickRate. pickString already stringifies numeric values, so no
    // separate numeric fallback is needed.
    const clickRate = formatPercentString(pickString(item, clickRateKeys));
    if (clickRate) obj.coverClickRate = clickRate;
  }

  return result;
}

/**
 * Imports Baijiahao "data center / content analysis / basic data" (last 30 days) plus the
 * fans figures from getFansBasicInfo into user_day_statistics.
 *
 * Strategy per account: Python service first (stored cookie only, no browser); on failure
 * refresh the account and retry once; as a last resort drive a browser.
 */
export class BaijiahaoContentOverviewImportService {
  private accountRepository = AppDataSource.getRepository(PlatformAccount);
  private userDayStatisticsService = new UserDayStatisticsService();
  private downloadDir = path.resolve(process.cwd(), 'tmp', 'baijiahao-content-overview');
  private stateDir = path.resolve(process.cwd(), 'tmp', 'baijiahao-storage-state');

  /** Path of the persisted Playwright storageState for an account. */
  private getStatePath(accountId: number) {
    return path.join(this.stateDir, `${accountId}.json`);
  }

  /**
   * Last-30-days range ending yesterday, computed in the China timezone so that a server
   * running outside UTC+8 still requests the full set of days.
   */
  private getChinaLast30DayRange(now: Date = new Date()): DayRange {
    const DAY_MS = 24 * 60 * 60 * 1000;
    const parts = new Intl.DateTimeFormat('en-CA', {
      timeZone: 'Asia/Shanghai',
      year: 'numeric',
      month: '2-digit',
      day: '2-digit',
    }).formatToParts(now);
    const part = (type: string) => parseInt(parts.find((p) => p.type === type)?.value ?? '0', 10);

    // Pure calendar arithmetic on UTC midnights: no DST or local-timezone effects.
    const endMs = Date.UTC(part('year'), part('month') - 1, part('day')) - DAY_MS;
    const pad = (n: number) => String(n).padStart(2, '0');
    const fmt = (ms: number) => {
      const d = new Date(ms);
      return `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}`;
    };
    return { start: fmt(endMs - 29 * DAY_MS), end: fmt(endMs) };
  }

  /**
   * Returns the path of a usable storageState file for the account, or null.
   *
   * An existing file is reused. Otherwise, only when BJ_IMPORT_HEADLESS=0 and
   * BJ_STORAGE_STATE_BOOTSTRAP=1, a visible browser is opened so the operator can finish
   * login / verification (up to 5 minutes); the state is saved only once the data pages are
   * actually visible.
   */
  private async ensureStorageState(
    account: PlatformAccount,
    cookies: PlaywrightCookie[]
  ): Promise<string | null> {
    const statePath = this.getStatePath(account.id);
    try {
      await fs.access(statePath);
      return statePath;
    } catch {
      // no state yet
    }

    if (!(process.env.BJ_IMPORT_HEADLESS === '0' && process.env.BJ_STORAGE_STATE_BOOTSTRAP === '1')) {
      return null;
    }

    await ensureDir(this.stateDir);
    logger.warn(
      `[BJ Import] No storageState for accountId=${account.id}. Bootstrapping... 请在弹出的浏览器中完成登录/验证。`
    );

    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    let context: BrowserContext | null = null;
    try {
      context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
      });
      await context.addCookies(cookies);
      const page = await context.newPage();
      await page.goto('https://baijiahao.baidu.com/builder/rc/analysiscontent', {
        waitUntil: 'domcontentloaded',
      });

      // waitForFunction(fn, arg, options): options is the THIRD parameter. Passing it second
      // made it the page-function argument and left the default timeout in force.
      const loggedIn = await page
        .waitForFunction(
          () => {
            const t = document.body?.innerText || '';
            return t.includes('数据中心') || t.includes('内容分析') || t.includes('基础数据');
          },
          undefined,
          { timeout: 5 * 60_000 }
        )
        .then(
          () => true,
          () => false
        );

      if (!loggedIn) {
        // Persisting a logged-out state would poison every later run (the file is reused as-is).
        logger.warn(`[BJ Import] storageState bootstrap timed out, nothing saved. accountId=${account.id}`);
        return null;
      }

      await context.storageState({ path: statePath });
      logger.info(`[BJ Import] storageState saved: ${statePath}`);
      return statePath;
    } finally {
      if (context) await context.close().catch(() => undefined);
      if (shouldClose) await browser.close().catch(() => undefined);
    }
  }

  /**
   * POSTs a JSON payload to `/baijiahao/<endpoint>` on the Python service (30 s timeout).
   *
   * @throws Error when the HTTP status is not OK, or when an OK response is not JSON.
   */
  private async postToPythonService(
    endpoint: string,
    payload: Record<string, unknown>
  ): Promise<Record<string, unknown>> {
    const base = (await getPythonServiceBaseUrl()).replace(/\/$/, '');
    const url = `${base}/baijiahao/${endpoint}`;

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 30_000);
    try {
      const res = await fetch(url, {
        method: 'POST',
        signal: controller.signal,
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
      });
      const text = await res.text();

      let data: Record<string, unknown> = {};
      if (text) {
        try {
          const parsed: unknown = JSON.parse(text);
          if (parsed && typeof parsed === 'object') data = parsed as Record<string, unknown>;
        } catch {
          // A non-JSON error page must still surface the HTTP status below.
          if (res.ok) throw new Error(`Python ${endpoint} 返回了非 JSON 响应`);
        }
      }

      if (!res.ok) {
        const msg = String(data?.errmsg || data?.error || '').trim() || `HTTP ${res.status}`;
        throw new Error(`Python ${endpoint} 调用失败: ${msg}`);
      }
      return data;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /** Calls appStatisticV3 through the Python service using the account's stored cookie. */
  private async fetchAppStatisticV3ViaPython(
    account: PlatformAccount,
    startDay: string,
    endDay: string
  ): Promise<Record<string, unknown>> {
    const cookie = String(account.cookieData || '').trim();
    if (!cookie) throw new Error('百家号账号 cookie 为空,无法调用 Python app_statistic_v3');
    return this.postToPythonService('app_statistic_v3', { cookie, start_day: startDay, end_day: endDay });
  }

  /** Calls getFansBasicInfo through the Python service using the account's stored cookie. */
  private async fetchFansBasicInfoViaPython(
    account: PlatformAccount,
    start: string,
    end: string
  ): Promise<Record<string, unknown>> {
    const cookie = String(account.cookieData || '').trim();
    if (!cookie) throw new Error('百家号账号 cookie 为空,无法调用 Python fans_basic_info');
    return this.postToPythonService('fans_basic_info', { cookie, start, end });
  }

  /**
   * Single entry point used by both the scheduled job and the add-account flow: imports
   * basic data (last 30 days) and fans data for every Baijiahao account.
   */
  static async runDailyImport(): Promise<void> {
    const svc = new BaijiahaoContentOverviewImportService();
    await svc.runDailyImportForAllBaijiahaoAccounts();
  }

  /**
   * Runs the import for every Baijiahao account, one after another. A failing account is
   * logged and does not stop the remaining ones.
   */
  async runDailyImportForAllBaijiahaoAccounts(): Promise<void> {
    await ensureDir(this.downloadDir);

    const accounts = await this.accountRepository.find({
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      where: { platform: 'baijiahao' as any },
    });
    logger.info(`[BJ Import] Start. total_accounts=${accounts.length}`);

    for (const account of accounts) {
      try {
        await this.importAccountLast30Days(account);
      } catch (e) {
        logger.error(
          `[BJ Import] Account failed. accountId=${account.id} name=${account.accountName || ''}`,
          e
        );
      }
    }

    logger.info('[BJ Import] Done.');
  }

  /**
   * Imports the last 30 days for one account.
   *
   * Order of attempts: Python service -> (first attempt only) refresh the account and retry
   * the whole import once -> browser fallback.
   *
   * @param account Account to import.
   * @param isRetry True when called again after a refresh; suppresses further refreshes.
   * @throws Error when cookies are missing/unparseable or every path fails.
   */
  async importAccountLast30Days(account: PlatformAccount, isRetry = false): Promise<void> {
    const cookies = parseCookiesFromAccount(account.cookieData);
    if (!cookies.length) throw new Error('cookieData 为空或无法解析');

    // One China-timezone range for both the basic-data and fans requests.
    const range = this.getChinaLast30DayRange();

    try {
      await this.importViaPython(account, range);
      return;
    } catch (pythonError) {
      logger.warn(
        `[BJ Import] Python path failed, fallback to browser. accountId=${account.id}`,
        pythonError instanceof Error ? pythonError.message : pythonError
      );
    }

    if (!isRetry) {
      const refreshedAccount = await this.refreshAccountForRetry(account);
      if (refreshedAccount) {
        logger.info(`[BJ Import] Account ${account.id} refreshed, retrying import...`);
        // Deliberately outside any try/catch: a failing retry must surface its own error
        // rather than be reported as a refresh failure and trigger a second fallback with
        // the stale cookies.
        return this.importAccountLast30Days(refreshedAccount, true);
      }
    }

    await this.importViaBrowser(account, cookies, range, isRetry);
  }

  /**
   * Refreshes the account's login and reloads it from the repository.
   *
   * @returns The reloaded account, or null when the refresh failed, a re-login is needed,
   *          or the account can no longer be found. Failures are logged, never thrown.
   */
  private async refreshAccountForRetry(account: PlatformAccount): Promise<PlatformAccount | null> {
    try {
      const accountService = new AccountService();
      const refreshResult = await accountService.refreshAccount(account.userId, account.id);
      if (refreshResult.needReLogin) {
        logger.warn(`[BJ Import] Account ${account.id} refresh failed, still needs re-login`);
        return null;
      }
      return await this.accountRepository.findOne({ where: { id: account.id } });
    } catch (refreshError) {
      logger.error(`[BJ Import] Account ${account.id} refresh failed:`, refreshError);
      return null;
    }
  }

  /** Saves every day in `perDay` and returns the summed insert / update counts. */
  private async saveDailyStats(
    accountId: number,
    perDay: Map<string, DayStats>
  ): Promise<{ inserted: number; updated: number }> {
    let inserted = 0;
    let updated = 0;
    for (const v of perDay.values()) {
      const { recordDate, ...patch } = v;
      const r = await this.userDayStatisticsService.saveStatisticsForDate(accountId, recordDate, patch);
      inserted += r.inserted;
      updated += r.updated;
    }
    return { inserted, updated };
  }

  /** Saves fans figures day by day and returns the number of rows inserted or updated. */
  private async saveFansStats(accountId: number, list: FansDay[]): Promise<number> {
    let touched = 0;
    for (const { recordDate, fansCount, fansIncrease } of list) {
      const r = await this.userDayStatisticsService.saveStatisticsForDate(accountId, recordDate, {
        fansCount,
        fansIncrease,
      });
      touched += r.inserted + r.updated;
    }
    return touched;
  }

  /**
   * Python path: basic data is mandatory (any problem throws so the caller can fall back);
   * fans data is best effort and only logged on failure.
   */
  private async importViaPython(account: PlatformAccount, range: DayRange): Promise<void> {
    const data = await this.fetchAppStatisticV3ViaPython(account, range.start, range.end);
    const errno = typeof data?.errno === 'number' ? data.errno : Number(data?.errno ?? -1);
    if (errno !== 0) throw new Error(data?.errmsg ? String(data.errmsg) : 'appStatisticV3 errno !== 0');

    const perDay = parseBaijiahaoAppStatisticV3(data);
    if (perDay.size === 0) throw new Error('appStatisticV3 解析后无数据');

    const { inserted, updated } = await this.saveDailyStats(account.id, perDay);
    logger.info(
      `[BJ Import] basic-data (via Python). accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
    );

    try {
      const fansBody = await this.fetchFansBasicInfoViaPython(account, range.start, range.end);
      const fansErrno = fansBody.errno;
      if (fansErrno === 0 || fansErrno === undefined) {
        const list = this.parseGetFansBasicInfoResponse(fansBody);
        const fansUpdated = await this.saveFansStats(account.id, list);
        logger.info(`[BJ Import] Fans data (via Python). accountId=${account.id} days=${list.length} updated=${fansUpdated}`);
      }
    } catch (e) {
      logger.warn(`[BJ Import] Fans via Python failed (non-fatal). accountId=${account.id}`, e instanceof Error ? e.message : e);
    }
  }

  /** Clicks "导出数据" on the page and stores the downloaded workbook in the download dir. */
  private async downloadOverviewExcel(page: Page, accountId: number): Promise<string> {
    const [download] = await Promise.all([
      page.waitForEvent('download', { timeout: 60_000 }),
      page.getByText('导出数据', { exact: true }).first().click(),
    ]);
    const filePath = path.join(this.downloadDir, `${accountId}_${Date.now()}_${download.suggestedFilename()}`);
    await download.saveAs(filePath);
    return filePath;
  }

  /**
   * Browser fallback: opens the analysis page, reads appStatisticV3 with the page's session
   * and, when that yields fewer than 20 days, fills the gaps from the Excel export. Fans data
   * is then imported best effort.
   *
   * The context (and a dedicated browser, if one was launched) is always released.
   */
  private async importViaBrowser(
    account: PlatformAccount,
    cookies: PlaywrightCookie[],
    range: DayRange,
    isRetry: boolean
  ): Promise<void> {
    const loginRequiredMessage = '未登录/需要重新登录(跳转到登录页)';
    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    let context: BrowserContext | null = null;
    try {
      const statePath = await this.ensureStorageState(account, cookies);
      context = await browser.newContext({
        acceptDownloads: true,
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        ...(statePath ? { storageState: statePath } : {}),
      });
      context.setDefaultTimeout(60_000);
      if (!statePath) await context.addCookies(cookies);

      const page = await context.newPage();
      await page.goto('https://baijiahao.baidu.com/builder/rc/analysiscontent', { waitUntil: 'domcontentloaded' });
      await page.waitForTimeout(1500);

      // Redirected to the login page: the session is no longer valid.
      if (page.url().includes('passport') || page.url().includes('login')) {
        if (isRetry) throw new Error(loginRequiredMessage);

        logger.info(`[BJ Import] Login expired for account ${account.id}, attempting refresh...`);
        // Release the browser before the refresh and the recursive retry.
        await context.close().catch(() => undefined);
        context = null;
        if (shouldClose) await browser.close().catch(() => undefined);

        const refreshedAccount = await this.refreshAccountForRetry(account);
        if (!refreshedAccount) throw new Error(loginRequiredMessage);
        return await this.importAccountLast30Days(refreshedAccount, true);
      }

      const bodyText = (await page.textContent('body').catch(() => '')) || '';
      if (bodyText.includes('暂无数据') || bodyText.includes('无权访问')) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        await this.accountRepository.update(account.id, { status: 'expired' as any });
        wsManager.sendToUser(account.userId, WS_EVENTS.ACCOUNT_UPDATED, {
          account: { id: account.id, status: 'expired', platform: 'baijiahao' },
        });
        wsManager.sendToUser(account.userId, WS_EVENTS.SYSTEM_MESSAGE, {
          level: 'warning',
          message: `百家号账号「${account.accountName || account.accountId || account.id}」暂无数据看板访问权限,请到百家号后台检查数据权限。`,
          platform: 'baijiahao',
          accountId: account.id,
        });
        throw new Error('百家号数据看板暂无访问权限/暂无数据,已标记 expired 并通知用户');
      }

      // Best-effort navigation; the page may already be on the right tab.
      await page.getByText('数据中心', { exact: false }).first().click().catch(() => undefined);
      await page.getByText('内容分析', { exact: false }).first().click().catch(() => undefined);
      await page.getByText('基础数据', { exact: false }).first().click().catch(() => undefined);

      try {
        const trigger = page.getByText(/近\d+天?/, { exact: false }).first();
        if ((await trigger.count()) > 0) await trigger.click().catch(() => undefined);

        const exactThirty = page.getByText('近30天', { exact: true }).first();
        const thirtyDay =
          (await exactThirty.count()) > 0 ? exactThirty : page.getByText('近30日', { exact: false }).first();
        await thirtyDay.click().catch(() => undefined);
        await page.waitForTimeout(5000);
      } catch (e) {
        logger.warn(`[BJ Import] Unable to switch to 近30天. accountId=${account.id}`, e);
      }

      let perDay = new Map<string, DayStats>();
      try {
        perDay = await this.fetchAppStatisticV3ViaContext(context, account, range);
      } catch (e) {
        logger.warn(`[BJ Import] appStatisticV3 failed, fallback to Excel. accountId=${account.id}`, e);
      }

      let filePath: string | null = null;
      try {
        if (perDay.size < 20) {
          try {
            filePath = await this.downloadOverviewExcel(page, account.id);
            // API values win; the Excel export only fills days the API did not return.
            for (const [k, v] of parseBaijiahaoExcel(filePath).entries()) {
              if (!perDay.has(k)) perDay.set(k, v);
            }
          } catch (e) {
            // With no API data the Excel export is the only source, so its failure is fatal;
            // with partial API data keep what we have.
            if (perDay.size === 0) throw e;
            logger.warn(`[BJ Import] Excel supplement failed, keeping API data. accountId=${account.id}`, e);
          }
        }

        const { inserted, updated } = await this.saveDailyStats(account.id, perDay);
        logger.info(`[BJ Import] basic-data (browser). accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`);
      } finally {
        if (filePath && process.env.KEEP_BJ_XLSX !== 'true') await fs.unlink(filePath).catch(() => undefined);
      }

      try {
        await this.importFansDataByApi(context, account);
      } catch (e) {
        logger.warn(`[BJ Import] Fans import failed (non-fatal). accountId=${account.id}`, e instanceof Error ? e.message : e);
      }
    } finally {
      if (context) await context.close().catch(() => undefined);
      if (shouldClose) await browser.close().catch(() => undefined);
    }
  }

  /**
   * Requests appStatisticV3 with the browser context's session and parses it.
   * With BJ_IMPORT_DEBUG=1 the raw response is also written to the download directory.
   *
   * @throws Error on a non-OK status or an unparseable body.
   */
  private async fetchAppStatisticV3ViaContext(
    context: BrowserContext,
    account: PlatformAccount,
    range: DayRange
  ): Promise<Map<string, DayStats>> {
    const apiUrl = `https://baijiahao.baidu.com/author/eco/statistics/appStatisticV3?type=all&start_day=${range.start}&end_day=${range.end}&stat=0&special_filter_days=30`;
    const res = await context.request.get(apiUrl, {
      headers: { Referer: 'https://baijiahao.baidu.com/builder/rc/analysiscontent' },
    });
    if (!res.ok()) throw new Error(`appStatisticV3 http ${res.status()}`);

    const json = await res.json().catch(() => null);
    if (!json) throw new Error('appStatisticV3 json parse failed');

    if (process.env.BJ_IMPORT_DEBUG === '1') {
      const debugPath = path.join(this.downloadDir, `appStatisticV3_response_${account.id}_${Date.now()}.json`);
      await ensureDir(this.downloadDir);
      await fs.writeFile(debugPath, JSON.stringify(json, null, 2), 'utf-8');
      logger.info(`[BJ Import] DEBUG: appStatisticV3 原始响应已写入 ${debugPath}`);
    }
    return parseBaijiahaoAppStatisticV3(json);
  }

  /**
   * Fans data: requests getFansBasicInfo directly (no page navigation) for the last 30 days
   * ending yesterday in the China timezone.
   * sum_fans_count -> fansCount, new_fans_count -> fansIncrease.
   * Request or format problems are logged and the import is skipped.
   */
  private async importFansDataByApi(context: BrowserContext, account: PlatformAccount): Promise<void> {
    const { start: startStr, end: endStr } = this.getChinaLast30DayRange();

    const apiUrl = `https://baijiahao.baidu.com/author/eco/statistics/getFansBasicInfo?start=${startStr}&end=${endStr}&fans_type=new%2Csum&sort=asc&is_page=0&show_type=chart`;
    logger.info(`[BJ Import] getFansBasicInfo range (China). accountId=${account.id} start=${startStr} end=${endStr}`);

    let body: Record<string, unknown> | null = null;
    try {
      const res = await context.request.get(apiUrl, {
        headers: { Referer: 'https://baijiahao.baidu.com/builder/rc/analysisfans/basedata' },
      });
      if (res.ok()) body = await res.json().catch(() => null);
    } catch (e) {
      logger.warn(`[BJ Import] getFansBasicInfo request failed. accountId=${account.id}`, e);
      return;
    }

    if (!body || typeof body !== 'object') {
      logger.warn(`[BJ Import] getFansBasicInfo response not valid JSON, skip. accountId=${account.id}`);
      return;
    }

    const errno = body.errno;
    if (errno !== 0 && errno !== undefined) {
      logger.warn(`[BJ Import] getFansBasicInfo errno=${errno}, skip. accountId=${account.id}`);
      return;
    }

    const list = this.parseGetFansBasicInfoResponse(body);
    if (!list.length) {
      logger.info(`[BJ Import] No fans data from getFansBasicInfo. accountId=${account.id}`);
      return;
    }

    const fmtDay = (d: Date | undefined) =>
      d ? `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}` : '';
    const firstDay = list[0]?.recordDate;
    const lastDay = list[list.length - 1]?.recordDate;
    logger.info(`[BJ Import] getFansBasicInfo response. accountId=${account.id} count=${list.length} first=${fmtDay(firstDay)} last=${fmtDay(lastDay)}`);

    const updated = await this.saveFansStats(account.id, list);
    logger.info(`[BJ Import] Fans data imported. accountId=${account.id} days=${list.length} updated=${updated}`);
  }

  /**
   * Parses a getFansBasicInfo response into (recordDate, fansCount, fansIncrease) entries.
   *
   * Reads `body.data.list`; entries whose `day` is not an 8-digit date are skipped.
   * sum_fans_count -> fansCount, new_fans_count -> fansIncrease; "--", blank, invalid or
   * negative values become 0.
   */
  private parseGetFansBasicInfoResponse(body: Record<string, unknown>): FansDay[] {
    const list: FansDay[] = [];

    const data = body.data as Record<string, unknown> | undefined;
    if (!data || typeof data !== 'object') return list;
    const arr = data.list as unknown[] | undefined;
    if (!Array.isArray(arr)) return list;

    // Non-negative integer, defaulting to 0 for anything unusable.
    const toNum = (v: unknown): number => {
      if (v === null || v === undefined) return 0;
      if (typeof v === 'number' && Number.isFinite(v)) return Math.max(0, Math.round(v));
      const s = String(v).trim();
      if (s === '' || s === '--') return 0;
      const n = Number(s.replace(/,/g, ''));
      return Number.isFinite(n) ? Math.max(0, Math.round(n)) : 0;
    };

    for (const item of arr) {
      if (!item || typeof item !== 'object') continue;
      const o = item as Record<string, unknown>;

      const dayRaw = o.day;
      if (dayRaw == null) continue;
      const dayStr = String(dayRaw).trim();
      if (!/^\d{8}$/.test(dayStr)) continue;
      const d = normalizeDateText(dayStr);
      if (!d) continue;

      list.push({
        recordDate: d,
        fansCount: toNum(o.sum_fans_count),
        fansIncrease: toNum(o.new_fans_count),
      });
    }
    return list;
  }
}