import fs from 'node:fs/promises';
import path from 'node:path';
import { chromium, type Browser, type Page } from 'playwright';
import * as XLSXNS from 'xlsx';
import { AppDataSource, PlatformAccount } from '../models/index.js';
import { BrowserManager } from '../automation/browser.js';
import { logger } from '../utils/logger.js';
import { UserDayStatisticsService } from './UserDayStatisticsService.js';
import type { ProxyConfig } from '@media-manager/shared';
import { WS_EVENTS } from '@media-manager/shared';
import { wsManager } from '../websocket/index.js';

// Under ESM the xlsx API may hang off `default`; fall back to the namespace object otherwise.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const XLSX: any = (XLSXNS as any).default ?? (XLSXNS as any);

/** Origin used for cookies that carry no explicit domain. */
const WX_COOKIE_URL = 'https://channels.weixin.qq.com';

/** Entry page of the WeChat Channels creator platform. */
const WX_PLATFORM_URL = 'https://channels.weixin.qq.com/platform';

/** How long the bootstrap flow waits for a human to finish logging in. */
const LOGIN_WAIT_MS = 5 * 60_000;

/** Cookie shape accepted by Playwright's `BrowserContext.addCookies`. */
type PlaywrightCookie = {
  name: string;
  value: string;
  domain?: string;
  path?: string;
  url?: string;
  expires?: number;
  httpOnly?: boolean;
  secure?: boolean;
  sameSite?: 'Lax' | 'None' | 'Strict';
};

/** Metric fields that can be extracted from an exported table. */
type DayMetricKey =
  | 'playCount'
  | 'likeCount'
  | 'commentCount'
  | 'shareCount'
  | 'fansIncrease'
  | 'fansCount';

/** One day of parsed statistics; only the metrics present in the export are set. */
type DayStatsRow = { recordDate: Date } & Partial<Record<DayMetricKey, number>>;

/** Data-center sections that can be exported. */
type WxSection = '关注者数据' | '视频数据' | '图文数据';

/** Header texts that identify the date column. */
const DATE_HEADERS: readonly string[] = ['时间', '日期'];

/** Candidate header texts for every metric, in lookup priority order. */
const METRIC_COLUMNS: ReadonlyArray<readonly [DayMetricKey, readonly string[]]> = [
  ['playCount', ['播放', '播放量', '曝光量', '阅读/播放量', '阅读量']],
  ['likeCount', ['喜欢', '点赞', '点赞量']],
  ['commentCount', ['评论', '评论量']],
  ['shareCount', ['分享', '分享量']],
  ['fansIncrease', ['净增关注', '新增关注']],
  ['fansCount', ['关注者总数', '关注者总量', '粉丝总数', '粉丝总量']],
];

/** Button texts of the "last 30 days" range filter. */
const LAST_30_DAYS_LABELS: readonly string[] = ['近30天', '近30日', '近30'];

/** Page-specific selectors of the "last 30 days" radio button; text lookup is the fallback. */
const LAST_30_DAYS_SELECTORS: Partial<Record<WxSection, string>> = {
  关注者数据:
    '#container-wrap > div.container-center > div > div > div.follower-growth-wrap > div:nth-child(4) > div > div > div.card-body > div.filter-wrap > div > div.filter-content > div > div > div.weui-desktop-radio-group.radio-group > label:nth-child(2)',
  视频数据:
    '#container-wrap > div.container-center > div > div > div > div.post-total-wrap > div.post-statistic-common > div:nth-child(3) > div > div > div.card-body > div.filter-wrap > div:nth-child(2) > div.filter-content > div > div > div.weui-desktop-radio-group.radio-group > label:nth-child(2)',
};

/** Creates `p` (and any missing parents); resolves even when it already exists. */
function ensureDir(p: string) {
  return fs.mkdir(p, { recursive: true });
}

/**
 * Builds a local-midnight date from calendar components.
 *
 * @returns `null` when any component is zero/NaN or the combination is not a real
 *   calendar day (e.g. month 13 or day 32), instead of letting `Date` roll over.
 */
function buildLocalMidnight(year: number, month: number, day: number): Date | null {
  if (!year || !month || !day) return null;
  const d = new Date(year, month - 1, day);
  d.setHours(0, 0, 0, 0);
  const sameDay = d.getFullYear() === year && d.getMonth() === month - 1 && d.getDate() === day;
  return sameDay ? d : null;
}

/**
 * Converts a table cell into a local-midnight `Date`.
 *
 * Accepts valid `Date` instances, `YYYY<sep>M<sep>D` text (any single non-digit
 * separator, e.g. `2026/1/27` or `2026-01-27`) and compact `YYYYMMDD` text.
 *
 * @returns The normalized date, or `null` when the input is empty or not a valid day.
 */
function normalizeDateText(input: unknown): Date | null {
  if (!input) return null;
  if (input instanceof Date && !Number.isNaN(input.getTime())) {
    const d = new Date(input);
    d.setHours(0, 0, 0, 0);
    return d;
  }
  const s = String(input).trim();
  if (!s) return null;

  // 2026/1/27 or 2026-01-27; fall back to the compact 20260127 form.
  const parts = s.match(/(\d{4})\D(\d{1,2})\D(\d{1,2})/) ?? s.match(/^(\d{4})(\d{2})(\d{2})$/);
  if (!parts) return null;
  return buildLocalMidnight(Number(parts[1]), Number(parts[2]), Number(parts[3]));
}

/**
 * Parses a count that may use thousands separators or the units 万 (1e4) / 亿 (1e8).
 *
 * @returns The rounded integer, or `null` when the input holds no number at all
 *   (so placeholder text never overwrites a real count with 0).
 */
function parseChineseNumberLike(input: unknown): number | null {
  if (input === null || input === undefined) return null;
  const s = String(input).trim();
  if (!s) return null;
  const plain = s.replace(/,/g, '');

  // The sign is part of the match so "-1.2万" is scaled instead of truncated to -1.
  const withUnit = plain.match(/^(-?\d+(?:\.\d+)?)\s*([万亿])$/);
  if (withUnit) {
    const factor = withUnit[2] === '万' ? 10_000 : 100_000_000;
    return Math.round(Number(withUnit[1]) * factor);
  }

  const digits = plain.replace(/[^\d.-]/g, '');
  if (!digits) return null; // Number('') is 0, which would be a fabricated value.
  const n = Number(digits);
  return Number.isFinite(n) ? Math.round(n) : null;
}

/** Narrows an unknown value to a plain property bag. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Converts one entry of a JSON cookie export into a Playwright cookie.
 *
 * @returns `null` when the entry has no usable name.
 */
function toPlaywrightCookie(entry: unknown): PlaywrightCookie | null {
  const c = isRecord(entry) ? entry : {};
  const name = String(c.name ?? '').trim();
  if (!name) return null;
  const value = String(c.value ?? '').trim();

  const rawSameSite = c.sameSite;
  const sameSite =
    rawSameSite === 'Lax' || rawSameSite === 'None' || rawSameSite === 'Strict' ? rawSameSite : undefined;
  const attributes = {
    expires: typeof c.expires === 'number' ? c.expires : undefined,
    httpOnly: typeof c.httpOnly === 'boolean' ? c.httpOnly : undefined,
    secure: typeof c.secure === 'boolean' ? c.secure : undefined,
    sameSite,
  };

  const domain = c.domain ? String(c.domain) : undefined;
  if (domain) {
    return { name, value, domain, path: c.path ? String(c.path) : '/', ...attributes };
  }
  // Playwright accepts either `url` or a `domain`/`path` pair, never `url` together
  // with `path`, so URL-scoped cookies must not carry a path.
  return { name, value, url: WX_COOKIE_URL, ...attributes };
}

/**
 * Parses the cookie blob stored on an account.
 *
 * Supported formats: a JSON array of cookies, a JSON object with a `cookies` array,
 * or a raw `a=b; c=d` header string. Text that looks like JSON but fails to parse is
 * retried as a header string.
 *
 * @returns The cookies ready for `addCookies`; empty when nothing could be parsed.
 */
function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
  if (!cookieData) return [];
  const raw = cookieData.trim();
  if (!raw) return [];

  // 1) JSON array / object
  if (raw.startsWith('[') || raw.startsWith('{')) {
    try {
      const parsed: unknown = JSON.parse(raw);
      const list = Array.isArray(parsed) ? parsed : isRecord(parsed) ? parsed.cookies : undefined;
      if (!Array.isArray(list)) return [];
      return list.map(toPlaywrightCookie).filter((c): c is PlaywrightCookie => c !== null);
    } catch {
      // Not valid JSON: fall through to the header-string format.
    }
  }

  // 2) "a=b; c=d"
  const cookies: PlaywrightCookie[] = [];
  for (const pair of raw.split(';')) {
    const trimmed = pair.trim();
    const eq = trimmed.indexOf('=');
    if (eq <= 0) continue;
    const name = trimmed.slice(0, eq).trim();
    if (!name) continue;
    cookies.push({ name, value: trimmed.slice(eq + 1).trim(), url: WX_COOKIE_URL });
  }
  return cookies;
}

/**
 * Provides a browser for an account.
 *
 * Accounts with an enabled proxy get a dedicated Chromium instance that the caller
 * must close (`shouldClose: true`); all others use the browser returned by
 * `BrowserManager.getBrowser`, which the caller must leave open.
 */
async function createBrowserForAccount(
  proxy: ProxyConfig | null
): Promise<{ browser: Browser; shouldClose: boolean }> {
  // Headless by default. Channels frequently forces a login / risk check in headless
  // mode, so WX_IMPORT_HEADLESS=0 switches the import to a headed browser.
  const headless = process.env.WX_IMPORT_HEADLESS !== '0';

  if (proxy?.enabled) {
    const server = `${proxy.type}://${proxy.host}:${proxy.port}`;
    const browser = await chromium.launch({
      headless,
      proxy: {
        server,
        username: proxy.username,
        password: proxy.password,
      },
      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu', '--window-size=1920,1080'],
    });
    return { browser, shouldClose: true };
  }

  const browser = await BrowserManager.getBrowser({ headless });
  return { browser, shouldClose: false };
}

/**
 * Splits a single CSV line into trimmed cells.
 *
 * Handles double-quoted cells and `""` escapes; quoted cells spanning several lines
 * are not supported.
 */
function parseCsvLine(line: string): string[] {
  const cells: string[] = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i]!;
    if (ch === '"') {
      if (inQuotes && line[i + 1] === '"') {
        current += '"'; // escaped quote inside a quoted cell
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (ch === ',' && !inQuotes) {
      cells.push(current);
      current = '';
    } else {
      current += ch;
    }
  }
  cells.push(current);
  return cells.map((cell) => cell.trim());
}

/** Finds a header column: exact matches win over substring matches; -1 when absent. */
function findColumn(header: readonly string[], names: readonly string[]): number {
  for (const name of names) {
    const idx = header.indexOf(name);
    if (idx >= 0) return idx;
  }
  for (const name of names) {
    const idx = header.findIndex((h) => h.includes(name));
    if (idx >= 0) return idx;
  }
  return -1;
}

/** Formats a date as the `YYYY-MM-DD` key used to merge rows of the same day. */
function formatDayKey(d: Date): string {
  const mm = String(d.getMonth() + 1).padStart(2, '0');
  const dd = String(d.getDate()).padStart(2, '0');
  return `${d.getFullYear()}-${mm}-${dd}`;
}

/**
 * Folds the data rows of one table into `result`, keyed by day.
 *
 * Rows without a parsable date are skipped; metric cells that hold no number leave
 * the previously collected value untouched.
 */
function mergeTableRows(
  result: Map<string, DayStatsRow>,
  header: readonly string[],
  rows: ReadonlyArray<readonly unknown[]>
): void {
  const dateCol = findColumn(header, DATE_HEADERS);
  if (dateCol < 0) return;

  const metricCols = METRIC_COLUMNS.map(([key, names]) => ({ key, col: findColumn(header, names) })).filter(
    (m) => m.col >= 0
  );

  for (const row of rows) {
    if (!Array.isArray(row) || row.length <= dateCol) continue;
    const day = normalizeDateText(row[dateCol]);
    if (!day) continue;

    const key = formatDayKey(day);
    let stats = result.get(key);
    if (!stats) {
      stats = { recordDate: day };
      result.set(key, stats);
    }
    for (const { key: metric, col } of metricCols) {
      const n = parseChineseNumberLike(row[col]);
      if (n !== null) stats[metric] = n;
    }
  }
}

/**
 * Parses an exported data-center table (`.csv`, or anything the xlsx library reads).
 *
 * @returns Per-day statistics keyed by `YYYY-MM-DD`; empty when no header row with a
 *   date column is found.
 */
async function parseWeixinVideoFile(filePath: string): Promise<Map<string, DayStatsRow>> {
  const result = new Map<string, DayStatsRow>();
  const ext = path.extname(filePath).toLowerCase();

  if (ext === '.csv') {
    const text = await fs.readFile(filePath, 'utf8');
    const lines = text
      .replace(/^\uFEFF/, '')
      .split(/\r?\n/)
      .filter((l) => l.trim().length > 0);
    logger.info(`[WX Import] CSV loaded. file=${path.basename(filePath)} lines=${lines.length}`);

    // Locate the header row (contains "时间" or "日期").
    const headerLineIdx = lines.findIndex(
      (l) => l.includes('"时间"') || l.includes('"日期"') || l.startsWith('时间,') || l.startsWith('日期,')
    );
    if (headerLineIdx < 0) return result;

    // Drops a quote left at either end of a cell after CSV parsing.
    const toCells = (line: string) => parseCsvLine(line).map((c) => c.replace(/^"|"$/g, '').trim());
    const header = toCells(lines[headerLineIdx]!);
    logger.info(`[WX Import] Header detected. headerRow=${headerLineIdx + 1} headers=${header.join('|')}`);

    mergeTableRows(result, header, lines.slice(headerLineIdx + 1).map(toCells));
    return result;
  }

  // xlsx / xls: parsed through the xlsx library.
  const wb = XLSX.readFile(filePath);
  logger.info(`[WX Import] Excel loaded. file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`);

  for (const sheetName of wb.SheetNames) {
    const sheet = wb.Sheets[sheetName];
    const rows: unknown[][] = XLSX.utils.sheet_to_json(sheet, { header: 1, defval: '' });
    if (!rows.length) continue;

    const headerIdx = rows.findIndex(
      (r) => Array.isArray(r) && r.some((c) => DATE_HEADERS.includes(String(c).trim()))
    );
    const headerRow = headerIdx >= 0 ? rows[headerIdx] : undefined;
    if (!headerRow) continue;

    const header = headerRow.map((c) => String(c).trim());
    logger.info(`[WX Import] Header detected. sheet=${sheetName} headerRow=${headerIdx + 1} headers=${header.join('|')}`);

    mergeTableRows(result, header, rows.slice(headerIdx + 1));
  }

  return result;
}

/**
 * Clicks the first element whose text matches one of `texts`.
 *
 * Exact text matches are tried for every candidate before substring matches.
 * Click failures are ignored on purpose (best effort).
 *
 * @returns `true` when an element was found, `false` otherwise.
 */
async function clickFirstMatchingText(page: Page, texts: readonly string[]): Promise<boolean> {
  for (const exact of [true, false]) {
    for (const text of texts) {
      const target = page.getByText(text, { exact }).first();
      if ((await target.count().catch(() => 0)) > 0) {
        await target.click().catch(() => undefined);
        return true;
      }
    }
  }
  return false;
}

/**
 * Imports the last 30 days of WeChat Channels (视频号) data-center statistics by
 * driving the creator platform in a browser, downloading the exported tables and
 * saving the per-day values through `UserDayStatisticsService`.
 */
export class WeixinVideoDataCenterImportService {
  private accountRepository = AppDataSource.getRepository(PlatformAccount);
  private userDayStatisticsService = new UserDayStatisticsService();

  // Works both when the monorepo is started from its root and from the server directory.
  private baseDir =
    path.basename(process.cwd()).toLowerCase() === 'server' ? process.cwd() : path.resolve(process.cwd(), 'server');

  private downloadDir = path.resolve(this.baseDir, 'tmp', 'weixin-video-data-center');
  private stateDir = path.resolve(this.baseDir, 'tmp', 'weixin-video-storage-state');

  /** Location of the persisted Playwright storage state of an account. */
  private getStatePath(accountId: number) {
    return path.join(this.stateDir, `${accountId}.json`);
  }

  /**
   * Returns the path of the account's storage state, creating it interactively when
   * allowed.
   *
   * An existing state file is reused as is. Otherwise a state is only bootstrapped
   * when both `WX_IMPORT_HEADLESS=0` and `WX_STORAGE_STATE_BOOTSTRAP=1` are set: a
   * browser opens and waits up to five minutes for the login to complete.
   *
   * @returns The state file path, or `null` when no state exists and none was created
   *   (bootstrap disabled, or the login was not completed in time).
   */
  private async ensureStorageState(account: PlatformAccount, cookies: PlaywrightCookie[]): Promise<string | null> {
    const statePath = this.getStatePath(account.id);
    try {
      await fs.access(statePath);
      return statePath;
    } catch {
      // No stored state yet.
    }

    if (!(process.env.WX_IMPORT_HEADLESS === '0' && process.env.WX_STORAGE_STATE_BOOTSTRAP === '1')) {
      return null;
    }

    await ensureDir(this.stateDir);
    logger.warn(`[WX Import] No storageState for accountId=${account.id}. Bootstrapping... 请在弹出的浏览器中完成登录/验证。`);

    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
      });
      try {
        await context.addCookies(cookies);
        const page = await context.newPage();
        await page.goto(WX_PLATFORM_URL, { waitUntil: 'domcontentloaded' });

        // Options are the THIRD argument of waitForFunction; the second one is the
        // value handed to the page function.
        const loggedIn = await page
          .waitForFunction(
            () => {
              const t = document.body?.innerText || '';
              return t.includes('数据中心') || t.includes('关注者数据') || t.includes('视频数据');
            },
            undefined,
            { timeout: LOGIN_WAIT_MS }
          )
          .then(() => true)
          .catch(() => false);

        if (!loggedIn) {
          // Persisting a logged-out state would make every later run reuse it.
          logger.warn(`[WX Import] Login not completed in time, storageState not saved. accountId=${account.id}`);
          return null;
        }

        await context.storageState({ path: statePath });
        logger.info(`[WX Import] storageState saved: ${statePath}`);
        return statePath;
      } finally {
        await context.close().catch(() => undefined);
      }
    } finally {
      if (shouldClose) await browser.close().catch(() => undefined);
    }
  }

  /**
   * Imports the last 30 days for every `weixin_video` account, one after another.
   * A failing account is logged and does not stop the remaining accounts.
   */
  async runDailyImportForAllWeixinVideoAccounts(): Promise<void> {
    await ensureDir(this.downloadDir);

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const accounts = await this.accountRepository.find({ where: { platform: 'weixin_video' as any } });
    logger.info(`[WX Import] Start. total_accounts=${accounts.length}`);

    for (const account of accounts) {
      try {
        await this.importAccountLast30Days(account);
      } catch (e) {
        logger.error(`[WX Import] Account failed. accountId=${account.id} name=${account.accountName || ''}`, e);
      }
    }

    logger.info('[WX Import] Done.');
  }

  /** Selects the "last 30 days" range, preferring the section-specific radio button. */
  private async selectLast30Days(page: Page, section: WxSection): Promise<void> {
    const selector = LAST_30_DAYS_SELECTORS[section];
    if (selector) {
      const radio = page.locator(selector);
      if ((await radio.count().catch(() => 0)) > 0) {
        await radio.click().catch(() => undefined);
        return;
      }
    }
    await clickFirstMatchingText(page, LAST_30_DAYS_LABELS);
  }

  /**
   * Opens one data-center section, downloads its 30-day table and merges the parsed
   * days into `mergedDays`. A section that cannot be found is skipped with a warning.
   *
   * @throws When the download does not start within 60 seconds or the file cannot be parsed.
   */
  private async exportSection(
    page: Page,
    account: PlatformAccount,
    section: WxSection,
    mergedDays: Map<string, DayStatsRow>
  ): Promise<void> {
    if (!(await clickFirstMatchingText(page, [section]))) {
      logger.warn(`[WX Import] Section not found, skip. accountId=${account.id} section=${section}`);
      return;
    }
    await page.waitForTimeout(1200);

    // Open the detail tab; the page labels it either "增长详情" or "数据详情".
    await clickFirstMatchingText(page, ['增长详情', '数据详情']);
    await page.waitForTimeout(800);

    await this.selectLast30Days(page, section);
    await page.waitForTimeout(4000);

    // Start listening before clicking so the download event cannot be missed.
    const [download] = await Promise.all([
      page.waitForEvent('download', { timeout: 60_000 }),
      clickFirstMatchingText(page, ['下载表格', '下载', '导出数据']),
    ]);

    // The suggested name comes from the remote site: keep only its base name.
    const filename = `${account.id}_${Date.now()}_${path.basename(download.suggestedFilename())}`;
    const filePath = path.join(this.downloadDir, filename);
    await download.saveAs(filePath);

    try {
      const perDay = await parseWeixinVideoFile(filePath);
      for (const [key, stats] of perDay) {
        const existing = mergedDays.get(key);
        if (existing) Object.assign(existing, stats);
        else mergedDays.set(key, { ...stats });
      }
      logger.info(`[WX Import] Section parsed. accountId=${account.id} section=${section} days=${perDay.size}`);
    } finally {
      if (process.env.KEEP_WX_XLSX === 'true') {
        logger.warn(`[WX Import] KEEP_WX_XLSX=true, keep file: ${filePath}`);
      } else {
        await fs.unlink(filePath).catch(() => undefined);
      }
    }
  }

  /**
   * Imports the last 30 days of follower and video statistics for one account.
   *
   * @throws When the account has no usable cookies, when the platform redirects to a
   *   login page, or when a section export fails.
   */
  async importAccountLast30Days(account: PlatformAccount): Promise<void> {
    const cookies = parseCookiesFromAccount(account.cookieData);
    if (!cookies.length) throw new Error('cookieData 为空或无法解析');

    // Resolved before acquiring the browser: bootstrapping opens its own browser.
    const statePath = await this.ensureStorageState(account, cookies);

    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    try {
      logger.info(`[WX Import] Context init. accountId=${account.id} storageState=${statePath ? statePath : 'none'}`);
      const context = await browser.newContext({
        acceptDownloads: true,
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        ...(statePath ? { storageState: statePath } : {}),
      });

      // The context is closed on every path; a shared browser would otherwise
      // accumulate one leaked context per failed account.
      try {
        context.setDefaultTimeout(60_000);
        if (!statePath) await context.addCookies(cookies);

        const page = await context.newPage();
        await page.goto(WX_PLATFORM_URL, { waitUntil: 'domcontentloaded' });
        await page.waitForTimeout(1500);

        if (page.url().includes('login') || page.url().includes('passport')) {
          throw new Error('未登录/需要重新登录(跳转到登录页)');
        }

        // Open the data center.
        await page.getByText('数据中心', { exact: false }).first().click();
        await page.waitForTimeout(800);

        // Only follower and video data are collected for now; article data is skipped.
        const sections: WxSection[] = ['关注者数据', '视频数据'];
        const mergedDays = new Map<string, DayStatsRow>();
        for (const section of sections) {
          await this.exportSection(page, account, section, mergedDays);
        }

        let inserted = 0;
        let updated = 0;
        for (const stats of mergedDays.values()) {
          const { recordDate, ...patch } = stats;
          const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
          inserted += r.inserted;
          updated += r.updated;
        }

        logger.info(`[WX Import] Account imported. accountId=${account.id} days=${mergedDays.size} inserted=${inserted} updated=${updated}`);
      } finally {
        await context.close().catch(() => undefined);
      }
    } finally {
      if (shouldClose) await browser.close().catch(() => undefined);
    }
  }
}