| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926 |
- import fs from 'node:fs/promises';
- import path from 'node:path';
- import { chromium, type Browser, type Page, type BrowserContext } from 'playwright';
- import * as XLSXNS from 'xlsx';
- import { AppDataSource, PlatformAccount } from '../models/index.js';
- import { BrowserManager } from '../automation/browser.js';
- import { logger } from '../utils/logger.js';
- import { UserDayStatisticsService } from './UserDayStatisticsService.js';
- import { AccountService } from './AccountService.js';
- import { getPythonServiceBaseUrl } from './PythonServiceConfigService.js';
- import type { ProxyConfig } from '@media-manager/shared';
- import { WS_EVENTS } from '@media-manager/shared';
- import { wsManager } from '../websocket/index.js';
// Under ESM the `xlsx` package may hang its API off `default` instead of the
// namespace object, so take `default` when present and fall back to the namespace.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const xlsxNamespace: any = XLSXNS;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const XLSX: any = xlsxNamespace.default ?? xlsxNamespace;
/**
 * Cookie shape produced by `parseCookiesFromAccount` and handed to
 * `BrowserContext.addCookies`.
 */
type PlaywrightCookie = {
  /** Cookie name (never empty once parsed). */
  name: string;
  /** Cookie value. */
  value: string;
  /** Cookie domain, when the stored cookie carried one. */
  domain?: string;
  /** Cookie path. */
  path?: string;
  /** Target URL, used by the parser when no domain is known. */
  url?: string;
  /** Expiry as stored in the source cookie (numeric). */
  expires?: number;
  httpOnly?: boolean;
  secure?: boolean;
  sameSite?: 'Lax' | 'None' | 'Strict';
};
/**
 * Creates directory `p` together with any missing parent directories.
 * Resolves without error when the directory already exists.
 */
function ensureDir(p: string) {
  const mkdirOptions = { recursive: true } as const;
  return fs.mkdir(p, mkdirOptions);
}
/**
 * Parses a date-like value into a `Date` at local midnight.
 *
 * Accepted forms: compact `20260115`, or any text containing
 * `YYYY<sep>M<sep>D` where `<sep>` is a single non-digit (`2026-01-15`,
 * `2026/1/5 12:30`). Numbers are stringified first, so `20260115` works too.
 *
 * @param input Raw cell / JSON value.
 * @returns The parsed date, or `null` when the value is empty, unrecognised,
 *   or names a day that does not exist (e.g. `20260231`, month 13). Such
 *   values are rejected instead of being rolled over into a neighbouring
 *   month by the `Date` constructor.
 */
function normalizeDateText(input: unknown): Date | null {
  if (!input) return null;
  const text = String(input).trim();
  if (!text) return null;

  // Compact form is tried first; the separated form is deliberately unanchored
  // so trailing time-of-day text is tolerated.
  const match =
    text.match(/^(\d{4})(\d{2})(\d{2})$/) ?? text.match(/(\d{4})\D(\d{1,2})\D(\d{1,2})/);
  if (!match) return null;

  const year = Number(match[1]);
  const month = Number(match[2]);
  const day = Number(match[3]);
  if (!year || !month || !day) return null;

  // `new Date(y, m, d)` is already local midnight.
  const date = new Date(year, month - 1, day);

  // Round-trip check: the constructor silently normalises overflow
  // (Feb 31 -> Mar 3), which would file the data under the wrong day.
  const isRealCalendarDay =
    date.getFullYear() === year && date.getMonth() === month - 1 && date.getDate() === day;
  return isRealCalendarDay ? date : null;
}
/**
 * Parses a count that may use thousands separators or the Chinese unit
 * suffixes 万 (×10,000) and 亿 (×100,000,000), rounding to an integer.
 *
 * @param input Raw cell / JSON value (`"1,234"`, `"1.5万"`, `42`, ...).
 * @returns The rounded integer, or `null` when the value is nullish, blank,
 *   or contains no numeric content at all (placeholders such as `"--"` or
 *   `"暂无"` must not be recorded as a real `0`).
 */
function parseChineseNumberLike(input: unknown): number | null {
  if (input === null || input === undefined) return null;
  const text = String(input).trim();
  if (!text) return null;

  const plain = text.replace(/,/g, '');

  // Unit-suffixed values; an optional sign is accepted so "-1.2万" keeps its multiplier.
  const scaled = plain.match(/^([+-]?\d+(?:\.\d+)?)\s*([万亿])$/);
  if (scaled) {
    const factor = scaled[2] === '亿' ? 100_000_000 : 10_000;
    return Math.round(Number(scaled[1]) * factor);
  }

  // Otherwise keep only digits, dot and minus. An empty remainder means the
  // text had no number in it; Number('') would otherwise yield a bogus 0.
  const numericPart = plain.replace(/[^\d.-]/g, '');
  if (!numericPart) return null;

  const value = Number(numericPart);
  return Number.isFinite(value) ? Math.round(value) : null;
}
/**
 * Converts the cookie text stored on an account into cookies for
 * `BrowserContext.addCookies`.
 *
 * Two storage formats are understood:
 *  1. JSON: either an array of cookie objects or an object with a `cookies` array.
 *  2. A `Cookie` header string: `"a=b; c=d"`.
 *
 * Playwright requires each cookie to carry EITHER `url` OR a `domain`/`path`
 * pair, never a mix. Entries with a domain therefore get `domain` + `path`
 * (path defaults to `/`); entries without one are bound to the Baijiahao
 * origin through `url` alone.
 *
 * @param cookieData Raw stored cookie text, or `null`.
 * @returns Parsed cookies; empty when nothing usable was found. Entries
 *   without a name are dropped.
 */
function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
  const defaultUrl = 'https://baijiahao.baidu.com';
  const raw = cookieData?.trim() ?? '';
  if (!raw) return [];

  // 1) JSON array / object
  if (raw.startsWith('[') || raw.startsWith('{')) {
    try {
      const parsed: unknown = JSON.parse(raw);
      const list = Array.isArray(parsed)
        ? parsed
        : (parsed as { cookies?: unknown } | null)?.cookies;
      if (!Array.isArray(list)) return [];

      const cookies: PlaywrightCookie[] = [];
      for (const item of list) {
        const entry = (item ?? {}) as Record<string, unknown>;
        const name = String(entry.name ?? '').trim();
        if (!name) continue;
        const value = String(entry.value ?? '').trim();

        const domain = entry.domain ? String(entry.domain) : undefined;
        const cookie: PlaywrightCookie = domain
          ? { name, value, domain, path: entry.path ? String(entry.path) : '/' }
          : { name, value, url: defaultUrl }; // url-bound cookies must not also set path

        if (typeof entry.expires === 'number') cookie.expires = entry.expires;
        if (typeof entry.httpOnly === 'boolean') cookie.httpOnly = entry.httpOnly;
        if (typeof entry.secure === 'boolean') cookie.secure = entry.secure;
        const sameSite = entry.sameSite;
        if (sameSite === 'Lax' || sameSite === 'None' || sameSite === 'Strict') {
          cookie.sameSite = sameSite;
        }
        cookies.push(cookie);
      }
      return cookies;
    } catch {
      // Not valid JSON after all: fall through and try the header format.
    }
  }

  // 2) "a=b; c=d"
  const cookies: PlaywrightCookie[] = [];
  for (const pair of raw.split(';')) {
    const trimmed = pair.trim();
    const eq = trimmed.indexOf('=');
    if (eq <= 0) continue; // no '=' or empty name
    const name = trimmed.slice(0, eq).trim();
    if (!name) continue;
    cookies.push({ name, value: trimmed.slice(eq + 1).trim(), url: defaultUrl });
  }
  return cookies;
}
/**
 * Obtains a browser for one account.
 *
 * Silent sync runs headless by default. A visible window is allowed only for
 * the interactive login bootstrap, i.e. when BJ_STORAGE_STATE_BOOTSTRAP=1 and
 * BJ_IMPORT_HEADLESS=0 are both set.
 *
 * @param proxy The account's proxy settings, or `null`.
 * @returns The browser plus `shouldClose`: `true` for a dedicated
 *   proxy-bound instance launched here (the caller must close it), `false`
 *   for the browser obtained from `BrowserManager`.
 */
async function createBrowserForAccount(proxy: ProxyConfig | null): Promise<{ browser: Browser; shouldClose: boolean }> {
  const bootstrapRequested = process.env.BJ_STORAGE_STATE_BOOTSTRAP === '1';
  const headfulRequested = process.env.BJ_IMPORT_HEADLESS === '0';
  const headless = !(bootstrapRequested && headfulRequested);

  // Without an enabled proxy, use the browser handed out by BrowserManager.
  if (!proxy?.enabled) {
    const managed = await BrowserManager.getBrowser({ headless });
    return { browser: managed, shouldClose: false };
  }

  // With a proxy, launch a separate Chromium bound to that proxy.
  const launchArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--window-size=1920,1080',
  ];
  const proxied = await chromium.launch({
    headless,
    proxy: {
      server: `${proxy.type}://${proxy.host}:${proxy.port}`,
      username: proxy.username,
      password: proxy.password,
    },
    args: launchArgs,
  });
  return { browser: proxied, shouldClose: true };
}
/**
 * Reads an exported Baijiahao "basic data" workbook and folds every sheet
 * into one record per calendar day.
 *
 * The export is GBK-encoded, so header names arrive garbled in Node while the
 * column ORDER stays stable. Columns are therefore addressed by position:
 *
 *   0 date (e.g. 20260115)   1 reads            2 click rate
 *   3 interaction rate       4 comments         5 comment rate (%)
 *   6 likes                  7 like rate (%)    8 favourites
 *   9 favourite rate (%)    10 shares          11 share rate (%)
 *  12 fans gained by works  13 fans gain rate  14 fans lost by works
 *  (remaining columns are not stored)
 *
 * @param filePath Path of the downloaded workbook.
 * @returns Map keyed by `YYYY-MM-DD`; each value carries `recordDate` plus
 *   whichever statistics fields could be parsed for that day.
 */
function parseBaijiahaoExcel(
  filePath: string
): Map<string, { recordDate: Date } & Record<string, any>> {
  const workbook = XLSX.readFile(filePath);
  const result = new Map<string, { recordDate: Date } & Record<string, any>>();
  logger.info(
    `[BJ Import] Excel loaded. file=${path.basename(filePath)} sheets=${workbook.SheetNames.join(' | ')}`
  );

  // [column index, target field] for plain counts, in assignment order.
  const countColumns: ReadonlyArray<readonly [number, string]> = [
    [1, 'playCount'], // reads
    [6, 'likeCount'], // likes
    [4, 'commentCount'], // comments
    [8, 'collectCount'], // favourites
    [10, 'shareCount'], // shares
  ];
  const CLICK_RATE_COLUMN = 2;
  const FANS_GAINED_COLUMN = 12;

  for (const sheetName of workbook.SheetNames) {
    const sheet = workbook.Sheets[sheetName];
    // `XLSX` is typed `any`, so the row type is declared on the variable;
    // a type argument on an untyped call does not type-check (TS2347).
    const rows: Array<Record<string, any>> = XLSX.utils.sheet_to_json(sheet, { defval: '' });
    if (!rows.length) {
      logger.warn(`[BJ Import] Sheet empty. name=${sheetName}`);
      continue;
    }
    const headerKeys = Object.keys(rows[0] || {});
    logger.info(
      `[BJ Import] Sheet parsed. name=${sheetName} rows=${rows.length} keys=${headerKeys.join(',')}`
    );

    for (const row of rows) {
      const cols = Object.keys(row || {});
      const dateCol = cols[0];
      if (dateCol === undefined) continue;

      const recordDate = normalizeDateText(row[dateCol]);
      if (!recordDate) continue;

      const month = String(recordDate.getMonth() + 1).padStart(2, '0');
      const day = String(recordDate.getDate()).padStart(2, '0');
      const key = `${recordDate.getFullYear()}-${month}-${day}`;

      let entry = result.get(key);
      if (!entry) {
        entry = { recordDate };
        result.set(key, entry);
      }

      const cell = (idx: number): any => {
        const col = idx >= 0 && idx < cols.length ? cols[idx] : undefined;
        return col === undefined ? undefined : row[col];
      };

      for (const [idx, field] of countColumns) {
        const count = parseChineseNumberLike(cell(idx));
        if (typeof count === 'number') entry[field] = count;
      }

      // Click rate -> coverClickRate; a zero / blank rate is not stored.
      const coverClickRate = formatRateWithPercent(cell(CLICK_RATE_COLUMN));
      if (coverClickRate !== '0') entry.coverClickRate = coverClickRate;

      // fansIncrease is the "fans gained" column only; fans lost is not subtracted.
      const fansGained = parseChineseNumberLike(cell(FANS_GAINED_COLUMN));
      if (typeof fansGained === 'number') entry.fansIncrease = fansGained;
    }
  }
  return result;
}
/**
 * Formats a rate for storage: non-zero rates get a `%` suffix, while zero,
 * blank or unparseable input yields the sentinel string `'0'`.
 *
 * - Input that already carries `%` is returned as-is when non-zero. The
 *   sign is stripped before the numeric check, since `Number("4.23%")` is NaN.
 * - A bare number in (0, 1] is treated as a fraction and scaled by 100.
 * - Any other number is taken to be a percentage already.
 *
 * @param v Raw cell / JSON value.
 * @returns e.g. `'4.23%'`, or `'0'`.
 */
function formatRateWithPercent(v: unknown): string {
  if (v === null || v === undefined) return '0';
  const text = String(v).trim();
  if (!text) return '0';

  const hasPercentSign = text.includes('%');
  const numeric = Number(text.replace(/[,%]/g, '').trim());
  if (!Number.isFinite(numeric) || numeric === 0) return '0';

  if (hasPercentSign) return text;
  if (numeric > 0 && numeric <= 1) return `${(numeric * 100).toFixed(2)}%`;
  // Number(...) drops trailing zeros: 12.50 -> "12.5%".
  return `${Number(numeric.toFixed(2))}%`;
}
/**
 * Renders a rate as a percent string with two decimals.
 *
 * - Text already containing `%` is returned trimmed but otherwise untouched.
 * - A number in [0, 1] is read as a fraction: `0.0423` -> `"4.23%"`.
 * - Any other finite number is read as a percentage: `4.23` -> `"4.23%"`.
 *
 * @param input Raw value.
 * @returns The formatted string, or `null` for nullish, blank or non-numeric input.
 */
function formatPercentString(input: unknown): string | null {
  if (input === undefined || input === null) return null;

  const text = String(input).trim();
  if (text === '') return null;
  if (text.includes('%')) return text;

  const value = Number(text);
  if (!Number.isFinite(value)) return null;

  const looksLikeFraction = value >= 0 && value <= 1;
  const percent = looksLikeFraction ? value * 100 : value;
  return `${percent.toFixed(2)}%`;
}
/**
 * Searches a JSON-like value breadth-first for the array that looks like a
 * per-day series, i.e. has at least one object element with a date-like field.
 *
 * Traversal descends through object properties only; elements of an array
 * are inspected but not themselves searched for nested arrays. Among all
 * qualifying arrays the longest wins, with ties going to the one reached first.
 *
 * The date key is taken from the first element that has any date-like field:
 * a well-known key name is preferred, otherwise that element's first
 * date-like field is used.
 *
 * @param root Decoded response body (any shape).
 * @returns The chosen array and the name of its date field, or `null` when
 *   no array qualifies.
 */
function findArrayWithDateLikeField(root: any): { arr: any[]; dateKey: string } | null {
  const preferredDateKeys = ['event_day', 'day', 'date', 'stat_day', 'statDay', 'dt', 'time', 'the_day'];

  // 8-digit compact dates (number or string) or YYYY-M-D / YYYY/M/D strings.
  const isDateLike = (v: unknown): boolean => {
    if (v === null || v === undefined) return false;
    if (typeof v === 'number') return /^\d{8}$/.test(String(v));
    const s = String(v).trim();
    return /^\d{8}$/.test(s) || /^\d{4}[-/]\d{1,2}[-/]\d{1,2}$/.test(s);
  };

  // One answer per array is enough: the first element with a date-like field decides.
  const detectDateKey = (arr: any[]): string | null => {
    for (const item of arr) {
      if (!item || typeof item !== 'object') continue;
      const keys = Object.keys(item);
      const preferred = preferredDateKeys.find((k) => keys.includes(k) && isDateLike(item[k]));
      if (preferred !== undefined) return preferred;
      const fallback = keys.find((k) => isDateLike(item[k]));
      if (fallback !== undefined) return fallback;
    }
    return null;
  };

  const seen = new Set<unknown>();
  const queue: unknown[] = [root];
  let best: { arr: any[]; dateKey: string } | null = null;

  // Index-based queue: avoids the O(n) cost of Array.prototype.shift().
  for (let head = 0; head < queue.length; head++) {
    const cur = queue[head];
    if (!cur || typeof cur !== 'object') continue;
    if (seen.has(cur)) continue; // also guards against cyclic structures
    seen.add(cur);

    if (Array.isArray(cur)) {
      const dateKey = detectDateKey(cur);
      // Strict '>' keeps the earliest array on equal length.
      if (dateKey !== null && (best === null || cur.length > best.arr.length)) {
        best = { arr: cur, dateKey };
      }
    } else {
      for (const v of Object.values(cur)) {
        if (v && typeof v === 'object') queue.push(v);
      }
    }
  }
  return best;
}
/**
 * Converts an `appStatisticV3` response into one record per calendar day.
 *
 * The per-day series is located with `findArrayWithDateLikeField`, so the
 * exact nesting of the response does not matter. For every statistic several
 * candidate field names are tried in order and the first one that parses wins.
 *
 * @param json Decoded response body.
 * @returns Map keyed by `YYYY-MM-DD`; each value carries `recordDate` plus
 *   the fields that were present (`playCount`, `likeCount`, `commentCount`,
 *   `collectCount`, `shareCount`, `coverClickRate`, `fansIncrease`). Empty
 *   when no per-day series is found.
 */
function parseBaijiahaoAppStatisticV3(json: any): Map<string, { recordDate: Date } & Record<string, any>> {
  const result = new Map<string, { recordDate: Date } & Record<string, any>>();
  const found = findArrayWithDateLikeField(json);
  if (!found) return result;
  const { arr, dateKey } = found;

  // [target field, candidate source keys in priority order]
  const countFields: ReadonlyArray<readonly [string, readonly string[]]> = [
    // reads (appStatisticV3 uses view_count)
    ['playCount', ['view_count', 'read_cnt', 'readCount', 'read', 'pv', 'view_cnt', 'viewCount', 'views']],
    // likes (API uses likes_count)
    ['likeCount', ['likes_count', 'like_cnt', 'praise_cnt', 'praise', 'likeCount', 'likes']],
    // comments (API uses comment_count)
    ['commentCount', ['comment_count', 'comment_cnt', 'commentCount', 'comments']],
    // favourites (API uses collect_count)
    ['collectCount', ['collect_count', 'collect_cnt', 'favorite_cnt', 'fav_cnt', 'collectCount', 'favorites']],
    // shares (API uses share_count)
    ['shareCount', ['share_count', 'share_cnt', 'shareCount', 'shares']],
  ];
  const clickRateKeys = ['click_rate', 'ctr', 'clickRate'];
  // fans gained by works (API uses fans_increase / fans_add_cnt)
  const fansIncreaseKeys = [
    'fans_increase',
    'fans_add_cnt',
    'works_fans_inc',
    'worksFansInc',
    'content_fans_inc',
    'fans_inc',
    'fansIncrease',
  ];

  const pickNumber = (obj: any, keys: readonly string[]): number | null => {
    for (const k of keys) {
      const raw = obj?.[k];
      if (raw === undefined || raw === null) continue;
      const n = parseChineseNumberLike(raw);
      if (typeof n === 'number') return n;
    }
    return null;
  };
  const pickString = (obj: any, keys: readonly string[]): string | null => {
    for (const k of keys) {
      const raw = obj?.[k];
      if (raw === undefined || raw === null) continue;
      const s = String(raw).trim();
      if (s) return s;
    }
    return null;
  };

  for (const item of arr) {
    if (!item || typeof item !== 'object') continue;
    const recordDate = normalizeDateText(item[dateKey]);
    if (!recordDate) continue;

    const month = String(recordDate.getMonth() + 1).padStart(2, '0');
    const day = String(recordDate.getDate()).padStart(2, '0');
    const key = `${recordDate.getFullYear()}-${month}-${day}`;

    let entry = result.get(key);
    if (!entry) {
      entry = { recordDate };
      result.set(key, entry);
    }

    for (const [field, sourceKeys] of countFields) {
      const count = pickNumber(item, sourceKeys);
      if (typeof count === 'number') entry[field] = count;
    }

    // Click rate -> coverClickRate. pickString already stringifies numeric
    // values, so no separate numeric lookup is needed.
    const clickRate = formatPercentString(pickString(item, clickRateKeys));
    if (clickRate) entry.coverClickRate = clickRate;

    const fansGained = pickNumber(item, fansIncreaseKeys);
    if (typeof fansGained === 'number') entry.fansIncrease = fansGained;
  }
  return result;
}
- export class BaijiahaoContentOverviewImportService {
- private accountRepository = AppDataSource.getRepository(PlatformAccount);
- private userDayStatisticsService = new UserDayStatisticsService();
- private downloadDir = path.resolve(process.cwd(), 'tmp', 'baijiahao-content-overview');
- private stateDir = path.resolve(process.cwd(), 'tmp', 'baijiahao-storage-state');
/** Returns the path of the storageState file for `accountId`: `<stateDir>/<accountId>.json`. */
private getStatePath(accountId: number) {
  const fileName = `${accountId}.json`;
  return path.join(this.stateDir, fileName);
}
/**
 * Returns the path of a saved Playwright storageState for the account,
 * creating one interactively when allowed.
 *
 * - If the state file already exists, its path is returned immediately.
 * - Otherwise a state can only be bootstrapped when BJ_IMPORT_HEADLESS=0 and
 *   BJ_STORAGE_STATE_BOOTSTRAP=1 are both set: a browser window is opened on
 *   the analysis page, the operator has up to 5 minutes to finish
 *   login / verification, and the resulting state is written to disk.
 * - Without those flags `null` is returned and callers fall back to raw cookies.
 *
 * @param account Account whose state is needed.
 * @param cookies Cookies seeded into the bootstrap context.
 * @returns Path to the state file, or `null` when none exists and
 *   bootstrapping is not enabled.
 */
private async ensureStorageState(
  account: PlatformAccount,
  cookies: PlaywrightCookie[]
): Promise<string | null> {
  const statePath = this.getStatePath(account.id);
  try {
    await fs.access(statePath);
    return statePath;
  } catch {
    // No saved state yet.
  }

  const bootstrapEnabled =
    process.env.BJ_IMPORT_HEADLESS === '0' && process.env.BJ_STORAGE_STATE_BOOTSTRAP === '1';
  if (!bootstrapEnabled) return null;

  await ensureDir(this.stateDir);
  logger.warn(
    `[BJ Import] No storageState for accountId=${account.id}. Bootstrapping... 请在弹出的浏览器中完成登录/验证。`
  );

  const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
  let contextToClose: BrowserContext | null = null;
  try {
    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
      locale: 'zh-CN',
      timezoneId: 'Asia/Shanghai',
    });
    contextToClose = context;

    await context.addCookies(cookies as any);
    const page = await context.newPage();
    await page.goto('https://baijiahao.baidu.com/builder/rc/analysiscontent', {
      waitUntil: 'domcontentloaded',
    });

    // Wait up to 5 minutes for the operator to finish login / SMS verification.
    // The state is still saved on timeout (best effort), but the timeout is logged.
    try {
      await page.waitForFunction(
        () => {
          const t = document.body?.innerText || '';
          return t.includes('数据中心') || t.includes('内容分析') || t.includes('基础数据');
        },
        { timeout: 5 * 60_000 }
      );
    } catch {
      logger.warn(
        `[BJ Import] Dashboard markers not seen within 5 minutes; saving storageState anyway. accountId=${account.id}`
      );
    }

    await context.storageState({ path: statePath });
    logger.info(`[BJ Import] storageState saved: ${statePath}`);
    return statePath;
  } finally {
    // Close the context on every path: the browser may be shared (shouldClose=false),
    // in which case an unclosed context would stay open.
    if (contextToClose) await contextToClose.close().catch(() => undefined);
    if (shouldClose) await browser.close().catch(() => undefined);
  }
}
/**
 * POSTs a JSON payload to `/baijiahao/<apiName>` on the Python service and
 * returns the decoded JSON body. The request is aborted after 30 seconds.
 *
 * @param apiName Endpoint name, also used in error messages.
 * @param payload Request body.
 * @throws Error when the response status is not OK (message taken from the
 *   body's `errmsg` / `error` when available, else the HTTP status), or when
 *   a successful response is not valid JSON.
 */
private async postJsonToPythonService(
  apiName: string,
  payload: Record<string, unknown>
): Promise<Record<string, unknown>> {
  const base = (await getPythonServiceBaseUrl()).replace(/\/$/, '');
  const url = `${base}/baijiahao/${apiName}`;

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 30_000);
  try {
    const res = await fetch(url, {
      method: 'POST',
      signal: controller.signal,
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    const text = await res.text();

    // Decode defensively: an error page (e.g. HTML from a gateway) must not
    // turn into a SyntaxError that hides the HTTP status.
    let data: Record<string, unknown> = {};
    let bodyIsJson = true;
    if (text) {
      try {
        data = JSON.parse(text) as Record<string, unknown>;
      } catch {
        bodyIsJson = false;
      }
    }

    if (!res.ok) {
      const msg = String(data?.errmsg || data?.error || '').trim() || `HTTP ${res.status}`;
      throw new Error(`Python ${apiName} 调用失败: ${msg}`);
    }
    if (!bodyIsJson) {
      throw new Error(`Python ${apiName} 调用失败: 响应不是合法 JSON (HTTP ${res.status})`);
    }
    return data;
  } finally {
    clearTimeout(timeoutId);
  }
}

/**
 * Calls appStatisticV3 through the Python service, authenticating with the
 * cookie stored on the account (same login mode as opening the backend).
 *
 * @param startDay Range start, `YYYYMMDD`.
 * @param endDay Range end, `YYYYMMDD`.
 * @throws Error when the account has no cookie or the call fails.
 */
private async fetchAppStatisticV3ViaPython(
  account: PlatformAccount,
  startDay: string,
  endDay: string
): Promise<Record<string, unknown>> {
  const cookie = String(account.cookieData || '').trim();
  if (!cookie) throw new Error('百家号账号 cookie 为空,无法调用 Python app_statistic_v3');
  return this.postJsonToPythonService('app_statistic_v3', {
    cookie,
    start_day: startDay,
    end_day: endDay,
  });
}

/**
 * Calls getFansBasicInfo through the Python service, authenticating with the
 * cookie stored on the account (same login mode as opening the backend).
 *
 * @param start Range start, `YYYYMMDD`.
 * @param end Range end, `YYYYMMDD`.
 * @throws Error when the account has no cookie or the call fails.
 */
private async fetchFansBasicInfoViaPython(
  account: PlatformAccount,
  start: string,
  end: string
): Promise<Record<string, unknown>> {
  const cookie = String(account.cookieData || '').trim();
  if (!cookie) throw new Error('百家号账号 cookie 为空,无法调用 Python fans_basic_info');
  return this.postJsonToPythonService('fans_basic_info', { cookie, start, end });
}
/**
 * Single entry point used by both the scheduled job and the add-account flow:
 * runs "content analysis - basic data - last 30 days" plus the fans
 * getFansBasicInfo import for every Baijiahao account.
 */
static async runDailyImport(): Promise<void> {
  const importer = new BaijiahaoContentOverviewImportService();
  return importer.runDailyImportForAllBaijiahaoAccounts();
}
/**
 * Imports "data center - content analysis - basic data - last 30 days" into
 * user_day_statistics for every Baijiahao account, one account at a time.
 * A failure on one account is logged and does not stop the remaining accounts.
 */
async runDailyImportForAllBaijiahaoAccounts(): Promise<void> {
  await ensureDir(this.downloadDir);

  const baijiahaoAccounts = await this.accountRepository.find({
    where: { platform: 'baijiahao' as any },
  });
  logger.info(`[BJ Import] Start. total_accounts=${baijiahaoAccounts.length}`);

  for (const current of baijiahaoAccounts) {
    try {
      await this.importAccountLast30Days(current);
    } catch (err) {
      const failureLine = `[BJ Import] Account failed. accountId=${current.id} name=${current.accountName || ''}`;
      logger.error(failureLine, err);
    }
  }

  logger.info('[BJ Import] Done.');
}
/**
 * Imports the last 30 days of statistics for one account.
 *
 * Strategy, in order:
 *  1. Python service (no browser; uses the cookie stored on the account):
 *     appStatisticV3 for the basic data, then getFansBasicInfo for fans
 *     (a fans failure is non-fatal).
 *  2. If that fails and this is not already a retry: refresh the account and
 *     run the whole import once more with the refreshed record.
 *  3. Browser fallback: open the analysis page, call appStatisticV3 from the
 *     browser context, top up from the Excel export when fewer than 20 days
 *     came back, then import fans data.
 *
 * The 30-day window ends on yesterday in Asia/Shanghai and is used for every
 * request, so a server running in another timezone asks for the same days
 * for basic data and fans data.
 *
 * @param account Account to import.
 * @param isRetry `true` when called again after an account refresh; prevents
 *   further refresh attempts.
 * @throws Error when the cookie cannot be parsed, login is required, the
 *   dashboard is not accessible (the account is then marked `expired` and the
 *   user notified), or the browser fallback fails.
 */
async importAccountLast30Days(account: PlatformAccount, isRetry = false): Promise<void> {
  const LOGIN_REQUIRED_MESSAGE = '未登录/需要重新登录(跳转到登录页)';
  const ANALYSIS_PAGE_URL = 'https://baijiahao.baidu.com/builder/rc/analysiscontent';
  const DAY_MS = 24 * 60 * 60 * 1000;

  const cookies = parseCookiesFromAccount(account.cookieData);
  if (!cookies.length) throw new Error('cookieData 为空或无法解析');

  // ---- Date range: [yesterday - 29d, yesterday] in China time, as YYYYMMDD ----
  const toChinaYMD = (date: Date): { y: number; m: number; d: number } => {
    const parts = new Intl.DateTimeFormat('en-CA', {
      timeZone: 'Asia/Shanghai',
      year: 'numeric',
      month: '2-digit',
      day: '2-digit',
    }).formatToParts(date);
    const get = (type: string) => parts.find((p) => p.type === type)?.value ?? '0';
    return {
      y: parseInt(get('year'), 10),
      m: parseInt(get('month'), 10),
      d: parseInt(get('day'), 10),
    };
  };
  const pad = (n: number) => String(n).padStart(2, '0');
  const toCompact = ({ y, m, d }: { y: number; m: number; d: number }) => `${y}${pad(m)}${pad(d)}`;

  const chinaToday = toChinaYMD(new Date());
  // UTC midnight of a China calendar day is 08:00 that same day in China, so
  // formatting it back in Asia/Shanghai yields the same calendar date.
  const rangeEnd = new Date(Date.UTC(chinaToday.y, chinaToday.m - 1, chinaToday.d) - DAY_MS);
  const rangeStart = new Date(rangeEnd.getTime() - 29 * DAY_MS);
  const startDay = toCompact(toChinaYMD(rangeStart));
  const endDay = toCompact(toChinaYMD(rangeEnd));

  // Persists every per-day record and returns the insert / update totals.
  const savePerDay = async (
    perDay: Map<string, { recordDate: Date } & Record<string, any>>
  ): Promise<{ inserted: number; updated: number }> => {
    let inserted = 0;
    let updated = 0;
    for (const v of perDay.values()) {
      const { recordDate, ...patch } = v;
      const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
      inserted += r.inserted;
      updated += r.updated;
    }
    return { inserted, updated };
  };

  // ---- 1) Python path (stored cookie only, no browser) ----
  try {
    const data = await this.fetchAppStatisticV3ViaPython(account, startDay, endDay);
    const errno = typeof data?.errno === 'number' ? data.errno : Number(data?.errno ?? -1);
    if (errno !== 0) throw new Error(data?.errmsg ? String(data.errmsg) : 'appStatisticV3 errno !== 0');

    const perDay = parseBaijiahaoAppStatisticV3(data);
    if (perDay.size === 0) throw new Error('appStatisticV3 解析后无数据');

    const { inserted, updated } = await savePerDay(perDay);
    logger.info(
      `[BJ Import] basic-data (via Python). accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
    );

    try {
      const fansBody = await this.fetchFansBasicInfoViaPython(account, startDay, endDay);
      const fansErrno = (fansBody as any).errno;
      if (fansErrno === 0 || fansErrno === undefined) {
        const list = this.parseGetFansBasicInfoResponse(fansBody as Record<string, unknown>);
        let fansUpdated = 0;
        for (const { recordDate, fansCount, fansIncrease } of list) {
          const r = await this.userDayStatisticsService.saveStatisticsForDate(
            account.id,
            recordDate,
            { fansCount, fansIncrease }
          );
          fansUpdated += r.inserted + r.updated;
        }
        logger.info(`[BJ Import] Fans data (via Python). accountId=${account.id} days=${list.length} updated=${fansUpdated}`);
      }
    } catch (e) {
      logger.warn(`[BJ Import] Fans via Python failed (non-fatal). accountId=${account.id}`, e instanceof Error ? e.message : e);
    }
    return;
  } catch (pythonError) {
    logger.warn(
      `[BJ Import] Python path failed, fallback to browser. accountId=${account.id}`,
      pythonError instanceof Error ? pythonError.message : pythonError
    );
  }

  // ---- 2) Refresh the account once and retry the whole import ----
  if (!isRetry) {
    try {
      const accountService = new AccountService();
      const refreshResult = await accountService.refreshAccount(account.userId, account.id);
      if (!refreshResult.needReLogin) {
        const refreshedAccount = await this.accountRepository.findOne({ where: { id: account.id } });
        if (refreshedAccount) {
          logger.info(`[BJ Import] Account ${account.id} refreshed, retrying import...`);
          return await this.importAccountLast30Days(refreshedAccount, true);
        }
      }
    } catch (refreshError) {
      logger.error(`[BJ Import] Account ${account.id} refresh failed:`, refreshError);
    }
  }

  // ---- 3) Browser fallback ----
  const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
  let contextToClose: BrowserContext | null = null;
  try {
    const statePath = await this.ensureStorageState(account, cookies);
    const context = await browser.newContext({
      acceptDownloads: true,
      viewport: { width: 1920, height: 1080 },
      locale: 'zh-CN',
      timezoneId: 'Asia/Shanghai',
      userAgent:
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      ...(statePath ? { storageState: statePath } : {}),
    });
    contextToClose = context;
    context.setDefaultTimeout(60_000);
    if (!statePath) await context.addCookies(cookies as any);

    const page = await context.newPage();
    await page.goto(ANALYSIS_PAGE_URL, { waitUntil: 'domcontentloaded' });
    await page.waitForTimeout(1500);

    // Redirected to the login page: the session is gone.
    const landedUrl = page.url();
    if (landedUrl.includes('passport') || landedUrl.includes('login')) {
      if (isRetry) throw new Error(LOGIN_REQUIRED_MESSAGE);

      logger.info(`[BJ Import] Login expired for account ${account.id}, attempting refresh...`);
      // Release the browser before refreshing; the finally block tolerates the double close.
      await context.close().catch(() => undefined);
      if (shouldClose) await browser.close().catch(() => undefined);

      // Any refresh failure is reported as "login required".
      const refreshForRetry = async (): Promise<PlatformAccount> => {
        try {
          const accountService = new AccountService();
          const refreshResult = await accountService.refreshAccount(account.userId, account.id);
          if (refreshResult.needReLogin) {
            logger.warn(`[BJ Import] Account ${account.id} refresh failed, still needs re-login`);
            throw new Error(LOGIN_REQUIRED_MESSAGE);
          }
          const refreshed = await this.accountRepository.findOne({ where: { id: account.id } });
          if (!refreshed) throw new Error('账号刷新后未找到');
          return refreshed;
        } catch (refreshError) {
          logger.error(`[BJ Import] Account ${account.id} refresh failed:`, refreshError);
          throw new Error(LOGIN_REQUIRED_MESSAGE);
        }
      };
      const refreshedAccount = await refreshForRetry();
      // The retry runs outside the catch above so its own errors are not
      // rewritten into a misleading "login required".
      return await this.importAccountLast30Days(refreshedAccount, true);
    }

    // Dashboard reachable but not accessible for this account.
    const bodyText = (await page.textContent('body').catch(() => '')) || '';
    if (bodyText.includes('暂无数据') || bodyText.includes('无权访问')) {
      await this.accountRepository.update(account.id, { status: 'expired' as any });
      wsManager.sendToUser(account.userId, WS_EVENTS.ACCOUNT_UPDATED, {
        account: { id: account.id, status: 'expired', platform: 'baijiahao' },
      });
      wsManager.sendToUser(account.userId, WS_EVENTS.SYSTEM_MESSAGE, {
        level: 'warning',
        message: `百家号账号「${account.accountName || account.accountId || account.id}」暂无数据看板访问权限,请到百家号后台检查数据权限。`,
        platform: 'baijiahao',
        accountId: account.id,
      });
      throw new Error('百家号数据看板暂无访问权限/暂无数据,已标记 expired 并通知用户');
    }

    // Best-effort navigation to the basic-data tab; a missing element is ignored.
    for (const label of ['数据中心', '内容分析', '基础数据']) {
      await page.getByText(label, { exact: false }).first().click().catch(() => undefined);
    }

    // Best-effort switch of the range picker to "last 30 days".
    try {
      const trigger = page.getByText(/近\d+天?/, { exact: false }).first();
      if ((await trigger.count()) > 0) await trigger.click().catch(() => undefined);
      const thirtyDay =
        (await page.getByText('近30天', { exact: true }).first().count()) > 0
          ? page.getByText('近30天', { exact: true }).first()
          : page.getByText('近30日', { exact: false }).first();
      await thirtyDay.click().catch(() => undefined);
      await page.waitForTimeout(5000);
    } catch (e) {
      logger.warn(`[BJ Import] Unable to switch to 近30天. accountId=${account.id}`, e);
    }

    // Primary source in the browser: the JSON API called with the context's session.
    let perDay = new Map<string, { recordDate: Date } & Record<string, any>>();
    try {
      const apiUrl = `https://baijiahao.baidu.com/author/eco/statistics/appStatisticV3?type=all&start_day=${startDay}&end_day=${endDay}&stat=0&special_filter_days=30`;
      const res = await (context as any).request.get(apiUrl, {
        headers: { Referer: ANALYSIS_PAGE_URL },
      });
      if (!res.ok()) throw new Error(`appStatisticV3 http ${res.status()}`);
      const json = await res.json().catch(() => null);
      if (!json) throw new Error('appStatisticV3 json parse failed');
      if (process.env.BJ_IMPORT_DEBUG === '1') {
        const debugPath = path.join(this.downloadDir, `appStatisticV3_response_${account.id}_${Date.now()}.json`);
        await ensureDir(this.downloadDir);
        await fs.writeFile(debugPath, JSON.stringify(json, null, 2), 'utf-8');
        logger.info(`[BJ Import] DEBUG: appStatisticV3 原始响应已写入 ${debugPath}`);
      }
      perDay = parseBaijiahaoAppStatisticV3(json);
    } catch (e) {
      logger.warn(`[BJ Import] appStatisticV3 failed, fallback to Excel. accountId=${account.id}`, e);
    }

    let filePath: string | null = null;
    try {
      // Fewer than 20 days from the API (including none): export the Excel
      // file and add the days the API did not return. API values win on overlap.
      if (perDay.size < 20) {
        const [download] = await Promise.all([
          page.waitForEvent('download', { timeout: 60_000 }),
          page.getByText('导出数据', { exact: true }).first().click(),
        ]);
        filePath = path.join(this.downloadDir, `${account.id}_${Date.now()}_${download.suggestedFilename()}`);
        await download.saveAs(filePath);
        for (const [k, v] of parseBaijiahaoExcel(filePath)) {
          if (!perDay.has(k)) perDay.set(k, v);
        }
      }

      const { inserted, updated } = await savePerDay(perDay);
      logger.info(`[BJ Import] basic-data (browser). accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`);
    } finally {
      // Also removes the file when parsing or saving throws.
      if (filePath && process.env.KEEP_BJ_XLSX !== 'true') await fs.unlink(filePath).catch(() => undefined);
    }

    try {
      await this.importFansDataByApi(context, account);
    } catch (e) {
      logger.warn(`[BJ Import] Fans import failed (non-fatal). accountId=${account.id}`, e instanceof Error ? e.message : e);
    }
  } finally {
    // Close the context on every path: with a shared browser (shouldClose=false)
    // an unclosed context would stay open after an error.
    if (contextToClose) await contextToClose.close().catch(() => undefined);
    if (shouldClose) await browser.close().catch(() => undefined);
  }
}
/**
 * Imports follower ("fans") statistics by requesting the getFansBasicInfo endpoint directly
 * through the browser context's request client, without opening a page.
 *
 * The requested range is 30 days ending yesterday, where "yesterday" is taken from the
 * Asia/Shanghai calendar so that a server running in another time zone still asks for the
 * full window. Field mapping: sum_fans_count → fansCount, new_fans_count → fansIncrease.
 *
 * Request-level problems (thrown request error, non-2xx status, unparseable body, non-zero
 * `errno`, empty list) are logged and the method returns without saving anything. Errors
 * raised while saving a day's statistics are not caught here and propagate to the caller.
 *
 * @param context Browser context whose `request` client performs the HTTP call.
 * @param account Account whose `id` keys the saved daily statistics.
 */
private async importFansDataByApi(context: BrowserContext, account: PlatformAccount): Promise<void> {
  const dayMs = 24 * 60 * 60 * 1000;
  const windowDays = 30;

  // Today's calendar date as seen in China, independent of the server's own time zone.
  const chinaParts = new Intl.DateTimeFormat('en-CA', {
    timeZone: 'Asia/Shanghai',
    year: 'numeric',
    month: '2-digit',
    day: '2-digit',
  }).formatToParts(new Date());
  const partOf = (type: string): number =>
    parseInt(chinaParts.find((p) => p.type === type)?.value ?? '0', 10);

  // Represent China calendar days as UTC-midnight instants so day arithmetic is plain
  // millisecond subtraction and the UTC getters read back the same calendar day.
  const todayUtcMidnight = Date.UTC(partOf('year'), partOf('month') - 1, partOf('day'), 0, 0, 0, 0);
  const endDate = new Date(todayUtcMidnight - dayMs);
  const startDate = new Date(endDate.getTime() - (windowDays - 1) * dayMs);

  const pad = (n: number): string => String(n).padStart(2, '0');
  const toYmd = (d: Date): string => `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}`;
  const startStr = toYmd(startDate);
  const endStr = toYmd(endDate);

  const apiUrl = `https://baijiahao.baidu.com/author/eco/statistics/getFansBasicInfo?start=${startStr}&end=${endStr}&fans_type=new%2Csum&sort=asc&is_page=0&show_type=chart`;
  logger.info(`[BJ Import] getFansBasicInfo range (China). accountId=${account.id} start=${startStr} end=${endStr}`);

  let body: Record<string, unknown> | null = null;
  try {
    // `request` is reached through `any`, as the surrounding code did; the call shape
    // (get / ok / status / json) is the only contract relied on here.
    const res = await (context as any).request.get(apiUrl, {
      headers: { Referer: 'https://baijiahao.baidu.com/builder/rc/analysisfans/basedata' },
    });
    if (!res.ok()) {
      // Report the HTTP failure as such instead of letting it surface as "not valid JSON".
      logger.warn(`[BJ Import] getFansBasicInfo HTTP ${res.status()}, skip. accountId=${account.id}`);
      return;
    }
    body = await res.json().catch(() => null);
  } catch (e) {
    logger.warn(`[BJ Import] getFansBasicInfo request failed. accountId=${account.id}`, e);
    return;
  }

  if (!body || typeof body !== 'object') {
    logger.warn(`[BJ Import] getFansBasicInfo response not valid JSON, skip. accountId=${account.id}`);
    return;
  }

  // A missing errno is tolerated; only an explicit non-zero value is treated as failure.
  const errno = body.errno;
  if (errno !== 0 && errno !== undefined) {
    logger.warn(`[BJ Import] getFansBasicInfo errno=${String(errno)}, skip. accountId=${account.id}`);
    return;
  }

  const list = this.parseGetFansBasicInfoResponse(body);
  if (!list.length) {
    logger.info(`[BJ Import] No fans data from getFansBasicInfo. accountId=${account.id}`);
    return;
  }

  // Log-only formatting; uses the Date's local-time getters, as before.
  const fmtDay = (d: Date | undefined): string =>
    d ? `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}` : '';
  const firstDay = list[0]?.recordDate;
  const lastDay = list[list.length - 1]?.recordDate;
  logger.info(`[BJ Import] getFansBasicInfo response. accountId=${account.id} count=${list.length} first=${fmtDay(firstDay)} last=${fmtDay(lastDay)}`);

  // Saved one day at a time, in response order; inserts and updates are counted together.
  let updated = 0;
  for (const { recordDate, fansCount, fansIncrease } of list) {
    const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, {
      fansCount,
      fansIncrease,
    });
    updated += r.inserted + r.updated;
  }
  logger.info(`[BJ Import] Fans data imported. accountId=${account.id} days=${list.length} updated=${updated}`);
}
/**
 * Extracts per-day follower figures from a getFansBasicInfo response body.
 *
 * Reads `body.data.list` and, for each object entry whose `day` is an 8-digit string that
 * `normalizeDateText` accepts, emits `{ recordDate, fansCount, fansIncrease }` with
 * sum_fans_count → fansCount and new_fans_count → fansIncrease. Entries with a missing or
 * malformed `day` are skipped. Counts that are missing, blank, "--" or not numeric become 0;
 * numeric counts are rounded and clamped to be non-negative.
 *
 * @param body Parsed JSON response.
 * @returns Rows in the same order as the response list; empty when `data.list` is absent
 *          or not an array.
 */
private parseGetFansBasicInfoResponse(
  body: Record<string, unknown>
): Array<{ recordDate: Date; fansCount: number; fansIncrease: number }> {
  const rows: Array<{ recordDate: Date; fansCount: number; fansIncrease: number }> = [];

  const payload = body.data as Record<string, unknown> | undefined;
  const entries: unknown = payload && typeof payload === 'object' ? payload.list : undefined;
  if (!Array.isArray(entries)) return rows;

  const eightDigits = /^\d{8}$/;

  // Coerces a raw count to a non-negative integer, treating placeholders and junk as 0.
  const toCount = (raw: unknown): number => {
    if (raw === null || raw === undefined) return 0;
    let parsed: number;
    if (typeof raw === 'number' && Number.isFinite(raw)) {
      parsed = raw;
    } else {
      const text = String(raw).trim();
      if (text === '' || text === '--') return 0;
      // Thousands separators are stripped before conversion.
      parsed = Number(text.replace(/,/g, ''));
      if (!Number.isFinite(parsed)) return 0;
    }
    return Math.max(0, Math.round(parsed));
  };

  for (const entry of entries) {
    if (typeof entry !== 'object' || entry === null) continue;
    const record = entry as Record<string, unknown>;

    const rawDay = record.day;
    if (rawDay === null || rawDay === undefined) continue;
    const dayText = String(rawDay).trim();
    if (!eightDigits.test(dayText)) continue;

    const recordDate = normalizeDateText(dayText);
    if (!recordDate) continue;

    rows.push({
      recordDate,
      fansCount: toCount(record.sum_fans_count),
      fansIncrease: toCount(record.new_fans_count),
    });
  }

  return rows;
}
- }
|