|
|
@@ -0,0 +1,655 @@
|
|
|
+import fs from 'node:fs/promises';
|
|
|
+import path from 'node:path';
|
|
|
+import { chromium, type Browser } from 'playwright';
|
|
|
+import * as XLSXNS from 'xlsx';
|
|
|
+import { AppDataSource, PlatformAccount } from '../models/index.js';
|
|
|
+import { BrowserManager } from '../automation/browser.js';
|
|
|
+import { logger } from '../utils/logger.js';
|
|
|
+import { UserDayStatisticsService } from './UserDayStatisticsService.js';
|
|
|
+import type { ProxyConfig } from '@media-manager/shared';
|
|
|
+import { WS_EVENTS } from '@media-manager/shared';
|
|
|
+import { wsManager } from '../websocket/index.js';
|
|
|
+
|
|
|
// Under ESM the xlsx package may hang its API off `default`; fall back to the
// namespace object itself when no default export is present.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const xlsxNamespace = XLSXNS as any;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const XLSX: any = xlsxNamespace.default ?? xlsxNamespace;
|
|
|
+
|
|
|
/**
 * Cookie record produced by `parseCookiesFromAccount` and passed to the
 * browser context's `addCookies` call in this file.
 *
 * Only `name` and `value` are mandatory here; every other attribute is optional.
 */
type PlaywrightCookie = {
  name: string;
  value: string;
  // Location attributes.
  url?: string;
  domain?: string;
  path?: string;
  // Lifetime and security attributes.
  expires?: number;
  httpOnly?: boolean;
  secure?: boolean;
  sameSite?: 'Lax' | 'None' | 'Strict';
};
|
|
|
+
|
|
|
/**
 * Creates directory `p`, including any missing parent directories.
 *
 * @param p Directory path to create.
 * @returns The promise returned by `fs.mkdir` with `recursive: true`.
 */
function ensureDir(p: string) {
  const mkdirOptions = { recursive: true } as const;
  return fs.mkdir(p, mkdirOptions);
}
|
|
|
+
|
|
|
/**
 * Extracts a calendar date from loosely formatted text and returns it as a
 * local-time `Date` at midnight.
 *
 * Recognised shapes:
 * - compact `YYYYMMDD` (must be the whole string), e.g. `20260115`;
 * - `YYYY<sep>M<sep>D` found anywhere in the text, where `<sep>` is any single
 *   non-digit character, e.g. `2026-01-15`, `2026/1/5`, `2026-01-15 08:00:00`.
 *
 * @param input Raw value (cell content, JSON field, ...); coerced with `String()`.
 * @returns The parsed date, or `null` when the input is empty, has no recognised
 *   date, or names a day that does not exist (month 13, February 30, ...).
 */
function normalizeDateText(input: unknown): Date | null {
  if (!input) return null;
  const text = String(input).trim();
  if (!text) return null;

  // The compact form takes precedence; the separated form is only tried when
  // the compact pattern does not match at all.
  const match =
    text.match(/^(\d{4})(\d{2})(\d{2})$/) ?? text.match(/(\d{4})\D(\d{1,2})\D(\d{1,2})/);
  if (!match) return null;

  const year = Number(match[1]);
  const month = Number(match[2]);
  const day = Number(match[3]);
  if (!year || !month || !day) return null;

  // `new Date(y, m, d)` already yields local midnight.
  const date = new Date(year, month - 1, day);

  // The Date constructor silently rolls out-of-range parts over (e.g. month 13
  // becomes January of the next year). Reject such input instead of attributing
  // data to the wrong day.
  const roundTrips =
    date.getFullYear() === year && date.getMonth() === month - 1 && date.getDate() === day;
  return roundTrips ? date : null;
}
|
|
|
+
|
|
|
/**
 * Parses a count that may use thousands separators or the Chinese magnitude
 * suffixes 万 (10^4) and 亿 (10^8), and rounds it to the nearest integer.
 *
 * Examples: `"1,234"` -> 1234, `"1.2万"` -> 12000, `"3亿"` -> 300000000.
 *
 * @param input Raw value; coerced with `String()`.
 * @returns The rounded number, or `null` when the input is null/undefined,
 *   blank, contains no digits at all, or is not a finite number after cleanup.
 */
function parseChineseNumberLike(input: unknown): number | null {
  if (input === null || input === undefined) return null;
  const text = String(input).trim();
  if (!text) return null;

  // Drop thousands separators (ASCII and full-width commas).
  const plain = text.replace(/[,,]/g, '');

  // Magnitude suffix, with an optional sign so negative values keep their unit.
  const withUnit = plain.match(/^([+-]?\d+(?:\.\d+)?)\s*(万|亿)$/);
  if (withUnit) {
    const factor = withUnit[2] === '万' ? 10_000 : 100_000_000;
    return Math.round(Number(withUnit[1]) * factor);
  }

  const digits = plain.replace(/[^\d.-]/g, '');
  // Without this guard, text with no digits strips down to '' and `Number('')`
  // is 0, which would turn garbage into a real-looking zero.
  if (!/\d/.test(digits)) return null;

  const value = Number(digits);
  return Number.isFinite(value) ? Math.round(value) : null;
}
|
|
|
+
|
|
|
/**
 * Converts an account's stored cookie text into cookie records for Playwright.
 *
 * Two storage formats are understood:
 * 1. JSON: either an array of cookie objects or an object with a `cookies`
 *    array. Entries without a `name` are dropped.
 * 2. A cookie header string such as `"a=b; c=d"`.
 *
 * Cookies with a `domain` are emitted as a domain/path pair (path defaults to
 * `/`). Cookies without one are scoped by `url` to the Baijiahao origin and
 * carry no `path`, because Playwright rejects a cookie that specifies both
 * `url` and `path`.
 *
 * @param cookieData Raw cookie text from the account record, or `null`.
 * @returns Parsed cookies; an empty array when nothing usable was found.
 */
function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
  const raw = cookieData?.trim();
  if (!raw) return [];

  const fallbackUrl = 'https://baijiahao.baidu.com';

  // 1) JSON array, or an object wrapping a `cookies` array.
  if (raw.startsWith('[') || raw.startsWith('{')) {
    try {
      const parsed: unknown = JSON.parse(raw);
      const list = Array.isArray(parsed)
        ? parsed
        : (parsed as { cookies?: unknown } | null)?.cookies;
      if (!Array.isArray(list)) return [];

      const cookies: PlaywrightCookie[] = [];
      for (const entry of list) {
        const c = (entry ?? {}) as Record<string, unknown>;
        const name = String(c.name ?? '').trim();
        if (!name) continue;

        const domain = c.domain ? String(c.domain) : undefined;
        const rawSameSite = c.sameSite;
        const sameSite =
          rawSameSite === 'Lax' || rawSameSite === 'None' || rawSameSite === 'Strict'
            ? rawSameSite
            : undefined;

        const cookie: PlaywrightCookie = {
          name,
          value: String(c.value ?? '').trim(),
          // Either a domain/path pair or a url, never both.
          ...(domain
            ? { domain, path: c.path ? String(c.path) : '/' }
            : { url: fallbackUrl }),
          expires: typeof c.expires === 'number' ? c.expires : undefined,
          httpOnly: typeof c.httpOnly === 'boolean' ? c.httpOnly : undefined,
          secure: typeof c.secure === 'boolean' ? c.secure : undefined,
          sameSite,
        };
        cookies.push(cookie);
      }
      return cookies;
    } catch {
      // Not valid JSON after all; fall through and try the header format.
    }
  }

  // 2) Cookie header string: "a=b; c=d".
  const cookies: PlaywrightCookie[] = [];
  for (const pair of raw.split(';')) {
    const trimmed = pair.trim();
    const eq = trimmed.indexOf('=');
    // Skip empty segments and segments with no name before '='.
    if (eq <= 0) continue;
    const name = trimmed.slice(0, eq).trim();
    if (!name) continue;
    cookies.push({ name, value: trimmed.slice(eq + 1).trim(), url: fallbackUrl });
  }
  return cookies;
}
|
|
|
+
|
|
|
/**
 * Obtains a browser for an account.
 *
 * With an enabled proxy, a dedicated Chromium instance is launched through that
 * proxy and the caller must close it (`shouldClose: true`). Otherwise the
 * browser comes from `BrowserManager.getBrowser` and is reported with
 * `shouldClose: false`.
 *
 * The browser is headless unless both `BJ_STORAGE_STATE_BOOTSTRAP=1` and
 * `BJ_IMPORT_HEADLESS=0` are set (interactive login/verification bootstrap).
 *
 * @param proxy The account's proxy settings, or `null`.
 * @returns The browser and whether the caller owns (and must close) it.
 */
async function createBrowserForAccount(proxy: ProxyConfig | null): Promise<{ browser: Browser; shouldClose: boolean }> {
  const bootstrapRequested = process.env.BJ_STORAGE_STATE_BOOTSTRAP === '1';
  const headfulRequested = process.env.BJ_IMPORT_HEADLESS === '0';
  const headless = !(bootstrapRequested && headfulRequested);

  // No proxy: use the browser provided by BrowserManager.
  if (!proxy?.enabled) {
    const managed = await BrowserManager.getBrowser({ headless });
    return { browser: managed, shouldClose: false };
  }

  // Proxy settings are passed at launch, so a dedicated instance is started.
  const launchArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--window-size=1920,1080',
  ];
  const proxySettings = {
    server: `${proxy.type}://${proxy.host}:${proxy.port}`,
    username: proxy.username,
    password: proxy.password,
  };
  const dedicated = await chromium.launch({ headless, proxy: proxySettings, args: launchArgs });
  return { browser: dedicated, shouldClose: true };
}
|
|
|
+
|
|
|
/**
 * Reads a Baijiahao "basic data" Excel export and aggregates it per day.
 *
 * The export is GBK-encoded, so header names arrive garbled in Node. The column
 * order is stable, though, so cells are mapped by position:
 *
 *   0 date (e.g. 20260115)   1 reads            2 click rate      3 interaction rate
 *   4 comments               5 comment rate(%)  6 likes           7 like rate(%)
 *   8 favourites             9 favourite rate(%) 10 shares        11 share rate(%)
 *   12 fans gained by works  13 fan gain rate   14 fans lost by works
 *
 * Other columns are not imported. Rows whose first cell is not a recognisable
 * date are skipped.
 *
 * @param filePath Path of the downloaded workbook.
 * @returns Map keyed by `YYYY-MM-DD`; each value holds `recordDate` plus
 *   whichever of `playCount`, `likeCount`, `commentCount`, `collectCount`,
 *   `shareCount`, `coverClickRate`, `fansIncrease` could be parsed.
 */
function parseBaijiahaoExcel(
  filePath: string
): Map<string, { recordDate: Date } & Record<string, any>> {
  const wb = XLSX.readFile(filePath);
  const result = new Map<string, { recordDate: Date } & Record<string, any>>();

  logger.info(
    `[BJ Import] Excel loaded. file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`
  );

  // Column position -> statistics field, for plain integer counts.
  const countColumns: ReadonlyArray<readonly [number, string]> = [
    [1, 'playCount'], // reads
    [6, 'likeCount'], // likes
    [4, 'commentCount'], // comments
    [8, 'collectCount'], // favourites
    [10, 'shareCount'], // shares
  ];
  const clickRateColumn = 2;
  const fansGainedColumn = 12;

  for (const sheetName of wb.SheetNames) {
    const sheet = wb.Sheets[sheetName];
    // `XLSX` is typed `any`, so the row type is applied with a cast: a type
    // argument on an untyped call is a compile error (TS2347).
    const rows = XLSX.utils.sheet_to_json(sheet, { defval: '' }) as Array<Record<string, unknown>>;

    if (!rows.length) {
      logger.warn(`[BJ Import] Sheet empty. name=${sheetName}`);
      continue;
    }

    const keys = Object.keys(rows[0] ?? {});
    logger.info(
      `[BJ Import] Sheet parsed. name=${sheetName} rows=${rows.length} keys=${keys.join(',')}`
    );

    for (const row of rows) {
      const cols = Object.keys(row || {});
      if (!cols.length) continue;

      // Positional cell access; undefined when the row has fewer columns.
      const cellAt = (idx: number): unknown => {
        const col = cols[idx];
        return col === undefined ? undefined : row[col];
      };

      const d = normalizeDateText(cellAt(0));
      if (!d) continue;

      const month = String(d.getMonth() + 1).padStart(2, '0');
      const day = String(d.getDate()).padStart(2, '0');
      const key = `${d.getFullYear()}-${month}-${day}`;

      let entry = result.get(key);
      if (!entry) {
        entry = { recordDate: d };
        result.set(key, entry);
      }

      for (const [idx, field] of countColumns) {
        const count = parseChineseNumberLike(cellAt(idx));
        if (typeof count === 'number') entry[field] = count;
      }

      // Click rate is usually already a percentage string; stored as-is.
      const clickRateRaw = cellAt(clickRateColumn);
      if (clickRateRaw !== undefined && clickRateRaw !== null) {
        const text = String(clickRateRaw).trim();
        if (text) entry.coverClickRate = text;
      }

      // fansIncrease uses fans gained only; fans lost (column 14) is not subtracted.
      const fansGained = parseChineseNumberLike(cellAt(fansGainedColumn));
      if (typeof fansGained === 'number') entry.fansIncrease = fansGained;
    }
  }

  return result;
}
|
|
|
+
|
|
|
/**
 * Normalises a rate into a percentage string with two decimals.
 *
 * - Text that already contains `%` is returned trimmed and otherwise untouched.
 * - A number with magnitude <= 1 is treated as a fraction: `0.0423` -> `"4.23%"`.
 * - Any other number is treated as already being a percentage: `4.23` -> `"4.23%"`.
 *
 * The fraction rule is applied symmetrically, so `-0.05` becomes `"-5.00%"`
 * instead of being left unscaled.
 *
 * @param input Raw value; coerced with `String()`.
 * @returns The formatted percentage, or `null` for null/undefined, blank, or
 *   non-numeric input.
 */
function formatPercentString(input: unknown): string | null {
  if (input === null || input === undefined) return null;
  const text = String(input).trim();
  if (!text) return null;
  if (text.includes('%')) return text;

  const value = Number(text);
  if (!Number.isFinite(value)) return null;

  const percent = Math.abs(value) <= 1 ? value * 100 : value;
  return `${percent.toFixed(2)}%`;
}
|
|
|
+
|
|
|
/**
 * Searches an arbitrary JSON value, breadth-first, for the longest array whose
 * elements carry a date-looking field, and reports which field that is.
 *
 * A value is date-looking when its string form is exactly eight digits
 * (`20260115`) or `YYYY-M-D` / `YYYY/M/D`. For each array, the first element
 * that has any date-looking field decides the key: well-known names (`day`,
 * `date`, `stat_day`, ...) win over other fields of that element.
 *
 * The walk descends into array elements as well as object properties, so a
 * series nested under another array is still found. When several arrays tie on
 * length, the one reached first wins.
 *
 * @param root Parsed JSON of unknown shape.
 * @returns The matching array and its date key, or `null` when none exists.
 */
function findArrayWithDateLikeField(root: any): { arr: any[]; dateKey: string } | null {
  const preferredKeys = ['day', 'date', 'stat_day', 'statDay', 'dt', 'time', 'the_day'];

  const isDateLike = (v: unknown): boolean => {
    if (v === null || v === undefined) return false;
    const s = String(v).trim();
    return /^\d{8}$/.test(s) || /^\d{4}[-/]\d{1,2}[-/]\d{1,2}$/.test(s);
  };

  // Returns the date key decided by the first element that has one.
  const detectDateKey = (items: any[]): string | null => {
    for (const item of items) {
      if (!item || typeof item !== 'object') continue;
      const keys = Object.keys(item);
      const hit =
        preferredKeys.find((k) => keys.includes(k) && isDateLike(item[k])) ??
        keys.find((k) => isDateLike(item[k]));
      if (hit !== undefined) return hit;
    }
    return null;
  };

  const seen = new Set<unknown>(); // guards against cyclic references
  const queue: unknown[] = [root];
  let best: { arr: any[]; dateKey: string } | null = null;

  // Index-based walk: the queue only grows, so no O(n) shift() per step.
  for (let i = 0; i < queue.length; i++) {
    const cur = queue[i];
    if (!cur || typeof cur !== 'object' || seen.has(cur)) continue;
    seen.add(cur);

    if (Array.isArray(cur)) {
      const dateKey = detectDateKey(cur);
      // Strictly longer only, so the earliest array wins ties.
      if (dateKey !== null && (best === null || cur.length > best.arr.length)) {
        best = { arr: cur, dateKey };
      }
    }

    // Object.values yields array elements too, so nested series are reachable.
    for (const child of Object.values(cur)) {
      if (child && typeof child === 'object') queue.push(child);
    }
  }

  return best;
}
|
|
|
+
|
|
|
/**
 * Converts an `appStatisticV3` JSON response into per-day statistics.
 *
 * The response layout is not assumed: the daily series is located with
 * `findArrayWithDateLikeField`, and each metric is read from the first of
 * several candidate field names that holds a usable value.
 *
 * @param json Parsed response body.
 * @returns Map keyed by `YYYY-MM-DD`; each value holds `recordDate` plus
 *   whichever of `playCount`, `likeCount`, `commentCount`, `collectCount`,
 *   `shareCount`, `coverClickRate`, `fansIncrease` were present. Empty when no
 *   dated series is found.
 */
function parseBaijiahaoAppStatisticV3(json: any): Map<string, { recordDate: Date } & Record<string, any>> {
  const result = new Map<string, { recordDate: Date } & Record<string, any>>();
  const found = findArrayWithDateLikeField(json);
  if (!found) return result;
  const { arr, dateKey } = found;

  // First candidate key whose value parses as a number.
  const pickNumber = (obj: any, keys: readonly string[]): number | null => {
    for (const k of keys) {
      if (obj?.[k] === undefined || obj?.[k] === null) continue;
      const n = parseChineseNumberLike(obj[k]);
      if (typeof n === 'number') return n;
    }
    return null;
  };

  // First candidate key whose value is non-blank once stringified.
  const pickString = (obj: any, keys: readonly string[]): string | null => {
    for (const k of keys) {
      if (obj?.[k] === undefined || obj?.[k] === null) continue;
      const s = String(obj[k]).trim();
      if (s) return s;
    }
    return null;
  };

  // Target field -> candidate source names, for integer counts.
  const countFields: ReadonlyArray<readonly [string, readonly string[]]> = [
    // reads
    ['playCount', ['read_cnt', 'readCount', 'read', 'pv', 'view_cnt', 'viewCount', 'views']],
    // likes
    ['likeCount', ['like_cnt', 'praise_cnt', 'praise', 'likeCount', 'likes']],
    // comments
    ['commentCount', ['comment_cnt', 'commentCount', 'comments']],
    // favourites
    ['collectCount', ['collect_cnt', 'favorite_cnt', 'fav_cnt', 'collectCount', 'favorites']],
    // shares
    ['shareCount', ['share_cnt', 'shareCount', 'shares']],
  ];
  const clickRateKeys = ['click_rate', 'ctr', 'clickRate'];
  const fansGainedKeys = ['works_fans_inc', 'worksFansInc', 'content_fans_inc', 'fans_inc', 'fansIncrease'];

  for (const item of arr) {
    if (!item || typeof item !== 'object') continue;
    const d = normalizeDateText(item[dateKey]);
    if (!d) continue;

    const month = String(d.getMonth() + 1).padStart(2, '0');
    const day = String(d.getDate()).padStart(2, '0');
    const key = `${d.getFullYear()}-${month}-${day}`;

    let entry = result.get(key);
    if (!entry) {
      entry = { recordDate: d };
      result.set(key, entry);
    }

    for (const [field, sourceKeys] of countFields) {
      const count = pickNumber(item, sourceKeys);
      if (typeof count === 'number') entry[field] = count;
    }

    // Click rate: `pickString` stringifies numeric values too, so no separate
    // numeric lookup is needed (one would round the rate to an integer).
    const clickRate = formatPercentString(pickString(item, clickRateKeys));
    if (clickRate) entry.coverClickRate = clickRate;

    // fansIncrease takes fans gained only; fans lost is not subtracted.
    const fansGained = pickNumber(item, fansGainedKeys);
    if (typeof fansGained === 'number') entry.fansIncrease = fansGained;
  }

  return result;
}
|
|
|
+
|
|
|
/**
 * Imports Baijiahao "Data Center > Content Analysis > Basic Data" daily metrics
 * for the last 30 days into user_day_statistics.
 *
 * Data is fetched from the `appStatisticV3` endpoint through the logged-in
 * browser context. When that yields nothing, or fewer than 20 days, the page's
 * Excel export is downloaded and used to fill the days that are missing.
 */
export class BaijiahaoContentOverviewImportService {
  private accountRepository = AppDataSource.getRepository(PlatformAccount);
  private userDayStatisticsService = new UserDayStatisticsService();

  // Scratch directories under the process working directory.
  private downloadDir = path.resolve(process.cwd(), 'tmp', 'baijiahao-content-overview');
  private stateDir = path.resolve(process.cwd(), 'tmp', 'baijiahao-storage-state');

  /** Path of the storage-state file for one account. */
  private getStatePath(accountId: number) {
    return path.join(this.stateDir, `${accountId}.json`);
  }

  /**
   * Returns the path of the account's saved storage state, creating it
   * interactively when allowed.
   *
   * If no state file exists and both `BJ_IMPORT_HEADLESS=0` and
   * `BJ_STORAGE_STATE_BOOTSTRAP=1` are set, a browser is opened on the analysis
   * page and the method waits up to five minutes for the operator to finish
   * login/verification. The state is saved only after the logged-in page is
   * detected.
   *
   * @param account Account whose state is needed.
   * @param cookies Cookies seeded into the bootstrap context.
   * @returns The state file path, or `null` when there is no saved state and
   *   bootstrapping is disabled or login was not completed in time.
   */
  private async ensureStorageState(
    account: PlatformAccount,
    cookies: PlaywrightCookie[]
  ): Promise<string | null> {
    const statePath = this.getStatePath(account.id);
    try {
      await fs.access(statePath);
      return statePath;
    } catch {
      // No saved state yet.
    }

    // Bootstrapping needs a human in front of a visible browser, so it is opt-in.
    if (!(process.env.BJ_IMPORT_HEADLESS === '0' && process.env.BJ_STORAGE_STATE_BOOTSTRAP === '1')) {
      return null;
    }

    await ensureDir(this.stateDir);
    logger.warn(
      `[BJ Import] No storageState for accountId=${account.id}. Bootstrapping... 请在弹出的浏览器中完成登录/验证。`
    );

    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
      });
      try {
        await context.addCookies(cookies as any);
        const page = await context.newPage();
        await page.goto('https://baijiahao.baidu.com/builder/rc/analysiscontent', {
          waitUntil: 'domcontentloaded',
        });

        // Wait up to 5 minutes for manual login / SMS verification.
        // Playwright's signature is (pageFunction, arg, options): the options
        // must be the THIRD argument, otherwise the timeout is taken as `arg`
        // and the default timeout applies.
        let loggedIn = true;
        try {
          await page.waitForFunction(
            () => {
              const t = document.body?.innerText || '';
              return t.includes('数据中心') || t.includes('内容分析') || t.includes('基础数据');
            },
            undefined,
            { timeout: 5 * 60_000 }
          );
        } catch {
          loggedIn = false;
        }

        if (!loggedIn) {
          // Saving now would persist a logged-out state that every later run
          // would load in place of the account's cookies.
          logger.warn(
            `[BJ Import] Login not detected within timeout, storageState not saved. accountId=${account.id}`
          );
          return null;
        }

        await context.storageState({ path: statePath });
        logger.info(`[BJ Import] storageState saved: ${statePath}`);
        return statePath;
      } finally {
        // Always release the context; the browser may be shared and stay open.
        await context.close().catch(() => undefined);
      }
    } finally {
      if (shouldClose) await browser.close().catch(() => undefined);
    }
  }

  /**
   * Exports "Data Center > Content Analysis > Basic Data > last 30 days" for
   * every Baijiahao account and imports it into user_day_statistics.
   *
   * Accounts are processed one after another; a failing account is logged and
   * does not stop the rest.
   */
  async runDailyImportForAllBaijiahaoAccounts(): Promise<void> {
    await ensureDir(this.downloadDir);

    const accounts = await this.accountRepository.find({
      where: { platform: 'baijiahao' as any },
    });

    logger.info(`[BJ Import] Start. total_accounts=${accounts.length}`);

    for (const account of accounts) {
      try {
        await this.importAccountLast30Days(account);
      } catch (e) {
        logger.error(
          `[BJ Import] Account failed. accountId=${account.id} name=${account.accountName || ''}`,
          e
        );
      }
    }

    logger.info('[BJ Import] Done.');
  }

  /**
   * Single account: fetch the API (falling back to / topping up from the Excel
   * export) -> parse -> save per day -> delete the downloaded file.
   *
   * @param account Account to import.
   * @throws Error when the cookies cannot be parsed, the session redirects to a
   *   login page, or the dashboard reports no data / no access (in which case
   *   the account is also marked `expired` and the user is notified).
   */
  async importAccountLast30Days(account: PlatformAccount): Promise<void> {
    const cookies = parseCookiesFromAccount(account.cookieData);
    if (!cookies.length) {
      throw new Error('cookieData 为空或无法解析');
    }

    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    try {
      const statePath = await this.ensureStorageState(account, cookies);
      const context = await browser.newContext({
        acceptDownloads: true,
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        ...(statePath ? { storageState: statePath } : {}),
      });

      try {
        context.setDefaultTimeout(60_000);
        // A saved storage state already carries cookies; otherwise seed them.
        if (!statePath) {
          await context.addCookies(cookies as any);
        }

        const page = await context.newPage();
        await page.goto('https://baijiahao.baidu.com/builder/rc/analysiscontent', {
          waitUntil: 'domcontentloaded',
        });
        await page.waitForTimeout(1500);

        if (page.url().includes('passport') || page.url().includes('login')) {
          throw new Error('未登录/需要重新登录(跳转到登录页)');
        }

        const bodyText = (await page.textContent('body').catch(() => '')) || '';
        if (bodyText.includes('暂无数据') || bodyText.includes('无权访问')) {
          await this.accountRepository.update(account.id, { status: 'expired' as any });
          wsManager.sendToUser(account.userId, WS_EVENTS.ACCOUNT_UPDATED, {
            account: { id: account.id, status: 'expired', platform: 'baijiahao' },
          });
          wsManager.sendToUser(account.userId, WS_EVENTS.SYSTEM_MESSAGE, {
            level: 'warning',
            message: `百家号账号「${account.accountName || account.accountId || account.id}」暂无数据看板访问权限,请到百家号后台检查数据权限。`,
            platform: 'baijiahao',
            accountId: account.id,
          });
          throw new Error('百家号数据看板暂无访问权限/暂无数据,已标记 expired 并通知用户');
        }

        // Navigate: Data Center -> Content Analysis -> Basic Data. Each click is
        // best-effort because the page may already be on the target tab.
        await page.getByText('数据中心', { exact: false }).first().click().catch(() => undefined);
        await page.getByText('内容分析', { exact: false }).first().click().catch(() => undefined);
        await page.getByText('基础数据', { exact: false }).first().click().catch(() => undefined);

        // Switch to "last 30 days" (tolerant: some accounts default to it, and
        // the label wording varies slightly).
        try {
          const trigger = page.getByText(/近\d+天?/, { exact: false }).first();
          if ((await trigger.count()) > 0) {
            await trigger.click().catch(() => undefined);
          }

          const exactThirty = page.getByText('近30天', { exact: true }).first();
          const thirtyDay =
            (await exactThirty.count()) > 0
              ? exactThirty
              : page.getByText('近30日', { exact: false }).first();
          await thirtyDay.click().catch(() => undefined);

          // Give the backend time to refresh the figures and date range (slow here).
          await page.waitForTimeout(5000);
        } catch (e) {
          logger.warn(
            `[BJ Import] Unable to explicitly switch to last 30 days, continue with default range. accountId=${account.id}`,
            e
          );
        }

        // Preferred source: /author/eco/statistics/appStatisticV3 with an explicit
        // 30-day window ending yesterday. This avoids the Excel export's short
        // range and GBK-garbled headers.
        const end = new Date();
        end.setHours(0, 0, 0, 0);
        end.setDate(end.getDate() - 1); // default to yesterday
        const start = new Date(end);
        start.setDate(start.getDate() - 29);
        const fmt = (d: Date) =>
          `${d.getFullYear()}${String(d.getMonth() + 1).padStart(2, '0')}${String(d.getDate()).padStart(2, '0')}`;
        const start_day = fmt(start);
        const end_day = fmt(end);

        const tryFetchApi = async () => {
          const apiUrl = `https://baijiahao.baidu.com/author/eco/statistics/appStatisticV3?type=all&start_day=${start_day}&end_day=${end_day}&stat=0&special_filter_days=30`;
          // Use the browser context's request client so the session cookies are sent.
          const res = await (context as any).request.get(apiUrl, {
            headers: {
              Referer: 'https://baijiahao.baidu.com/builder/rc/analysiscontent',
            },
          });
          if (!res.ok()) {
            throw new Error(`appStatisticV3 http ${res.status()}`);
          }
          const json = await res.json().catch(() => null);
          if (!json) throw new Error('appStatisticV3 json parse failed');
          const map = parseBaijiahaoAppStatisticV3(json);
          logger.info(`[BJ Import] appStatisticV3 fetched. accountId=${account.id} days=${map.size} range=${start_day}-${end_day}`);
          return map;
        };

        let perDay = new Map<string, { recordDate: Date } & Record<string, any>>();
        try {
          perDay = await tryFetchApi();
        } catch (e) {
          logger.warn(`[BJ Import] appStatisticV3 fetch failed, fallback to Excel export. accountId=${account.id}`, e);
        }

        let inserted = 0;
        let updated = 0;
        let filePath: string | null = null;
        try {
          // Excel fallback: with no API data the export is the only source; with
          // fewer than 20 days it only fills days the API did not return.
          if (perDay.size < 20) {
            try {
              const [download] = await Promise.all([
                page.waitForEvent('download', { timeout: 60_000 }),
                page.getByText('导出数据', { exact: true }).first().click(),
              ]);

              // basename() keeps the server-suggested name from escaping downloadDir.
              const filename = `${account.id}_${Date.now()}_${path.basename(download.suggestedFilename())}`;
              // Record the path before saving so a partial file is still cleaned up.
              filePath = path.join(this.downloadDir, filename);
              await download.saveAs(filePath);

              for (const [k, v] of parseBaijiahaoExcel(filePath).entries()) {
                if (!perDay.has(k)) perDay.set(k, v);
              }
            } catch (e) {
              // Without any API data there is nothing to import: propagate.
              if (perDay.size === 0) throw e;
              // Otherwise keep what the API returned rather than losing it.
              logger.warn(
                `[BJ Import] Excel top-up failed, continue with API data only. accountId=${account.id} days=${perDay.size}`,
                e
              );
            }
          }

          for (const v of perDay.values()) {
            const { recordDate, ...patch } = v;
            const r = await this.userDayStatisticsService.saveStatisticsForDate(
              account.id,
              recordDate,
              patch
            );
            inserted += r.inserted;
            updated += r.updated;
          }

          logger.info(
            `[BJ Import] basic-data imported. accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
          );
        } finally {
          // Remove the download even when parsing or saving failed.
          if (filePath) {
            if (process.env.KEEP_BJ_XLSX === 'true') {
              logger.warn(`[BJ Import] KEEP_BJ_XLSX=true, keep file: ${filePath}`);
            } else {
              await fs.unlink(filePath).catch(() => undefined);
            }
          }
        }
      } finally {
        // Close on every path: with a shared browser an unclosed context would
        // otherwise stay open after an error.
        await context.close().catch(() => undefined);
      }
    } finally {
      if (shouldClose) {
        await browser.close().catch(() => undefined);
      }
    }
  }
}
|
|
|
+
|