|
|
@@ -0,0 +1,415 @@
|
|
|
+import fs from 'node:fs/promises';
|
|
|
+import path from 'node:path';
|
|
|
+import { chromium, type Browser } from 'playwright';
|
|
|
+import * as XLSXNS from 'xlsx';
|
|
|
+import { AppDataSource, PlatformAccount } from '../models/index.js';
|
|
|
+import { BrowserManager } from '../automation/browser.js';
|
|
|
+import { logger } from '../utils/logger.js';
|
|
|
+import { UserDayStatisticsService } from './UserDayStatisticsService.js';
|
|
|
+import type { ProxyConfig } from '@media-manager/shared';
|
|
|
+import { WS_EVENTS } from '@media-manager/shared';
|
|
|
+import { wsManager } from '../websocket/index.js';
|
|
|
+
|
|
|
// Under ESM the xlsx API may be exposed on the namespace's `default` export;
// fall back to the namespace object itself when there is no `default`.
/* eslint-disable @typescript-eslint/no-explicit-any */
const xlsxModule = XLSXNS as any;
const XLSX: any = xlsxModule.default ?? xlsxModule;
/* eslint-enable @typescript-eslint/no-explicit-any */
|
|
|
+
|
|
|
/**
 * Cookie record built by this module and handed to the browser context via
 * `context.addCookies(...)`.
 *
 * Only `name` and `value` are mandatory; everything else is optional.
 */
type PlaywrightCookie = {
  /** Cookie name. */
  name: string;
  /** Cookie value. */
  value: string;
  /** Domain the cookie is scoped to. */
  domain?: string;
  /** Path the cookie is scoped to. */
  path?: string;
  /** URL the cookie is associated with (set by this module when no domain is known). */
  url?: string;
  /** Expiry value, passed through unchanged from the stored cookie data. */
  expires?: number;
  /** HttpOnly flag. */
  httpOnly?: boolean;
  /** Secure flag. */
  secure?: boolean;
  /** SameSite policy. */
  sameSite?: 'Lax' | 'None' | 'Strict';
};
|
|
|
+
|
|
|
/**
 * Metric identifiers recognised in the exported workbook. Each literal is also
 * the property name under which the metric is stored on a per-day record.
 */
type MetricKind = 'playCount' | 'coverClickRate' | 'avgWatchDuration' | 'totalWatchDuration' | 'completionRate';
|
|
|
+
|
|
|
/**
 * Creates directory `p`, including any missing parent directories.
 * Resolves without error when the directory already exists.
 *
 * @param p Directory path to create.
 * @returns The promise returned by `fs.mkdir`.
 */
function ensureDir(p: string) {
  const options = { recursive: true } as const;
  return fs.mkdir(p, options);
}
|
|
|
+
|
|
|
/**
 * Builds a local-time midnight `Date` for the given calendar components.
 *
 * Returns null when the components do not describe a real calendar day
 * (e.g. month 13 or February 31), instead of letting `Date` roll the value
 * over into a different day.
 */
function buildLocalMidnight(year: number, month: number, day: number): Date | null {
  const d = new Date(year, month - 1, day);
  d.setHours(0, 0, 0, 0);
  const roundTrips = d.getFullYear() === year && d.getMonth() === month - 1 && d.getDate() === day;
  return roundTrips ? d : null;
}

/**
 * Normalizes a date-like cell value to a `Date` at local midnight.
 *
 * Accepted inputs:
 * - a valid `Date` instance (copied, time truncated to 00:00 local);
 * - text containing `YYYY<sep>M<sep>D`, where `<sep>` is any single non-digit
 *   character (e.g. `2026年01月27日`, `2026-01-27`, `2026/1/27`);
 * - text of the exact form `MM-DD` or `MM/DD`. The year is inferred: the
 *   current year is used unless that would place the date more than two days
 *   in the future, in which case the previous year is used (an export window
 *   that spans New Year contains December rows while "now" is January).
 *
 * @param input Raw cell value.
 * @returns The normalized date, or null when the input is empty, unparseable,
 *          or names a calendar day that does not exist.
 */
function normalizeDateText(input: unknown): Date | null {
  if (!input) return null;

  if (input instanceof Date && !Number.isNaN(input.getTime())) {
    const copy = new Date(input);
    copy.setHours(0, 0, 0, 0);
    return copy;
  }

  const text = String(input).trim();

  // Full date, e.g. 2026年01月27日
  const full = text.match(/(\d{4})\D(\d{1,2})\D(\d{1,2})\D?/);
  if (full) {
    return buildLocalMidnight(Number(full[1]), Number(full[2]), Number(full[3]));
  }

  // Year-less date, e.g. 01-27
  const short = text.match(/^(\d{1,2})[-/](\d{1,2})$/);
  if (short) {
    const month = Number(short[1]);
    const day = Number(short[2]);
    const now = new Date();
    // Tolerance keeps "today" valid when the data source's clock/timezone is
    // slightly ahead of this process.
    const futureToleranceMs = 2 * 24 * 60 * 60 * 1000;

    const thisYear = buildLocalMidnight(now.getFullYear(), month, day);
    if (thisYear && thisYear.getTime() - now.getTime() > futureToleranceMs) {
      return buildLocalMidnight(now.getFullYear() - 1, month, day);
    }
    return thisYear;
  }

  return null;
}
|
|
|
+
|
|
|
/**
 * Parses a count as it is displayed on Chinese dashboards into an integer.
 *
 * Supported forms:
 * - plain or comma-grouped numbers: `8077`, `8,077`;
 * - values with a `万` (x10,000) or `亿` (x100,000,000) suffix, optionally
 *   signed: `4.8万`, `1.2亿`;
 * - anything else is reduced to its digits, dots and minus signs and parsed
 *   as a number.
 *
 * @param input Raw value (string, number, or anything stringifiable).
 * @returns The value rounded to the nearest integer, or null when the input
 *          is null/undefined/blank, contains no digits, or is not a finite
 *          number.
 */
function parseChineseNumberLike(input: unknown): number | null {
  if (input === null || input === undefined) return null;

  // Numbers need no text processing (and must not go through the character
  // filter below, which would mangle exponent notation).
  if (typeof input === 'number') {
    return Number.isFinite(input) ? Math.round(input) : null;
  }

  const text = String(input).trim();
  if (!text) return null;

  // 8,077 -> 8077
  const plain = text.replace(/,/g, '');

  // 4.8万 / 1.2亿
  const scaled = plain.match(/^(-?\d+(?:\.\d+)?)\s*([万亿])$/);
  if (scaled) {
    const multiplier = scaled[2] === '万' ? 10000 : 100000000;
    return Math.round(Number(scaled[1]) * multiplier);
  }

  const numericChars = plain.replace(/[^\d.-]/g, '');
  // Without this guard text such as "暂无" collapses to '' and Number('') is 0,
  // which would be reported as a real measurement.
  if (!/\d/.test(numericChars)) return null;

  const value = Number(numericChars);
  return Number.isFinite(value) ? Math.round(value) : null;
}
|
|
|
+
|
|
|
/**
 * Maps a worksheet name to the metric it carries.
 *
 * The trimmed sheet name is checked against keyword groups in a fixed order
 * and the first group with a matching substring wins.
 *
 * @param sheetName Worksheet name as found in the workbook.
 * @returns The matching metric kind, or null when no keyword matches.
 */
function detectMetricKind(sheetName: string): MetricKind | null {
  const title = sheetName.trim();

  // Order matters: earlier entries take precedence.
  const keywordTable: Array<[string[], MetricKind]> = [
    // The play-count sheet may be named either 「观看趋势」 or 「观看数趋势」.
    [['观看趋势', '观看数'], 'playCount'],
    [['封面点击率'], 'coverClickRate'],
    [['平均观看时长'], 'avgWatchDuration'],
    [['观看总时长'], 'totalWatchDuration'],
    [['完播率'], 'completionRate'],
  ];

  for (const [keywords, kind] of keywordTable) {
    if (keywords.some((keyword) => title.includes(keyword))) {
      return kind;
    }
  }
  return null;
}
|
|
|
+
|
|
|
/**
 * Converts an account's stored cookie text into cookie records suitable for
 * `context.addCookies(...)`.
 *
 * Two storage formats are understood:
 * 1. JSON — either an array of cookie objects or an object with a `cookies`
 *    array. Entries without a non-empty `name` are skipped.
 * 2. A `Cookie`-header style string: `a=b; c=d`.
 *
 * Each returned cookie carries EITHER a `domain` + `path` pair (when the
 * stored entry has a domain) OR a `url` (otherwise), never `url` together
 * with `path`: Playwright's `addCookies` rejects a cookie that specifies both.
 * A stored `path` on an entry without a domain is therefore not forwarded.
 *
 * @param cookieData Raw cookie text from the account record; may be null.
 * @returns Parsed cookies; an empty array when nothing usable was found.
 */
function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
  const fallbackUrl = 'https://creator.xiaohongshu.com';

  const raw = cookieData?.trim();
  if (!raw) return [];

  // 1) JSON (most common: exported by a browser extension / saved by the frontend)
  if (raw.startsWith('[') || raw.startsWith('{')) {
    try {
      const parsed: unknown = JSON.parse(raw);
      const list = Array.isArray(parsed) ? parsed : (parsed as { cookies?: unknown } | null)?.cookies;
      if (!Array.isArray(list)) return [];

      const fromJson: PlaywrightCookie[] = [];
      for (const item of list) {
        const entry = item as Record<string, unknown> | null | undefined;

        const name = String(entry?.name ?? '').trim();
        if (!name) continue;
        const value = String(entry?.value ?? '').trim();

        const expires = entry?.expires;
        const httpOnly = entry?.httpOnly;
        const secure = entry?.secure;
        const sameSiteRaw = entry?.sameSite;

        const shared = {
          name,
          value,
          expires: typeof expires === 'number' ? expires : undefined,
          httpOnly: typeof httpOnly === 'boolean' ? httpOnly : undefined,
          secure: typeof secure === 'boolean' ? secure : undefined,
          sameSite:
            sameSiteRaw === 'Lax' || sameSiteRaw === 'None' || sameSiteRaw === 'Strict' ? sameSiteRaw : undefined,
        } satisfies PlaywrightCookie;

        if (entry?.domain) {
          fromJson.push({
            ...shared,
            domain: String(entry.domain),
            path: entry.path ? String(entry.path) : '/',
          });
        } else {
          // No domain known: address the cookie by URL only (no `path`).
          fromJson.push({ ...shared, url: fallbackUrl });
        }
      }
      return fromJson;
    } catch {
      // Not valid JSON after all — fall through to the "a=b; c=d" parser.
    }
  }

  // 2) "a=b; c=d" concatenated format
  const fromHeader: PlaywrightCookie[] = [];
  for (const pair of raw.split(';')) {
    const segment = pair.trim();
    if (!segment) continue;

    const eq = segment.indexOf('=');
    if (eq <= 0) continue; // no '=' or empty name

    const name = segment.slice(0, eq).trim();
    if (!name) continue;

    // Only the first '=' separates name from value; later ones belong to the value.
    const value = segment.slice(eq + 1).trim();
    fromHeader.push({ name, value, url: fallbackUrl });
  }
  return fromHeader;
}
|
|
|
+
|
|
|
/**
 * Obtains a browser for an account, honouring its proxy settings.
 *
 * - With an enabled proxy, a new Chromium instance is launched through that
 *   proxy and the caller is told to close it (`shouldClose: true`).
 * - Otherwise the browser is obtained from `BrowserManager.getBrowser` and
 *   must not be closed by the caller (`shouldClose: false`).
 *
 * The browser runs headless unless the environment variable
 * `XHS_IMPORT_HEADLESS` is exactly `'0'`.
 *
 * @param proxy Proxy configuration of the account, or null.
 * @returns The browser plus a flag saying whether the caller owns it.
 */
async function createBrowserForAccount(proxy: ProxyConfig | null): Promise<{ browser: Browser; shouldClose: boolean }> {
  const headless = !(process.env.XHS_IMPORT_HEADLESS === '0');

  if (!proxy?.enabled) {
    const managed = await BrowserManager.getBrowser({ headless });
    return { browser: managed, shouldClose: false };
  }

  const launchArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--window-size=1920,1080',
  ];
  const proxyServer = `${proxy.type}://${proxy.host}:${proxy.port}`;

  const dedicated = await chromium.launch({
    headless,
    proxy: {
      server: proxyServer,
      username: proxy.username,
      password: proxy.password,
    },
    args: launchArgs,
  });
  return { browser: dedicated, shouldClose: true };
}
|
|
|
+
|
|
|
/**
 * Reads an exported workbook and merges its per-metric sheets into one record
 * per calendar day.
 *
 * Every sheet whose name maps to a metric (see `detectMetricKind`) is read row
 * by row. For each row:
 * - the date comes from the `日期` / `date` / `Date` column, else the first column;
 * - the value comes from the `数值` / `value` / `Value` column, else the second column;
 * - rows whose date cannot be normalized are skipped.
 *
 * `playCount` is stored as an integer (and only when it parses); all other
 * metrics are stored as the trimmed cell text, or `'0'` when the cell is blank.
 *
 * @param filePath Path of the workbook on disk.
 * @returns Map keyed by `YYYY-MM-DD` (local date) to `{ recordDate, ...metrics }`.
 */
function parseXhsExcel(filePath: string): Map<string, { recordDate: Date } & Record<string, any>> {
  const wb = XLSX.readFile(filePath);
  const result = new Map<string, { recordDate: Date } & Record<string, any>>();

  logger.info(`[XHS Import] Excel loaded. file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`);

  for (const sheetName of wb.SheetNames) {
    const kind = detectMetricKind(sheetName);
    if (!kind) continue;

    const sheet = wb.Sheets[sheetName];
    // `XLSX` is typed `any`, so the call cannot take type arguments (TS2347);
    // the row type is expressed on the variable instead.
    const rows: Record<string, any>[] = XLSX.utils.sheet_to_json(sheet, { defval: '' });

    if (rows.length) {
      const keys = Object.keys(rows[0] || {});
      logger.info(`[XHS Import] Sheet parsed. name=${sheetName} kind=${kind} rows=${rows.length} keys=${keys.join(',')}`);
    } else {
      logger.warn(`[XHS Import] Sheet empty. name=${sheetName} kind=${kind}`);
    }

    for (const row of rows) {
      // Positional fallbacks for sheets whose headers are not recognised.
      const columns = Object.keys(row);
      const dateVal = row['日期'] ?? row['date'] ?? row['Date'] ?? row[columns[0] ?? ''];
      const valueVal = row['数值'] ?? row['value'] ?? row['Value'] ?? row[columns[1] ?? ''];

      const d = normalizeDateText(dateVal);
      if (!d) continue;

      const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}`;
      let record = result.get(key);
      if (!record) {
        record = { recordDate: d };
        result.set(key, record);
      }

      if (kind === 'playCount') {
        const n = parseChineseNumberLike(valueVal);
        if (typeof n === 'number') record.playCount = n;
      } else {
        // The remaining metric kinds are named exactly like the record field
        // they populate; they are kept as display text.
        const text = String(valueVal ?? '').trim();
        record[kind] = text || '0';
      }
    }
  }

  return result;
}
|
|
|
+
|
|
|
/**
 * Exports the "watch data – last 30 days" workbook from the Xiaohongshu
 * creator dashboard for platform accounts and stores the per-day figures via
 * `UserDayStatisticsService`.
 */
export class XiaohongshuAccountOverviewImportService {
  private accountRepository = AppDataSource.getRepository(PlatformAccount);
  private userDayStatisticsService = new UserDayStatisticsService();

  /** Directory where exported workbooks are saved before being parsed. */
  private downloadDir = path.resolve(process.cwd(), 'tmp', 'xhs-account-overview');
  /** Directory holding one saved browser storage-state file per account. */
  private stateDir = path.resolve(process.cwd(), 'tmp', 'xhs-storage-state');

  /** Path of the storage-state file for the given account id. */
  private getStatePath(accountId: number) {
    return path.join(this.stateDir, `${accountId}.json`);
  }

  /**
   * Returns the path of a usable storage-state file for the account, creating
   * one interactively when allowed.
   *
   * - If the file already exists, its path is returned.
   * - Otherwise, bootstrapping only runs when `XHS_IMPORT_HEADLESS=0` and
   *   `XHS_STORAGE_STATE_BOOTSTRAP=1`: a browser is opened on the dashboard
   *   and the method waits up to 5 minutes for dashboard text to appear so a
   *   human can complete login/verification, then saves the state.
   * - If the wait times out while the page is still on a login URL, nothing is
   *   saved and null is returned, so a logged-out state is never persisted
   *   and reused by later runs.
   *
   * @param account Account the state belongs to.
   * @param cookies Cookies to seed the bootstrap context with.
   * @returns Path of the state file, or null when none is available.
   */
  private async ensureStorageState(account: PlatformAccount, cookies: PlaywrightCookie[]): Promise<string | null> {
    const statePath = this.getStatePath(account.id);
    try {
      await fs.access(statePath);
      return statePath;
    } catch {
      // No saved state yet.
    }

    // Bootstrapping needs a human in front of a visible browser.
    // Enable with: XHS_IMPORT_HEADLESS=0 and XHS_STORAGE_STATE_BOOTSTRAP=1
    if (!(process.env.XHS_IMPORT_HEADLESS === '0' && process.env.XHS_STORAGE_STATE_BOOTSTRAP === '1')) {
      return null;
    }

    await ensureDir(this.stateDir);
    logger.warn(`[XHS Import] No storageState for accountId=${account.id}. Bootstrapping... 请在弹出的浏览器中完成登录/验证。`);

    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
      });
      try {
        await context.addCookies(cookies as any);
        const page = await context.newPage();
        await page.goto('https://creator.xiaohongshu.com/statistics/account/v2', { waitUntil: 'domcontentloaded' });

        // Wait up to 5 minutes for manual login / slider / SMS verification.
        let dashboardSeen = true;
        try {
          await page.waitForFunction(() => {
            const t = document.body?.innerText || '';
            return t.includes('账号概览') || t.includes('数据总览') || t.includes('观看数据');
          }, { timeout: 5 * 60_000 });
        } catch {
          dashboardSeen = false;
        }

        if (!dashboardSeen && page.url().includes('login')) {
          logger.warn(
            `[XHS Import] Bootstrap timed out on the login page; storageState not saved. accountId=${account.id}`
          );
          return null;
        }

        await context.storageState({ path: statePath });
        logger.info(`[XHS Import] storageState saved: ${statePath}`);
        return statePath;
      } finally {
        // Always release the context: a browser with shouldClose=false is not
        // closed below, so an unclosed context would otherwise stay open.
        await context.close().catch(() => undefined);
      }
    } finally {
      if (shouldClose) await browser.close().catch(() => undefined);
    }
  }

  /**
   * Runs the last-30-days import for every account whose platform is
   * `xiaohongshu`. Accounts are processed one after another; a failure for
   * one account is logged and does not stop the others.
   */
  async runDailyImportForAllXhsAccounts(): Promise<void> {
    await ensureDir(this.downloadDir);

    const accounts = await this.accountRepository.find({
      where: { platform: 'xiaohongshu' as any },
    });

    logger.info(`[XHS Import] Start. total_accounts=${accounts.length}`);

    for (const account of accounts) {
      try {
        await this.importAccountLast30Days(account);
      } catch (e) {
        logger.error(`[XHS Import] Account failed. accountId=${account.id} name=${account.accountName || ''}`, e);
      }
    }

    logger.info('[XHS Import] Done.');
  }

  /**
   * Single account: export the workbook, parse it, store the per-day rows,
   * then delete the file.
   *
   * The browser context is closed and the downloaded file is removed (unless
   * `KEEP_XHS_XLSX=1`) whether or not the import succeeds.
   *
   * @param account Account to import.
   * @throws Error when the cookie data is empty/unparseable, when the page
   *         redirects to login, or when the dashboard reports missing data
   *         permission (in which case the account is also marked `expired`
   *         and the user is notified). Errors from page automation, parsing
   *         or saving propagate as well.
   */
  async importAccountLast30Days(account: PlatformAccount): Promise<void> {
    const cookies = parseCookiesFromAccount(account.cookieData);
    if (!cookies.length) {
      throw new Error('cookieData 为空或无法解析');
    }

    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
    try {
      const statePath = await this.ensureStorageState(account, cookies);
      const context = await browser.newContext({
        acceptDownloads: true,
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        ...(statePath ? { storageState: statePath } : {}),
      });
      try {
        context.setDefaultTimeout(60_000);
        // Without a saved state, fall back to cookies only (the export may then be all zeros).
        if (!statePath) {
          await context.addCookies(cookies as any);
        }

        const page = await context.newPage();
        await page.goto('https://creator.xiaohongshu.com/statistics/account/v2', { waitUntil: 'domcontentloaded' });
        await page.waitForTimeout(1500);

        if (page.url().includes('login')) {
          throw new Error('未登录/需要重新登录(跳转到 login)');
        }

        // Detect the "no access / permission pending" notice: mark the account
        // as expired and push a notification to the user.
        const bodyText = (await page.textContent('body').catch(() => '')) || '';
        if (bodyText.includes('暂无访问权限') || bodyText.includes('数据权限申请中') || bodyText.includes('次日再来查看')) {
          await this.accountRepository.update(account.id, { status: 'expired' as any });
          wsManager.sendToUser(account.userId, WS_EVENTS.ACCOUNT_UPDATED, {
            account: { id: account.id, status: 'expired', platform: 'xiaohongshu' },
          });
          wsManager.sendToUser(account.userId, WS_EVENTS.SYSTEM_MESSAGE, {
            level: 'warning',
            message: `小红书账号「${account.accountName || account.accountId || account.id}」暂无数据看板访问权限,请到小红书创作服务平台申请数据权限(通过后一般次日生效)。`,
            platform: 'xiaohongshu',
            accountId: account.id,
          });
          throw new Error('小红书数据看板暂无访问权限/申请中,已标记 expired 并通知用户');
        }

        // Navigate: dashboard -> account overview -> note data -> watch data -> last 30 days.
        // The page structure may change, so locate by text; optional steps tolerate failure.
        await page.getByText('账号概览', { exact: true }).first().click().catch(() => undefined);
        await page.getByText('笔记数据', { exact: true }).first().click();
        await page.getByText('观看数据', { exact: true }).first().click();

        // Pick "last 30 days": open the range selector first, then click the option.
        await page.getByText(/近\d+日/).first().click().catch(() => undefined);
        await page.getByText('近30日', { exact: true }).click();

        // Wait for the data to refresh (avoids exporting all zeros). The signal
        // is a non-zero number appearing shortly after the "观看数" label in the
        // page text (e.g. 8,077 / 4.8万). A timeout is tolerated.
        await page
          .waitForFunction(() => {
            const t = document.body?.innerText || '';
            if (!t.includes('观看数')) return false;
            // A non-zero value after "观看数" (commas / 万 / 亿 allowed).
            return /观看数[\s\S]{0,50}([1-9]\d{0,2}(,\d{3})+|[1-9]\d*|[1-9]\d*(\.\d+)?\s*[万亿])/.test(t);
          }, { timeout: 30_000 })
          .catch(() => {
            logger.warn('[XHS Import] Wait for non-zero watch count timed out. Continue export anyway.');
          });

        // Export: start listening for the download before clicking.
        const [download] = await Promise.all([
          page.waitForEvent('download', { timeout: 60_000 }),
          page.getByText('导出数据', { exact: true }).first().click(),
        ]);

        const filename = `${account.id}_${Date.now()}_${download.suggestedFilename()}`;
        const filePath = path.join(this.downloadDir, filename);
        await download.saveAs(filePath);

        // Parse and store.
        let days = 0;
        let inserted = 0;
        let updated = 0;
        try {
          const perDay = parseXhsExcel(filePath);
          days = perDay.size;

          // One row per day: accountId + date.
          for (const v of perDay.values()) {
            const { recordDate, ...patch } = v;
            const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
            inserted += r.inserted;
            updated += r.updated;
          }
        } finally {
          // Delete the workbook by default, even when parsing/saving failed;
          // set KEEP_XHS_XLSX=1 to keep it for troubleshooting.
          if (process.env.KEEP_XHS_XLSX === '1') {
            logger.warn(`[XHS Import] KEEP_XHS_XLSX=1, keep file: ${filePath}`);
          } else {
            await fs.unlink(filePath).catch(() => undefined);
          }
        }

        logger.info(
          `[XHS Import] Account done. accountId=${account.id} days=${days} inserted=${inserted} updated=${updated}`
        );
      } finally {
        // Always release the context: a browser with shouldClose=false is not
        // closed below, so an unclosed context would otherwise stay open.
        await context.close().catch(() => undefined);
      }
    } finally {
      if (shouldClose) {
        await browser.close().catch(() => undefined);
      }
    }
  }
}
|
|
|
+
|