XiaohongshuAccountOverviewImportService.ts 28 KB


  1. import fs from 'node:fs/promises';
  2. import path from 'node:path';
  3. import { chromium, type Browser, type Page, type BrowserContext } from 'playwright';
  4. import * as XLSXNS from 'xlsx';
  5. import { AppDataSource, PlatformAccount } from '../models/index.js';
  6. import { BrowserManager } from '../automation/browser.js';
  7. import { logger } from '../utils/logger.js';
  8. import { UserDayStatisticsService } from './UserDayStatisticsService.js';
  9. import { AccountService } from './AccountService.js';
  10. import type { ProxyConfig } from '@media-manager/shared';
  11. import { WS_EVENTS } from '@media-manager/shared';
  12. import { wsManager } from '../websocket/index.js';
// Under ESM the xlsx exports may hang off `default`; fall back to the namespace object itself when they do not.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const XLSX: any = (XLSXNS as any).default ?? (XLSXNS as any);
/**
 * Cookie shape produced by `parseCookiesFromAccount` and handed to Playwright's
 * `context.addCookies`. Only `name` and `value` are mandatory; every other field is optional.
 */
type PlaywrightCookie = {
  name: string;
  value: string;
  domain?: string;
  path?: string;
  url?: string;
  expires?: number;
  httpOnly?: boolean;
  secure?: boolean;
  sameSite?: 'Lax' | 'None' | 'Strict';
};
/**
 * Identifiers of the per-day metrics this file knows how to import.
 * `detectMetricKind` maps an Excel sheet name to one of these, and `parseXhsExcel`
 * uses the same identifier as the field name on the per-day record it builds.
 */
type MetricKind =
  | 'playCount'
  | 'exposureCount'
  | 'likeCount'
  | 'commentCount'
  | 'shareCount'
  | 'collectCount'
  | 'fansIncrease'
  | 'worksCount'
  | 'coverClickRate'
  | 'avgWatchDuration'
  | 'totalWatchDuration'
  | 'completionRate';
/** Export category passed to `parseXhsExcel`; it decides which metric sheets of the workbook are read. */
type ExportMode = 'watch' | 'interaction' | 'fans' | 'publish';
  41. function ensureDir(p: string) {
  42. return fs.mkdir(p, { recursive: true });
  43. }
  44. function normalizeDateText(input: unknown): Date | null {
  45. if (!input) return null;
  46. if (input instanceof Date && !Number.isNaN(input.getTime())) {
  47. const d = new Date(input);
  48. d.setHours(0, 0, 0, 0);
  49. return d;
  50. }
  51. const s = String(input).trim();
  52. // 2026年01月27日
  53. const m1 = s.match(/(\d{4})\D(\d{1,2})\D(\d{1,2})\D?/);
  54. if (m1) {
  55. const yyyy = Number(m1[1]);
  56. const mm = Number(m1[2]);
  57. const dd = Number(m1[3]);
  58. if (!yyyy || !mm || !dd) return null;
  59. const d = new Date(yyyy, mm - 1, dd);
  60. d.setHours(0, 0, 0, 0);
  61. return d;
  62. }
  63. // 01-27(兜底:用当前年份)
  64. const m2 = s.match(/^(\d{1,2})[-/](\d{1,2})$/);
  65. if (m2) {
  66. const yyyy = new Date().getFullYear();
  67. const mm = Number(m2[1]);
  68. const dd = Number(m2[2]);
  69. const d = new Date(yyyy, mm - 1, dd);
  70. d.setHours(0, 0, 0, 0);
  71. return d;
  72. }
  73. return null;
  74. }
  75. function parseChineseNumberLike(input: unknown): number | null {
  76. if (input === null || input === undefined) return null;
  77. const s = String(input).trim();
  78. if (!s) return null;
  79. // 8,077
  80. const plain = s.replace(/,/g, '');
  81. // 4.8万
  82. const wan = plain.match(/^(\d+(\.\d+)?)\s*万$/);
  83. if (wan) return Math.round(Number(wan[1]) * 10000);
  84. const yi = plain.match(/^(\d+(\.\d+)?)\s*亿$/);
  85. if (yi) return Math.round(Number(yi[1]) * 100000000);
  86. const n = Number(plain.replace(/[^\d.-]/g, ''));
  87. if (Number.isFinite(n)) return Math.round(n);
  88. return null;
  89. }
  90. function detectMetricKind(sheetName: string): MetricKind | null {
  91. const n = sheetName.trim();
  92. // 观看数据:子表命名可能是「观看趋势」或「观看数趋势」
  93. if (n.includes('观看趋势') || n.includes('观看数')) return 'playCount';
  94. if (n.includes('曝光趋势')) return 'exposureCount';
  95. if (n.includes('封面点击率')) return 'coverClickRate';
  96. if (n.includes('平均观看时长')) return 'avgWatchDuration';
  97. if (n.includes('观看总时长')) return 'totalWatchDuration';
  98. if (n.includes('完播率')) return 'completionRate';
  99. // 互动数据
  100. if (n.includes('点赞') && n.includes('趋势')) return 'likeCount';
  101. if (n.includes('评论') && n.includes('趋势')) return 'commentCount';
  102. if (n.includes('分享') && n.includes('趋势')) return 'shareCount';
  103. if (n.includes('收藏') && n.includes('趋势')) return 'collectCount';
  104. // 涨粉数据(只取净涨粉趋势)
  105. if (n.includes('净涨粉') && n.includes('趋势')) return 'fansIncrease';
  106. // 发布数据:总发布趋势 → 每日发布数,入库 works_count
  107. if (n.includes('总发布趋势')) return 'worksCount';
  108. return null;
  109. }
  110. function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
  111. if (!cookieData) return [];
  112. const raw = cookieData.trim();
  113. if (!raw) return [];
  114. // 1) JSON array(最常见:浏览器插件导出/前端保存)
  115. if (raw.startsWith('[') || raw.startsWith('{')) {
  116. try {
  117. const parsed = JSON.parse(raw);
  118. const arr = Array.isArray(parsed) ? parsed : (parsed?.cookies ? parsed.cookies : []);
  119. if (!Array.isArray(arr)) return [];
  120. return arr
  121. .map((c: any) => {
  122. const name = String(c?.name ?? '').trim();
  123. const value = String(c?.value ?? '').trim();
  124. if (!name) return null;
  125. const domain = c?.domain ? String(c.domain) : undefined;
  126. const pathVal = c?.path ? String(c.path) : '/';
  127. const url = !domain ? 'https://creator.xiaohongshu.com' : undefined;
  128. const sameSiteRaw = c?.sameSite;
  129. const sameSite =
  130. sameSiteRaw === 'Lax' || sameSiteRaw === 'None' || sameSiteRaw === 'Strict'
  131. ? sameSiteRaw
  132. : undefined;
  133. return {
  134. name,
  135. value,
  136. domain,
  137. path: pathVal,
  138. url,
  139. expires: typeof c?.expires === 'number' ? c.expires : undefined,
  140. httpOnly: typeof c?.httpOnly === 'boolean' ? c.httpOnly : undefined,
  141. secure: typeof c?.secure === 'boolean' ? c.secure : undefined,
  142. sameSite,
  143. } satisfies PlaywrightCookie;
  144. })
  145. .filter(Boolean) as PlaywrightCookie[];
  146. } catch {
  147. // fallthrough
  148. }
  149. }
  150. // 2) "a=b; c=d" 拼接格式
  151. const pairs = raw.split(';').map((p) => p.trim()).filter(Boolean);
  152. const cookies: PlaywrightCookie[] = [];
  153. for (const p of pairs) {
  154. const idx = p.indexOf('=');
  155. if (idx <= 0) continue;
  156. const name = p.slice(0, idx).trim();
  157. const value = p.slice(idx + 1).trim();
  158. if (!name) continue;
  159. cookies.push({ name, value, url: 'https://creator.xiaohongshu.com' });
  160. }
  161. return cookies;
  162. }
  163. async function createBrowserForAccount(proxy: ProxyConfig | null): Promise<{ browser: Browser; shouldClose: boolean }> {
  164. // 静默同步:默认一律 headless,不弹窗
  165. // 只有在“引导登录/验证”时(XHS_STORAGE_STATE_BOOTSTRAP=1 且 XHS_IMPORT_HEADLESS=0)才允许 headful
  166. const allowHeadfulForBootstrap = process.env.XHS_STORAGE_STATE_BOOTSTRAP === '1' && process.env.XHS_IMPORT_HEADLESS === '0';
  167. const headless = !allowHeadfulForBootstrap;
  168. if (proxy?.enabled) {
  169. const server = `${proxy.type}://${proxy.host}:${proxy.port}`;
  170. const browser = await chromium.launch({
  171. headless,
  172. proxy: {
  173. server,
  174. username: proxy.username,
  175. password: proxy.password,
  176. },
  177. args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu', '--window-size=1920,1080'],
  178. });
  179. return { browser, shouldClose: true };
  180. }
  181. const browser = await BrowserManager.getBrowser({ headless });
  182. return { browser, shouldClose: false };
  183. }
  184. export function parseXhsExcel(
  185. filePath: string,
  186. mode: ExportMode
  187. ): Map<string, { recordDate: Date } & Record<string, any>> {
  188. const wb = XLSX.readFile(filePath);
  189. const result = new Map<string, { recordDate: Date } & Record<string, any>>();
  190. logger.info(
  191. `[XHS Import] Excel loaded. mode=${mode} file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`
  192. );
  193. for (const sheetName of wb.SheetNames) {
  194. const kind = detectMetricKind(sheetName);
  195. if (!kind) continue;
  196. // 按导出类型过滤不相关子表,避免误写字段
  197. if (
  198. (mode === 'watch' &&
  199. !['playCount', 'exposureCount', 'coverClickRate', 'avgWatchDuration', 'totalWatchDuration', 'completionRate'].includes(kind)) ||
  200. (mode === 'interaction' && !['likeCount', 'commentCount', 'shareCount', 'collectCount'].includes(kind)) ||
  201. (mode === 'fans' && kind !== 'fansIncrease') ||
  202. (mode === 'publish' && kind !== 'worksCount')
  203. ) {
  204. continue;
  205. }
  206. const sheet = wb.Sheets[sheetName];
  207. const rows = (XLSX.utils.sheet_to_json(sheet, { defval: '' }) as Record<string, any>[]);
  208. if (rows.length) {
  209. const keys = Object.keys(rows[0] || {});
  210. logger.info(`[XHS Import] Sheet parsed. name=${sheetName} kind=${kind} rows=${rows.length} keys=${keys.join(',')}`);
  211. } else {
  212. logger.warn(`[XHS Import] Sheet empty. name=${sheetName} kind=${kind}`);
  213. }
  214. for (const row of rows) {
  215. const dateVal = row['日期'] ?? row['date'] ?? row['Date'] ?? row[Object.keys(row)[0] ?? ''];
  216. const valueVal = row['数值'] ?? row['value'] ?? row['Value'] ?? row[Object.keys(row)[1] ?? ''];
  217. const d = normalizeDateText(dateVal);
  218. if (!d) continue;
  219. const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}`;
  220. if (!result.has(key)) result.set(key, { recordDate: d });
  221. const obj = result.get(key)!;
  222. if (kind === 'playCount' || kind === 'exposureCount' || kind === 'likeCount' || kind === 'commentCount' || kind === 'shareCount' || kind === 'collectCount' || kind === 'fansIncrease' || kind === 'worksCount') {
  223. const n = parseChineseNumberLike(valueVal);
  224. if (typeof n === 'number') {
  225. if (kind === 'playCount') obj.playCount = n;
  226. if (kind === 'exposureCount') obj.exposureCount = n;
  227. if (kind === 'likeCount') obj.likeCount = n;
  228. if (kind === 'worksCount') obj.worksCount = n;
  229. if (kind === 'commentCount') obj.commentCount = n;
  230. if (kind === 'shareCount') obj.shareCount = n;
  231. if (kind === 'collectCount') obj.collectCount = n;
  232. if (kind === 'fansIncrease') obj.fansIncrease = n; // 允许负数
  233. }
  234. } else {
  235. const s = String(valueVal ?? '').trim();
  236. if (kind === 'coverClickRate') obj.coverClickRate = s || '0';
  237. if (kind === 'avgWatchDuration') obj.avgWatchDuration = s || '0';
  238. if (kind === 'totalWatchDuration') obj.totalWatchDuration = s || '0';
  239. if (kind === 'completionRate') obj.completionRate = s || '0';
  240. }
  241. }
  242. }
  243. return result;
  244. }
  245. export { parseCookiesFromAccount, createBrowserForAccount };
  246. export class XiaohongshuAccountOverviewImportService {
  247. private accountRepository = AppDataSource.getRepository(PlatformAccount);
  248. private userDayStatisticsService = new UserDayStatisticsService();
  249. private downloadDir = path.resolve(process.cwd(), 'tmp', 'xhs-account-overview');
  250. private stateDir = path.resolve(process.cwd(), 'tmp', 'xhs-storage-state');
  251. private getStatePath(accountId: number) {
  252. return path.join(this.stateDir, `${accountId}.json`);
  253. }
  254. private async ensureStorageState(account: PlatformAccount, cookies: PlaywrightCookie[]): Promise<string | null> {
  255. const statePath = this.getStatePath(account.id);
  256. try {
  257. await fs.access(statePath);
  258. return statePath;
  259. } catch {
  260. // no state
  261. }
  262. // 需要你在弹出的浏览器里完成一次登录/验证,然后脚本会自动保存 storageState
  263. // 启用方式:XHS_IMPORT_HEADLESS=0 且 XHS_STORAGE_STATE_BOOTSTRAP=1
  264. if (!(process.env.XHS_IMPORT_HEADLESS === '0' && process.env.XHS_STORAGE_STATE_BOOTSTRAP === '1')) {
  265. return null;
  266. }
  267. await ensureDir(this.stateDir);
  268. logger.warn(`[XHS Import] No storageState for accountId=${account.id}. Bootstrapping... 请在弹出的浏览器中完成登录/验证。`);
  269. const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
  270. try {
  271. const context = await browser.newContext({
  272. viewport: { width: 1920, height: 1080 },
  273. locale: 'zh-CN',
  274. timezoneId: 'Asia/Shanghai',
  275. });
  276. await context.addCookies(cookies as any);
  277. const page = await context.newPage();
  278. await page.goto('https://creator.xiaohongshu.com/statistics/account/v2', { waitUntil: 'domcontentloaded' });
  279. // 最长等 5 分钟:让你手动完成登录/滑块/短信等
  280. await page
  281. .waitForFunction(() => {
  282. const t = document.body?.innerText || '';
  283. return t.includes('账号概览') || t.includes('数据总览') || t.includes('观看数据');
  284. }, { timeout: 5 * 60_000 })
  285. .catch(() => undefined);
  286. await context.storageState({ path: statePath });
  287. logger.info(`[XHS Import] storageState saved: ${statePath}`);
  288. await context.close();
  289. return statePath;
  290. } finally {
  291. if (shouldClose) await browser.close().catch(() => undefined);
  292. }
  293. }
  294. /**
  295. * 统一入口:定时任务与添加账号均调用此方法,执行“账号概览-观看/互动/涨粉-近30日 + 粉丝 overall_new”
  296. */
  297. static async runDailyImport(): Promise<void> {
  298. const svc = new XiaohongshuAccountOverviewImportService();
  299. await svc.runDailyImportForAllXhsAccounts();
  300. }
  301. /**
  302. * 为所有小红书账号导出“观看数据-近30日”并导入 user_day_statistics
  303. */
  304. async runDailyImportForAllXhsAccounts(): Promise<void> {
  305. await ensureDir(this.downloadDir);
  306. const accounts = await this.accountRepository.find({
  307. where: { platform: 'xiaohongshu' as any },
  308. });
  309. logger.info(`[XHS Import] Start. total_accounts=${accounts.length}`);
  310. for (const account of accounts) {
  311. try {
  312. await this.importAccountLast30Days(account);
  313. } catch (e) {
  314. logger.error(`[XHS Import] Account failed. accountId=${account.id} name=${account.accountName || ''}`, e);
  315. }
  316. }
  317. logger.info('[XHS Import] Done.');
  318. }
  319. /**
  320. * 单账号:导出 Excel → 解析 → 入库 → 删除文件
  321. */
  322. async importAccountLast30Days(account: PlatformAccount, isRetry = false): Promise<void> {
  323. const cookies = parseCookiesFromAccount(account.cookieData);
  324. if (!cookies.length) {
  325. throw new Error('cookieData 为空或无法解析');
  326. }
  327. const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
  328. try {
  329. const statePath = await this.ensureStorageState(account, cookies);
  330. const context = await browser.newContext({
  331. acceptDownloads: true,
  332. viewport: { width: 1920, height: 1080 },
  333. locale: 'zh-CN',
  334. timezoneId: 'Asia/Shanghai',
  335. userAgent:
  336. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  337. ...(statePath ? { storageState: statePath } : {}),
  338. });
  339. context.setDefaultTimeout(60_000);
  340. // 如果没 state,就退回 cookie-only(可能导出为 0)
  341. if (!statePath) {
  342. await context.addCookies(cookies as any);
  343. }
  344. const page = await context.newPage();
  345. // account/base 在页面加载时自动请求,先挂监听再访问
  346. const accountBasePattern = /\/api\/galaxy\/v2\/creator\/datacenter\/account\/base/i;
  347. const responsePromise = page.waitForResponse(
  348. (r) => r.url().match(accountBasePattern) != null && r.request().method() === 'GET',
  349. { timeout: 30_000 }
  350. );
  351. await page.goto('https://creator.xiaohongshu.com/statistics/account/v2', { waitUntil: 'domcontentloaded' });
  352. await page.waitForTimeout(3000); // 等几秒,让页面发起 account/base 请求
  353. if (page.url().includes('login')) {
  354. // 第一次检测到登录失效时,尝试刷新账号
  355. if (!isRetry) {
  356. logger.info(`[XHS Import] Login expired detected for account ${account.id}, attempting to refresh...`);
  357. await context.close();
  358. if (shouldClose) await browser.close();
  359. try {
  360. const accountService = new AccountService();
  361. const refreshResult = await accountService.refreshAccount(account.userId, account.id);
  362. if (refreshResult.needReLogin) {
  363. // 刷新后仍需要重新登录,走原先的失效流程
  364. logger.warn(`[XHS Import] Account ${account.id} refresh failed, still needs re-login`);
  365. throw new Error('未登录/需要重新登录(跳转到 login)');
  366. }
  367. // 刷新成功,重新获取账号信息并重试导入
  368. logger.info(`[XHS Import] Account ${account.id} refreshed successfully, retrying import...`);
  369. const refreshedAccount = await this.accountRepository.findOne({ where: { id: account.id } });
  370. if (!refreshedAccount) {
  371. throw new Error('账号刷新后未找到');
  372. }
  373. // 递归调用,标记为重试
  374. return await this.importAccountLast30Days(refreshedAccount, true);
  375. } catch (refreshError) {
  376. logger.error(`[XHS Import] Account ${account.id} refresh failed:`, refreshError);
  377. throw new Error('未登录/需要重新登录(跳转到 login)');
  378. }
  379. } else {
  380. // 已经是重试了,不再尝试刷新
  381. throw new Error('未登录/需要重新登录(跳转到 login)');
  382. }
  383. }
  384. // 检测“暂无访问权限 / 权限申请中”提示:仅推送提示,不修改账号状态(避免误判或用户不想自动变更)
  385. const bodyText = (await page.textContent('body').catch(() => '')) || '';
  386. if (bodyText.includes('暂无访问权限') || bodyText.includes('数据权限申请中') || bodyText.includes('次日再来查看')) {
  387. // await this.accountRepository.update(account.id, { status: 'expired' as any });
  388. wsManager.sendToUser(account.userId, WS_EVENTS.SYSTEM_MESSAGE, {
  389. level: 'warning',
  390. message: `小红书账号「${account.accountName || account.accountId || account.id}」暂无数据看板访问权限,请到小红书创作服务平台申请数据权限(通过后一般次日生效)。`,
  391. platform: 'xiaohongshu',
  392. accountId: account.id,
  393. });
  394. throw new Error('小红书数据看板暂无访问权限/申请中,已通知用户');
  395. }
  396. // 直接监听 account/base,无需点击 账号概览/笔记数据
  397. await this.importFromAccountBaseApi(responsePromise, page, account);
  398. // 粉丝数据页:打开粉丝数据、点击近30天,解析 overall_new 接口,将每日粉丝总数写入 user_day_statistics.fans_count
  399. await this.importFansDataTrendFromPage(context, page, account);
  400. logger.info(`[XHS Import] Account all tabs done. accountId=${account.id}`);
  401. await context.close();
  402. } finally {
  403. if (shouldClose) {
  404. await browser.close().catch(() => undefined);
  405. }
  406. }
  407. }
  408. /**
  409. * 等待 account/base 接口响应,解析 data.thirty 各 *_list 按 date 合并为按日数据并写入 user_day_statistics
  410. * 字段映射:view_list→playCount, impl_count_list→exposureCount, comment_list→commentCount,
  411. * like_list→likeCount, share_list→shareCount, collect_list→collectCount,
  412. * net_rise_fans_count_list→fansIncrease, cover_click_rate_list→coverClickRate(格式化为"14%"),
  413. * avg_view_time_list→avgWatchDuration("12秒"), view_time_list→totalWatchDuration("1866秒"),
  414. * video_full_view_rate_list→completionRate("15%"), publish_note_num_list→worksCount
  415. */
  416. private async importFromAccountBaseApi(
  417. responsePromise: Promise<import('playwright').Response>,
  418. _page: Page,
  419. account: PlatformAccount
  420. ): Promise<void> {
  421. let res: import('playwright').Response;
  422. try {
  423. res = await responsePromise;
  424. } catch {
  425. logger.warn(`[XHS Import] account/base response not captured, skip. accountId=${account.id}`);
  426. return;
  427. }
  428. const body = await res.json().catch(() => null);
  429. if (!body || typeof body !== 'object') {
  430. logger.warn(`[XHS Import] account/base not valid JSON. accountId=${account.id}`);
  431. return;
  432. }
  433. const data = (body as Record<string, unknown>).data as Record<string, unknown> | undefined;
  434. const thirty = data?.thirty as Record<string, unknown> | undefined;
  435. if (!thirty || typeof thirty !== 'object') {
  436. logger.warn(`[XHS Import] account/base data.thirty missing. accountId=${account.id}`);
  437. return;
  438. }
  439. const perDay = this.parseAccountBaseThirty(thirty);
  440. if (perDay.size === 0) {
  441. logger.info(`[XHS Import] account/base no days parsed. accountId=${account.id}`);
  442. return;
  443. }
  444. let inserted = 0;
  445. let updated = 0;
  446. const today = new Date();
  447. today.setHours(0, 0, 0, 0);
  448. for (const v of perDay.values()) {
  449. const { recordDate, ...patch } = v;
  450. if (recordDate.getTime() === today.getTime() && patch.fansCount === undefined && account.fansCount != null && account.fansCount > 0) {
  451. (patch as Record<string, unknown>).fansCount = account.fansCount;
  452. }
  453. const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
  454. inserted += r.inserted;
  455. updated += r.updated;
  456. }
  457. logger.info(
  458. `[XHS Import] account/base imported. accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
  459. );
  460. }
  461. /**
  462. * 解析 data.thirty:各 *_list 每项 { date: 毫秒, count[, count_with_double] },按 date 合并为按日一条
  463. * 注意:接口返回的 date 是「中国时区(Asia/Shanghai)该日 0 点」的 UTC 时间戳,需按中国时区解析日期
  464. */
  465. private parseAccountBaseThirty(thirty: Record<string, unknown>): Map<string, { recordDate: Date } & Record<string, unknown>> {
  466. const map = new Map<string, { recordDate: Date } & Record<string, unknown>>();
  467. // 使用 Intl.DateTimeFormat 获取中国时区的年月日
  468. const cstFormatter = new Intl.DateTimeFormat('en-CA', {
  469. timeZone: 'Asia/Shanghai',
  470. year: 'numeric',
  471. month: '2-digit',
  472. day: '2-digit',
  473. });
  474. const toKey = (ms: number): string => {
  475. // 将 UTC 时间戳转成中国时区的日期字符串 YYYY-MM-DD
  476. return cstFormatter.format(new Date(ms));
  477. };
  478. const toRecordDate = (ms: number): Date => {
  479. // 获取中国时区的年月日
  480. const parts = cstFormatter.formatToParts(new Date(ms));
  481. const get = (type: string) => parts.find((p) => p.type === type)?.value ?? '0';
  482. const y = parseInt(get('year'), 10);
  483. const m = parseInt(get('month'), 10) - 1; // month 是 1-12,Date 构造函数需要 0-11
  484. const d = parseInt(get('day'), 10);
  485. // 构造本地时区的该日 0 点(如果服务器在中国时区,就是中国时区的 0 点)
  486. return new Date(y, m, d, 0, 0, 0, 0);
  487. };
  488. const setFromList = (
  489. listKey: string,
  490. field: string,
  491. formatter?: (n: number) => string | number
  492. ) => {
  493. const arr = thirty[listKey];
  494. if (!Array.isArray(arr)) return;
  495. for (const item of arr) {
  496. if (!item || typeof item !== 'object') continue;
  497. const o = item as Record<string, unknown>;
  498. const dateMs = o.date;
  499. const countRaw = o.count;
  500. if (dateMs == null || countRaw == null) continue;
  501. const ts = typeof dateMs === 'number' ? dateMs : Number(dateMs);
  502. if (!Number.isFinite(ts)) continue;
  503. const key = toKey(ts);
  504. if (!map.has(key)) {
  505. map.set(key, { recordDate: toRecordDate(ts) });
  506. } else {
  507. (map.get(key)!.recordDate as Date) = toRecordDate(ts);
  508. }
  509. const rec = map.get(key)!;
  510. const n = typeof countRaw === 'number' ? countRaw : Number(countRaw);
  511. if (!Number.isFinite(n)) continue;
  512. const val = formatter ? formatter(n) : n;
  513. (rec as Record<string, unknown>)[field] = val;
  514. }
  515. };
  516. setFromList('view_list', 'playCount');
  517. setFromList('impl_count_list', 'exposureCount');
  518. setFromList('comment_list', 'commentCount');
  519. setFromList('like_list', 'likeCount');
  520. setFromList('share_list', 'shareCount');
  521. setFromList('collect_list', 'collectCount');
  522. setFromList('net_rise_fans_count_list', 'fansIncrease');
  523. setFromList('cover_click_rate_list', 'coverClickRate', (n) => `${Math.round(n)}%`);
  524. setFromList('avg_view_time_list', 'avgWatchDuration', (n) => `${Math.round(n)}秒`);
  525. setFromList('view_time_list', 'totalWatchDuration', (n) => `${Math.round(n)}秒`);
  526. setFromList('video_full_view_rate_list', 'completionRate', (n) => `${typeof n === 'number' ? Math.round(n) : n}%`);
  527. setFromList('publish_note_num_list', 'worksCount');
  528. return map;
  529. }
  530. /**
  531. * 粉丝数据页:打开粉丝数据、点击「粉丝数据概览」近30天,监听 overall_new 接口响应,解析每日粉丝总数并写入 user_day_statistics.fans_count
  532. */
  533. private async importFansDataTrendFromPage(
  534. _context: BrowserContext,
  535. page: Page,
  536. account: PlatformAccount
  537. ): Promise<void> {
  538. const fansDataUrl = 'https://creator.xiaohongshu.com/statistics/fans-data';
  539. const overallNewPattern = /\/api\/galaxy\/creator\/data\/fans\/overall_new/i;
  540. const near30ButtonSelector =
  541. '#content-area > main > div:nth-child(3) > div > div.content > div.css-12s9z8c.fans-data-container > div.title-container > div.extra-box > div > label:nth-child(2)';
  542. await page.goto(fansDataUrl, { waitUntil: 'domcontentloaded' });
  543. await page.waitForTimeout(2000);
  544. if (page.url().includes('login')) {
  545. logger.warn(`[XHS Import] Fans data page redirected to login, skip fans trend. accountId=${account.id}`);
  546. return;
  547. }
  548. const responsePromise = page.waitForResponse(
  549. (res) => res.url().match(overallNewPattern) != null && res.request().method() === 'GET',
  550. { timeout: 30_000 }
  551. );
  552. const btn = page.locator(near30ButtonSelector).or(page.locator('.fans-data-container').getByText('近30天').first());
  553. await btn.click().catch(() => undefined);
  554. await page.waitForTimeout(1500);
  555. let res;
  556. try {
  557. res = await responsePromise;
  558. } catch {
  559. try {
  560. res = await page.waitForResponse(
  561. (r) => r.url().match(overallNewPattern) != null && r.request().method() === 'GET',
  562. { timeout: 15_000 }
  563. );
  564. } catch {
  565. logger.warn(`[XHS Import] No overall_new response captured, skip fans trend. accountId=${account.id}`);
  566. return;
  567. }
  568. }
  569. const body = await res.json().catch(() => null);
  570. if (!body || typeof body !== 'object') {
  571. logger.warn(`[XHS Import] overall_new response not valid JSON, skip. accountId=${account.id}`);
  572. return;
  573. }
  574. const list = this.parseFansOverallNewResponse(body);
  575. if (!list.length) {
  576. logger.info(`[XHS Import] No fans trend items from overall_new. accountId=${account.id}`);
  577. return;
  578. }
  579. let updated = 0;
  580. for (const { recordDate, fansCount } of list) {
  581. const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, { fansCount });
  582. updated += r.inserted + r.updated;
  583. }
  584. logger.info(`[XHS Import] Fans trend imported. accountId=${account.id} days=${list.length} updated=${updated}`);
  585. }
  586. /**
  587. * 解析 overall_new 接口返回的 JSON,提取 (recordDate, fansCount) 列表
  588. * 接口格式:data.thirty.fans_list(或 fans_list_iterator),每项 { date: 毫秒时间戳, count: 粉丝数 }
  589. */
  590. private parseFansOverallNewResponse(body: Record<string, unknown>): Array<{ recordDate: Date; fansCount: number }> {
  591. const list: Array<{ recordDate: Date; fansCount: number }> = [];
  592. const data = body.data as Record<string, unknown> | undefined;
  593. if (!data || typeof data !== 'object') return list;
  594. const thirty = data.thirty as Record<string, unknown> | undefined;
  595. if (!thirty || typeof thirty !== 'object') return list;
  596. const arr = (thirty.fans_list as unknown[]) ?? (thirty.fans_list_iterator as unknown[]) ?? [];
  597. if (!Array.isArray(arr)) return list;
  598. for (const item of arr) {
  599. if (!item || typeof item !== 'object') continue;
  600. const o = item as Record<string, unknown>;
  601. const dateMs = o.date;
  602. const countRaw = o.count;
  603. if (dateMs == null || countRaw == null) continue;
  604. const ts = typeof dateMs === 'number' ? dateMs : Number(dateMs);
  605. if (!Number.isFinite(ts)) continue;
  606. const d = new Date(ts);
  607. d.setHours(0, 0, 0, 0);
  608. const n = typeof countRaw === 'number' ? countRaw : Number(countRaw);
  609. if (!Number.isFinite(n) || n < 0) continue;
  610. list.push({ recordDate: d, fansCount: Math.round(n) });
  611. }
  612. return list;
  613. }
  614. }