// XiaohongshuAccountOverviewImportService.ts (24 KB)
  1. import fs from 'node:fs/promises';
  2. import path from 'node:path';
  3. import { chromium, type Browser, type Page, type BrowserContext } from 'playwright';
  4. import * as XLSXNS from 'xlsx';
  5. import { AppDataSource, PlatformAccount } from '../models/index.js';
  6. import { BrowserManager } from '../automation/browser.js';
  7. import { logger } from '../utils/logger.js';
  8. import { UserDayStatisticsService } from './UserDayStatisticsService.js';
  9. import { AccountService } from './AccountService.js';
  10. import type { ProxyConfig } from '@media-manager/shared';
  11. import { WS_EVENTS } from '@media-manager/shared';
  12. import { wsManager } from '../websocket/index.js';
  // Under ESM the xlsx API may be exposed on `default`; fall back to the namespace object itself.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const xlsxModule = XLSXNS as any;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const XLSX: any = xlsxModule.default ?? xlsxModule;
  /**
   * Cookie shape handed to Playwright's `context.addCookies`.
   * Only `name` and `value` are mandatory here; every other attribute is optional.
   */
  type PlaywrightCookie = {
    name: string;
    value: string;
    domain?: string;
    path?: string;
    url?: string;
    expires?: number;
    httpOnly?: boolean;
    secure?: boolean;
    sameSite?: 'Lax' | 'None' | 'Strict';
  };

  /** Statistic columns that can be filled from an exported workbook sheet. */
  type MetricKind =
    // counters
    | 'playCount'
    | 'likeCount'
    | 'commentCount'
    | 'shareCount'
    | 'collectCount'
    | 'fansIncrease'
    // watch-quality values
    | 'coverClickRate'
    | 'avgWatchDuration'
    | 'totalWatchDuration'
    | 'completionRate';

  /** Which export tab a workbook came from; used to filter the sheets that are read. */
  type ExportMode = 'watch' | 'interaction' | 'fans';
  /**
   * Create directory `p`, creating missing parent directories as well
   * (`recursive: true`).
   *
   * @param p Directory path to create.
   * @returns The promise returned by `fs.mkdir`.
   */
  function ensureDir(p: string) {
    const mkdirOptions = { recursive: true } as const;
    return fs.mkdir(p, mkdirOptions);
  }
  /**
   * Convert a spreadsheet date cell into a Date at local midnight.
   *
   * Accepted inputs:
   * - a valid `Date` (copied, then truncated to local midnight);
   * - text containing `YYYY?MM?DD` with any single non-digit separator,
   *   e.g. `2026年01月27日`, `2026-01-27`, `2026/1/27`;
   * - `MM-DD` or `MM/DD`, interpreted in the current year.
   *
   * Calendar-invalid values (month 13, day 45, Feb 30, zero parts) return null
   * instead of silently rolling over into a different date.
   *
   * @param input Raw cell value.
   * @returns Local-midnight Date, or null when the value is not a recognisable date.
   */
  function normalizeDateText(input: unknown): Date | null {
    // Build a local-midnight date and reject anything JavaScript would roll over.
    const buildLocalDate = (yyyy: number, mm: number, dd: number): Date | null => {
      if (!yyyy || !mm || !dd) return null;
      const d = new Date(yyyy, mm - 1, dd);
      d.setHours(0, 0, 0, 0);
      const roundTrips = d.getFullYear() === yyyy && d.getMonth() === mm - 1 && d.getDate() === dd;
      return roundTrips ? d : null;
    };

    if (!input) return null;

    if (input instanceof Date) {
      if (Number.isNaN(input.getTime())) return null;
      const d = new Date(input);
      d.setHours(0, 0, 0, 0);
      return d;
    }

    const s = String(input).trim();

    // Full date, e.g. 2026年01月27日
    const full = s.match(/(\d{4})\D(\d{1,2})\D(\d{1,2})\D?/);
    if (full) {
      return buildLocalDate(Number(full[1]), Number(full[2]), Number(full[3]));
    }

    // Fallback: month-day only (01-27), interpreted in the current year.
    const monthDay = s.match(/^(\d{1,2})[-/](\d{1,2})$/);
    if (monthDay) {
      return buildLocalDate(new Date().getFullYear(), Number(monthDay[1]), Number(monthDay[2]));
    }

    return null;
  }
  /**
   * Parse a number as displayed by the platform into a rounded integer.
   *
   * Handles thousands separators (`8,077`), the Chinese magnitude suffixes
   * 万 (x10,000) and 亿 (x100,000,000) with an optional sign (`4.8万`, `-1.2万`),
   * and plain values with surrounding noise (`12 次`).
   *
   * @param input Raw cell value.
   * @returns The rounded integer, or null when the input is null/undefined,
   *          blank, or contains no digits at all (e.g. `--`, `暂无`).
   */
  function parseChineseNumberLike(input: unknown): number | null {
    if (input === null || input === undefined) return null;
    const s = String(input).trim();
    if (!s) return null;

    // 8,077 -> 8077
    const plain = s.replace(/,/g, '');

    // 4.8万 / -1.2万 / 1.5亿 (the sign must be kept: net follower growth can be negative)
    const scaled = plain.match(/^([+-]?\d+(?:\.\d+)?)\s*([万亿])$/);
    if (scaled) {
      const factor = scaled[2] === '万' ? 10_000 : 100_000_000;
      return Math.round(Number(scaled[1]) * factor);
    }

    const numericText = plain.replace(/[^\d.-]/g, '');
    // Without this guard digit-free text collapses to '' and Number('') === 0.
    if (!/\d/.test(numericText)) return null;

    const n = Number(numericText);
    return Number.isFinite(n) ? Math.round(n) : null;
  }
  /**
   * Map a workbook sheet name to the metric it carries.
   *
   * Rules are evaluated in order and the first match wins; a sheet name that
   * matches no rule yields null.
   *
   * @param sheetName Sheet title as found in the exported workbook.
   * @returns The matching metric kind, or null for unrelated sheets.
   */
  function detectMetricKind(sheetName: string): MetricKind | null {
    const title = sheetName.trim();
    const has = (fragment: string): boolean => title.includes(fragment);
    const isTrend = has('趋势');

    const rules: ReadonlyArray<readonly [MetricKind, boolean]> = [
      // Watch data: the sheet may be named "观看趋势" or "观看数趋势".
      ['playCount', has('观看趋势') || has('观看数')],
      ['coverClickRate', has('封面点击率')],
      ['avgWatchDuration', has('平均观看时长')],
      ['totalWatchDuration', has('观看总时长')],
      ['completionRate', has('完播率')],
      // Interaction data.
      ['likeCount', has('点赞') && isTrend],
      ['commentCount', has('评论') && isTrend],
      ['shareCount', has('分享') && isTrend],
      ['collectCount', has('收藏') && isTrend],
      // Follower data: only the net-growth trend sheet is used.
      ['fansIncrease', has('净涨粉') && isTrend],
    ];

    for (const [kind, matched] of rules) {
      if (matched) return kind;
    }
    return null;
  }
  /**
   * Turn the cookie text stored on an account into Playwright cookies.
   *
   * Two formats are understood:
   * 1. JSON: either an array of cookie objects or an object with a `cookies`
   *    array (typical for browser-extension exports or data saved by the frontend).
   * 2. A header-style string: `a=b; c=d`.
   *
   * Each cookie is scoped EITHER by `domain` + `path` OR by `url`, never both:
   * Playwright's `addCookies` rejects a cookie that carries `url` together with
   * `path` or `domain`. Cookies without a domain are therefore bound to the
   * creator site through `url` only, and any `path` they carried is dropped.
   *
   * @param cookieData Raw cookie text from the account record (may be null).
   * @returns Parsed cookies; empty when the input is blank or unusable.
   */
  function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
    const fallbackUrl = 'https://creator.xiaohongshu.com';

    // Extension exports use lowercase values and `no_restriction` for SameSite=None.
    const normalizeSameSite = (value: unknown): PlaywrightCookie['sameSite'] => {
      if (typeof value !== 'string') return undefined;
      switch (value.toLowerCase()) {
        case 'lax':
          return 'Lax';
        case 'strict':
          return 'Strict';
        case 'none':
        case 'no_restriction':
          return 'None';
        default:
          return undefined;
      }
    };

    const raw = cookieData?.trim();
    if (!raw) return [];

    // 1) JSON array / object (the most common case).
    if (raw.startsWith('[') || raw.startsWith('{')) {
      try {
        const parsed: unknown = JSON.parse(raw);
        const entries: unknown = Array.isArray(parsed)
          ? parsed
          : (parsed as { cookies?: unknown } | null)?.cookies;
        if (!Array.isArray(entries)) return [];

        const fromJson: PlaywrightCookie[] = [];
        for (const entry of entries) {
          const c = (entry ?? {}) as Record<string, unknown>;
          const name = String(c.name ?? '').trim();
          if (!name) continue;

          const cookie: PlaywrightCookie = { name, value: String(c.value ?? '').trim() };
          if (c.domain) {
            cookie.domain = String(c.domain);
            cookie.path = c.path ? String(c.path) : '/';
          } else {
            cookie.url = fallbackUrl;
          }
          if (typeof c.expires === 'number') cookie.expires = c.expires;
          if (typeof c.httpOnly === 'boolean') cookie.httpOnly = c.httpOnly;
          if (typeof c.secure === 'boolean') cookie.secure = c.secure;
          const sameSite = normalizeSameSite(c.sameSite);
          if (sameSite) cookie.sameSite = sameSite;

          fromJson.push(cookie);
        }
        return fromJson;
      } catch {
        // Not valid JSON after all: fall through to the "a=b; c=d" parser.
      }
    }

    // 2) "a=b; c=d" header-style string.
    const fromPairs: PlaywrightCookie[] = [];
    for (const segment of raw.split(';')) {
      const pair = segment.trim();
      const eq = pair.indexOf('=');
      if (eq <= 0) continue; // no '=' at all, or an empty name
      const name = pair.slice(0, eq).trim();
      if (!name) continue;
      fromPairs.push({ name, value: pair.slice(eq + 1).trim(), url: fallbackUrl });
    }
    return fromPairs;
  }
  /**
   * Obtain a browser for one account.
   *
   * Runs headless unless BOTH `XHS_STORAGE_STATE_BOOTSTRAP=1` and
   * `XHS_IMPORT_HEADLESS=0` are set (interactive login/verification bootstrap).
   *
   * @param proxy Account proxy settings; when enabled a dedicated Chromium is launched through it.
   * @returns The browser plus `shouldClose`: true for a dedicated proxy browser
   *          (the caller closes it), false for the browser handed out by BrowserManager.
   */
  async function createBrowserForAccount(proxy: ProxyConfig | null): Promise<{ browser: Browser; shouldClose: boolean }> {
    const bootstrapEnabled = process.env.XHS_STORAGE_STATE_BOOTSTRAP === '1';
    const headfulRequested = process.env.XHS_IMPORT_HEADLESS === '0';
    const headless = !(bootstrapEnabled && headfulRequested);

    // No proxy: reuse the browser provided by BrowserManager.
    if (!proxy?.enabled) {
      const sharedBrowser = await BrowserManager.getBrowser({ headless });
      return { browser: sharedBrowser, shouldClose: false };
    }

    // Proxy accounts get their own Chromium routed through the proxy.
    const launchArgs = [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu',
      '--window-size=1920,1080',
    ];
    const proxiedBrowser = await chromium.launch({
      headless,
      proxy: {
        server: `${proxy.type}://${proxy.host}:${proxy.port}`,
        username: proxy.username,
        password: proxy.password,
      },
      args: launchArgs,
    });
    return { browser: proxiedBrowser, shouldClose: true };
  }
  /**
   * Read an exported workbook and merge its per-day metric sheets into one record per date.
   *
   * Each sheet is mapped to a metric through its name; sheets that do not belong
   * to the requested export `mode` are skipped so unrelated columns are never
   * written. Within a sheet the date is taken from the `日期`/`date`/`Date` column
   * (or the first column) and the value from `数值`/`value`/`Value` (or the second).
   * Count metrics are stored as integers; rate/duration metrics are stored as the
   * trimmed cell text, with `'0'` for blank cells.
   *
   * NOTE(review): dates are recognised only through normalizeDateText — confirm the
   * export stores dates as text rather than numeric Excel serials.
   *
   * @param filePath Path of the downloaded workbook.
   * @param mode Export tab the workbook came from.
   * @returns Map keyed by `YYYY-MM-DD`; each value holds `recordDate` plus the parsed metrics.
   */
  export function parseXhsExcel(
    filePath: string,
    mode: ExportMode
  ): Map<string, { recordDate: Date } & Record<string, any>> {
    type DayRecord = { recordDate: Date } & Record<string, any>;

    const kindsByMode: Record<ExportMode, readonly MetricKind[]> = {
      watch: ['playCount', 'coverClickRate', 'avgWatchDuration', 'totalWatchDuration', 'completionRate'],
      interaction: ['likeCount', 'commentCount', 'shareCount', 'collectCount'],
      fans: ['fansIncrease'],
    };
    const integerKinds: readonly MetricKind[] = [
      'playCount',
      'likeCount',
      'commentCount',
      'shareCount',
      'collectCount',
      'fansIncrease', // may be negative
    ];
    const pad2 = (part: number): string => String(part).padStart(2, '0');

    const workbook = XLSX.readFile(filePath);
    const byDay = new Map<string, DayRecord>();

    logger.info(
      `[XHS Import] Excel loaded. mode=${mode} file=${path.basename(filePath)} sheets=${workbook.SheetNames.join(' | ')}`
    );

    // Undefined only for a mode outside the declared union; then nothing is filtered.
    const allowedKinds = kindsByMode[mode];

    for (const sheetName of workbook.SheetNames) {
      const kind = detectMetricKind(sheetName);
      if (!kind) continue;
      if (allowedKinds && !allowedKinds.includes(kind)) continue;

      const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], { defval: '' }) as Record<string, any>[];
      if (rows.length === 0) {
        logger.warn(`[XHS Import] Sheet empty. name=${sheetName} kind=${kind}`);
      } else {
        const headerKeys = Object.keys(rows[0] || {});
        logger.info(`[XHS Import] Sheet parsed. name=${sheetName} kind=${kind} rows=${rows.length} keys=${headerKeys.join(',')}`);
      }

      for (const row of rows) {
        const columns = Object.keys(row);
        const dateCell = row['日期'] ?? row['date'] ?? row['Date'] ?? row[columns[0] ?? ''];
        const valueCell = row['数值'] ?? row['value'] ?? row['Value'] ?? row[columns[1] ?? ''];

        const day = normalizeDateText(dateCell);
        if (!day) continue;

        const dayKey = `${day.getFullYear()}-${pad2(day.getMonth() + 1)}-${pad2(day.getDate())}`;
        let record = byDay.get(dayKey);
        if (!record) {
          record = { recordDate: day };
          byDay.set(dayKey, record);
        }

        if (integerKinds.includes(kind)) {
          const count = parseChineseNumberLike(valueCell);
          if (count !== null) record[kind] = count;
        } else {
          record[kind] = String(valueCell ?? '').trim() || '0';
        }
      }
    }

    return byDay;
  }

  export { parseCookiesFromAccount, createBrowserForAccount };
  /**
   * Imports Xiaohongshu creator-platform statistics into user_day_statistics.
   *
   * For each account it opens the statistics pages in a browser, exports the
   * last-30-days workbook of the watch / interaction / follower tabs, stores the
   * parsed per-day values, and finally reads the daily follower totals from the
   * `overall_new` API response of the fans-data page.
   */
  export class XiaohongshuAccountOverviewImportService {
    private accountRepository = AppDataSource.getRepository(PlatformAccount);
    private userDayStatisticsService = new UserDayStatisticsService();
    private downloadDir = path.resolve(process.cwd(), 'tmp', 'xhs-account-overview');
    private stateDir = path.resolve(process.cwd(), 'tmp', 'xhs-storage-state');

    /** Path of the persisted Playwright storageState file for an account. */
    private getStatePath(accountId: number): string {
      return path.join(this.stateDir, `${accountId}.json`);
    }

    /**
     * Return the path of the account's storageState file, creating it through an
     * interactive bootstrap when allowed.
     *
     * The bootstrap only runs with `XHS_IMPORT_HEADLESS=0` and
     * `XHS_STORAGE_STATE_BOOTSTRAP=1`: a browser opens and the operator has up to
     * five minutes to finish login/verification. The state is saved only after the
     * statistics page was actually reached; a timed-out bootstrap saves nothing, so
     * a logged-out state is never persisted and reused by later runs.
     *
     * @returns The state file path, or null when no state exists and none was created.
     */
    private async ensureStorageState(account: PlatformAccount, cookies: PlaywrightCookie[]): Promise<string | null> {
      const statePath = this.getStatePath(account.id);
      try {
        await fs.access(statePath);
        return statePath;
      } catch {
        // No state file yet.
      }

      const bootstrapAllowed =
        process.env.XHS_IMPORT_HEADLESS === '0' && process.env.XHS_STORAGE_STATE_BOOTSTRAP === '1';
      if (!bootstrapAllowed) return null;

      await ensureDir(this.stateDir);
      logger.warn(`[XHS Import] No storageState for accountId=${account.id}. Bootstrapping... 请在弹出的浏览器中完成登录/验证。`);

      const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
      try {
        const context = await browser.newContext({
          viewport: { width: 1920, height: 1080 },
          locale: 'zh-CN',
          timezoneId: 'Asia/Shanghai',
        });
        try {
          await context.addCookies(cookies as any);
          const page = await context.newPage();
          await page.goto('https://creator.xiaohongshu.com/statistics/account/v2', { waitUntil: 'domcontentloaded' });

          // Wait up to 5 minutes for the operator to finish login / slider / SMS verification.
          let reachedDashboard = true;
          try {
            await page.waitForFunction(() => {
              const t = document.body?.innerText || '';
              return t.includes('账号概览') || t.includes('数据总览') || t.includes('观看数据');
            }, { timeout: 5 * 60_000 });
          } catch {
            reachedDashboard = false;
          }
          if (!reachedDashboard) {
            logger.warn(`[XHS Import] Bootstrap did not reach the statistics page, storageState not saved. accountId=${account.id}`);
            return null;
          }

          await context.storageState({ path: statePath });
          logger.info(`[XHS Import] storageState saved: ${statePath}`);
          return statePath;
        } finally {
          // Always release the context; the browser may be shared and is then not closed below.
          await context.close().catch(() => undefined);
        }
      } finally {
        if (shouldClose) await browser.close().catch(() => undefined);
      }
    }

    /**
     * Single entry point used by the scheduler and by account creation: runs the
     * last-30-days import (watch / interaction / follower tabs plus follower totals)
     * for every Xiaohongshu account.
     */
    static async runDailyImport(): Promise<void> {
      const svc = new XiaohongshuAccountOverviewImportService();
      await svc.runDailyImportForAllXhsAccounts();
    }

    /**
     * Import the last 30 days for every Xiaohongshu account, one after another.
     * A failing account is logged and does not stop the remaining accounts.
     */
    async runDailyImportForAllXhsAccounts(): Promise<void> {
      await ensureDir(this.downloadDir);
      const accounts = await this.accountRepository.find({
        where: { platform: 'xiaohongshu' as any },
      });
      logger.info(`[XHS Import] Start. total_accounts=${accounts.length}`);

      for (const account of accounts) {
        try {
          await this.importAccountLast30Days(account);
        } catch (e) {
          logger.error(`[XHS Import] Account failed. accountId=${account.id} name=${account.accountName || ''}`, e);
        }
      }
      logger.info('[XHS Import] Done.');
    }

    /**
     * Import one account: export each workbook, parse it, store it, delete the file.
     *
     * When the statistics page redirects to login, the account is refreshed once
     * and the import is retried. Errors raised by that retry propagate unchanged.
     *
     * @param account Account to import.
     * @param isRetry True for the attempt made after a refresh; no further refresh is tried.
     * @throws Error when the cookies are unusable, login is still required, the
     *         data board is not accessible, or any page interaction fails.
     */
    async importAccountLast30Days(account: PlatformAccount, isRetry = false): Promise<void> {
      const cookies = parseCookiesFromAccount(account.cookieData);
      if (!cookies.length) {
        throw new Error('cookieData 为空或无法解析');
      }
      // This method can be called directly, so make sure the download folder exists.
      await ensureDir(this.downloadDir);

      let loginExpired = false;
      const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
      try {
        const statePath = await this.ensureStorageState(account, cookies);
        const context = await browser.newContext({
          acceptDownloads: true,
          viewport: { width: 1920, height: 1080 },
          locale: 'zh-CN',
          timezoneId: 'Asia/Shanghai',
          userAgent:
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          ...(statePath ? { storageState: statePath } : {}),
        });
        try {
          context.setDefaultTimeout(60_000);
          // Without a saved state fall back to cookies only (exports may then be empty).
          // On the retry the just-refreshed cookies are layered over the saved state,
          // otherwise the retry would replay exactly the session that just failed.
          if (!statePath || isRetry) {
            await context.addCookies(cookies as any);
          }

          const page = await context.newPage();
          await page.goto('https://creator.xiaohongshu.com/statistics/account/v2', { waitUntil: 'domcontentloaded' });
          await page.waitForTimeout(1500);

          if (page.url().includes('login')) {
            loginExpired = true;
          } else {
            await this.importOverviewTabs(context, page, account);
          }
        } finally {
          // Release the context on every path; a shared browser is not closed below.
          await context.close().catch(() => undefined);
        }
      } finally {
        if (shouldClose) {
          await browser.close().catch(() => undefined);
        }
      }

      if (!loginExpired) return;

      // Already the retry: do not refresh again.
      if (isRetry) {
        throw new Error('未登录/需要重新登录(跳转到 login)');
      }

      const refreshedAccount = await this.refreshAccountAfterLoginExpiry(account);
      await this.importAccountLast30Days(refreshedAccount, true);
    }

    /**
     * Refresh an account whose session was redirected to login and reload it.
     * Every refresh failure is logged and reported as "login required".
     */
    private async refreshAccountAfterLoginExpiry(account: PlatformAccount): Promise<PlatformAccount> {
      logger.info(`[XHS Import] Login expired detected for account ${account.id}, attempting to refresh...`);
      try {
        const accountService = new AccountService();
        const refreshResult = await accountService.refreshAccount(account.userId, account.id);
        if (refreshResult.needReLogin) {
          logger.warn(`[XHS Import] Account ${account.id} refresh failed, still needs re-login`);
          throw new Error('未登录/需要重新登录(跳转到 login)');
        }

        logger.info(`[XHS Import] Account ${account.id} refreshed successfully, retrying import...`);
        const refreshedAccount = await this.accountRepository.findOne({ where: { id: account.id } });
        if (!refreshedAccount) {
          throw new Error('账号刷新后未找到');
        }
        return refreshedAccount;
      } catch (refreshError) {
        logger.error(`[XHS Import] Account ${account.id} refresh failed:`, refreshError);
        throw new Error('未登录/需要重新登录(跳转到 login)');
      }
    }

    /**
     * Run the whole import on an already opened, logged-in statistics page:
     * permission check, the three workbook exports, then the follower totals.
     */
    private async importOverviewTabs(context: BrowserContext, page: Page, account: PlatformAccount): Promise<void> {
      // "No access / permission pending": notify the user only. The account status is
      // deliberately left untouched to avoid misjudging or unwanted automatic changes.
      const bodyText = (await page.textContent('body').catch(() => '')) || '';
      const noAccess =
        bodyText.includes('暂无访问权限') || bodyText.includes('数据权限申请中') || bodyText.includes('次日再来查看');
      if (noAccess) {
        wsManager.sendToUser(account.userId, WS_EVENTS.SYSTEM_MESSAGE, {
          level: 'warning',
          message: `小红书账号「${account.accountName || account.accountId || account.id}」暂无数据看板访问权限,请到小红书创作服务平台申请数据权限(通过后一般次日生效)。`,
          platform: 'xiaohongshu',
          accountId: account.id,
        });
        throw new Error('小红书数据看板暂无访问权限/申请中,已通知用户');
      }

      // Navigation: account overview -> note data.
      await page.getByText('账号概览', { exact: true }).first().click().catch(() => undefined);
      await page.getByText('笔记数据', { exact: true }).first().click();

      // 1) Watch data: plays plus click rate / durations / completion rate.
      await this.exportTabAndImport(page, account, '观看数据', 'watch');
      // 2) Interaction data: likes / comments / collects / shares.
      await this.exportTabAndImport(page, account, '互动数据', 'interaction');
      // 3) Follower data: only the net-growth trend (filtered by the parser).
      await this.exportTabAndImport(page, account, '涨粉数据', 'fans');
      // 4) Fans-data page: daily follower totals from the overall_new response.
      await this.importFansDataTrendFromPage(context, page, account);

      logger.info(`[XHS Import] Account all tabs done. accountId=${account.id}`);
    }

    /**
     * Export the last-30-days workbook of one tab and store its per-day values.
     * The downloaded file is removed afterwards unless `KEEP_XHS_XLSX=true`.
     */
    private async exportTabAndImport(
      page: Page,
      account: PlatformAccount,
      tabText: '观看数据' | '互动数据' | '涨粉数据',
      mode: ExportMode
    ): Promise<void> {
      await page.getByText(tabText, { exact: true }).first().click();
      await page.getByText(/近\d+日/).first().click().catch(() => undefined);
      await page.getByText('近30日', { exact: true }).click();
      await page.waitForTimeout(1200);

      const [download] = await Promise.all([
        page.waitForEvent('download', { timeout: 60_000 }),
        page.getByText('导出数据', { exact: true }).first().click(),
      ]);

      // basename(): the suggested name comes from the remote site and must not escape downloadDir.
      const filename = `${account.id}_${Date.now()}_${path.basename(download.suggestedFilename())}`;
      const filePath = path.join(this.downloadDir, filename);
      await download.saveAs(filePath);

      try {
        const perDay = parseXhsExcel(filePath, mode);
        const todayMs = new Date().setHours(0, 0, 0, 0);
        let inserted = 0;
        let updated = 0;

        for (const { recordDate, ...patch } of perDay.values()) {
          // The workbook only carries follower growth, not the total. For today's row
          // reuse the account's current follower count so the import does not reset it to 0.
          const isToday = recordDate.getTime() === todayMs;
          if (isToday && patch['fansCount'] === undefined && account.fansCount !== undefined && account.fansCount > 0) {
            patch['fansCount'] = account.fansCount;
            logger.info(`[XHS Import] Injected current fansCount=${account.fansCount} for today's record (accountId=${account.id})`);
          }
          const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
          inserted += r.inserted;
          updated += r.updated;
        }

        logger.info(
          `[XHS Import] ${tabText} imported. accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
        );
      } finally {
        // Delete the workbook by default to avoid piling up files; keep it only for debugging.
        if (process.env.KEEP_XHS_XLSX === 'true') {
          logger.warn(`[XHS Import] KEEP_XHS_XLSX=true, keep file: ${filePath}`);
        } else {
          await fs.unlink(filePath).catch(() => undefined);
        }
      }
    }

    /**
     * Open the fans-data page, switch the overview to the last 30 days, capture the
     * `overall_new` API response and store each day's follower total as `fansCount`.
     * Every failure on this page is logged and skipped; nothing is thrown for it.
     */
    private async importFansDataTrendFromPage(
      _context: BrowserContext,
      page: Page,
      account: PlatformAccount
    ): Promise<void> {
      type PageResponse = Awaited<ReturnType<Page['waitForResponse']>>;

      const fansDataUrl = 'https://creator.xiaohongshu.com/statistics/fans-data';
      const overallNewPattern = /\/api\/galaxy\/creator\/data\/fans\/overall_new/i;
      const near30ButtonSelector =
        '#content-area > main > div:nth-child(3) > div > div.content > div.css-12s9z8c.fans-data-container > div.title-container > div.extra-box > div > label:nth-child(2)';
      const isOverallNew = (res: PageResponse): boolean =>
        overallNewPattern.test(res.url()) && res.request().method() === 'GET';

      await page.goto(fansDataUrl, { waitUntil: 'domcontentloaded' });
      await page.waitForTimeout(2000);
      if (page.url().includes('login')) {
        logger.warn(`[XHS Import] Fans data page redirected to login, skip fans trend. accountId=${account.id}`);
        return;
      }

      // Start listening before the click. The rejection handler is attached right away:
      // this wait (30s) can expire while the click below is still waiting for its
      // element, and a rejection without a handler would be an unhandled rejection.
      const firstCapture = page.waitForResponse(isOverallNew, { timeout: 30_000 }).catch(() => null);

      // first(): the exact selector and the text fallback may both match.
      const btn = page
        .locator(near30ButtonSelector)
        .or(page.locator('.fans-data-container').getByText('近30天'))
        .first();
      await btn.click().catch(() => {
        logger.warn(`[XHS Import] Could not click the 30-day switch on the fans data page. accountId=${account.id}`);
      });
      await page.waitForTimeout(1500);

      let res = await firstCapture;
      if (!res) {
        res = await page.waitForResponse(isOverallNew, { timeout: 15_000 }).catch(() => null);
      }
      if (!res) {
        logger.warn(`[XHS Import] No overall_new response captured, skip fans trend. accountId=${account.id}`);
        return;
      }

      const body = await res.json().catch(() => null);
      if (!body || typeof body !== 'object') {
        logger.warn(`[XHS Import] overall_new response not valid JSON, skip. accountId=${account.id}`);
        return;
      }

      const list = this.parseFansOverallNewResponse(body);
      if (!list.length) {
        logger.info(`[XHS Import] No fans trend items from overall_new. accountId=${account.id}`);
        return;
      }

      let updated = 0;
      for (const { recordDate, fansCount } of list) {
        const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, { fansCount });
        updated += r.inserted + r.updated;
      }
      logger.info(`[XHS Import] Fans trend imported. accountId=${account.id} days=${list.length} updated=${updated}`);
    }

    /**
     * Extract (recordDate, fansCount) pairs from an `overall_new` response body.
     *
     * Reads `data.thirty.fans_list` (or `fans_list_iterator`), whose items look like
     * `{ date: <millisecond timestamp>, count: <followers> }`. Items with a missing,
     * blank or non-numeric date/count, an unrepresentable date, or a negative count
     * are skipped. Dates are truncated to local midnight.
     *
     * NOTE(review): truncation uses the server's local timezone — confirm it matches
     * the timezone the platform uses for its daily buckets.
     */
    private parseFansOverallNewResponse(body: Record<string, unknown>): Array<{ recordDate: Date; fansCount: number }> {
      // Numbers and non-blank numeric strings only; '' must not turn into 0.
      const toFiniteNumber = (value: unknown): number | null => {
        if (typeof value === 'number') return Number.isFinite(value) ? value : null;
        if (typeof value === 'string' && value.trim() !== '') {
          const parsed = Number(value);
          return Number.isFinite(parsed) ? parsed : null;
        }
        return null;
      };

      const list: Array<{ recordDate: Date; fansCount: number }> = [];
      const data = body.data as Record<string, unknown> | undefined;
      if (!data || typeof data !== 'object') return list;
      const thirty = data.thirty as Record<string, unknown> | undefined;
      if (!thirty || typeof thirty !== 'object') return list;

      const source = thirty.fans_list ?? thirty.fans_list_iterator;
      if (!Array.isArray(source)) return list;

      for (const item of source) {
        if (!item || typeof item !== 'object') continue;
        const entry = item as Record<string, unknown>;

        const ts = toFiniteNumber(entry.date);
        const count = toFiniteNumber(entry.count);
        if (ts === null || count === null || count < 0) continue;

        const day = new Date(ts);
        if (Number.isNaN(day.getTime())) continue; // finite but outside the Date range
        day.setHours(0, 0, 0, 0);

        list.push({ recordDate: day, fansCount: Math.round(count) });
      }
      return list;
    }
  }