Ethanfly 16 時間 前
コミット
27acfdb3a7

+ 27 - 0
docs/xiaohongshu-account-base-api-field-mapping.md

@@ -0,0 +1,27 @@
+# 小红书账号概览接口 data.thirty 字段映射
+
+接口:`GET https://creator.xiaohongshu.com/api/galaxy/v2/creator/datacenter/account/base`  
+使用返回体中的 **`data.thirty`**(近 30 天)各 `*_list`,按 `date`(毫秒时间戳)合并为「按日一条」后写入 `user_day_statistics`。
+
+**流程**:访问 `https://creator.xiaohongshu.com/statistics/account/v2` 后等待几秒,页面加载时会自动请求 account/base,监听该响应即可,无需点击 账号概览/笔记数据。
+
+## 映射表(供核对)
+
+| user_day_statistics 字段 | data.thirty 来源 | 说明 |
+|--------------------------|------------------|------|
+| record_date | 各 list 项的 `date`(毫秒 → 当天 0 点) | 按日期合并多条 list |
+| play_count | view_list[].count | 播放(阅读)量 |
+| exposure_count | impl_count_list[].count | 曝光量 |
+| comment_count | comment_list[].count | 评论量 |
+| like_count | like_list[].count | 点赞量 |
+| share_count | share_list[].count | 分享量 |
+| collect_count | collect_list[].count | 收藏量 |
+| fans_increase | net_rise_fans_count_list[].count | 净涨粉(可为负) |
+| cover_click_rate | cover_click_rate_list[].count | 封面点击率,格式化为 "14%" 等字符串 |
+| avg_watch_duration | avg_view_time_list[].count | 平均观看时长(秒),格式化为 "12秒" 等 |
+| total_watch_duration | view_time_list[].count | 观看总时长(秒),格式化为 "1866秒" 等 |
+| completion_rate | video_full_view_rate_list[].count | 完播率(%),格式化为 "15%" 等 |
+| works_count | publish_note_num_list[].count | 发布笔记数 |
+| fans_count | **不来自本接口** | 仍由「粉丝数据」页 overall_new 接口写入 |
+
+说明:`date` 为毫秒时间戳(接口给出的是中国时区该日 0 点对应的 UTC 时间戳);先按中国时区(Asia/Shanghai)取年月日,再以服务器本地时区构造「当天 0 点」作为 `record_date`(服务器位于中国时区时即为北京时间 0 点);多条 list 按同一日期合并成一条日维度记录后写入。

+ 148 - 71
server/src/services/XiaohongshuAccountOverviewImportService.ts

@@ -386,8 +386,16 @@ export class XiaohongshuAccountOverviewImportService {
       }
 
       const page = await context.newPage();
+
+      // account/base 在页面加载时自动请求,先挂监听再访问
+      const accountBasePattern = /\/api\/galaxy\/v2\/creator\/datacenter\/account\/base/i;
+      const responsePromise = page.waitForResponse(
+        (r) => r.url().match(accountBasePattern) != null && r.request().method() === 'GET',
+        { timeout: 30_000 }
+      );
+
       await page.goto('https://creator.xiaohongshu.com/statistics/account/v2', { waitUntil: 'domcontentloaded' });
-      await page.waitForTimeout(1500);
+      await page.waitForTimeout(3000); // 等几秒,让页面发起 account/base 请求
 
       if (page.url().includes('login')) {
         // 第一次检测到登录失效时,尝试刷新账号
@@ -438,77 +446,10 @@ export class XiaohongshuAccountOverviewImportService {
         throw new Error('小红书数据看板暂无访问权限/申请中,已通知用户');
       }
 
-      // 统一入口:账号概览 -> 笔记数据
-      await page.getByText('账号概览', { exact: true }).first().click().catch(() => undefined);
-      await page.getByText('笔记数据', { exact: true }).first().click();
-
-      const exportAndImport = async (tabText: '观看数据' | '互动数据' | '涨粉数据' | '发布数据', mode: ExportMode) => {
-        await page.getByText(tabText, { exact: true }).first().click();
-        await page.getByText(/近\d+日/).first().click().catch(() => undefined);
-        await page.getByText('近30日', { exact: true }).click();
-        await page.waitForTimeout(1200);
-
-        const [download] = await Promise.all([
-          page.waitForEvent('download', { timeout: 60_000 }),
-          page.getByText('导出数据', { exact: true }).first().click(),
-        ]);
-
-        const filename = `${account.id}_${Date.now()}_${download.suggestedFilename()}`;
-        const filePath = path.join(this.downloadDir, filename);
-        await download.saveAs(filePath);
-
-        let perDay = new Map<string, { recordDate: Date } & Record<string, any>>();
-        let inserted = 0;
-        let updated = 0;
-        try {
-          perDay = parseXhsExcel(filePath, mode);
-          for (const v of perDay.values()) {
-            const { recordDate, ...patch } = v;
-
-            // 修正:如果导入的数据是今天的,且没有粉丝总数(Excel只有涨粉数),则使用账号当前的粉丝数
-            // 避免因为导入导致今天的粉丝数被重置为 0
-            const today = new Date();
-            today.setHours(0, 0, 0, 0);
-
-            // 比较时间戳
-            if (recordDate.getTime() === today.getTime()) {
-              if ((patch as any).fansCount === undefined && account.fansCount !== undefined && account.fansCount > 0) {
-                (patch as any).fansCount = account.fansCount;
-                logger.info(`[XHS Import] Injected current fansCount=${account.fansCount} for today's record (accountId=${account.id})`);
-              }
-            }
+      // 直接监听 account/base,无需点击 账号概览/笔记数据
+      await this.importFromAccountBaseApi(responsePromise, page, account);
 
-            const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
-            inserted += r.inserted;
-            updated += r.updated;
-          }
-
-          logger.info(
-            `[XHS Import] ${tabText} imported. accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
-          );
-        } finally {
-          // 默认导入后删除 Excel,避免磁盘堆积;仅在显式 KEEP_XHS_XLSX=true 时保留(用于调试)
-          if (process.env.KEEP_XHS_XLSX === 'true') {
-            logger.warn(`[XHS Import] KEEP_XHS_XLSX=true, keep file: ${filePath}`);
-          } else {
-            await fs.unlink(filePath).catch(() => undefined);
-          }
-        }
-      };
-
-      // 1) 观看数据:播放数 + 点击率/时长/完播率
-      await exportAndImport('观看数据', 'watch');
-
-      // 2) 互动数据:点赞/评论/收藏/分享
-      await exportAndImport('互动数据', 'interaction');
-
-      // 3) 涨粉数据:只取“净涨粉趋势”(解析器已过滤)
-      await exportAndImport('涨粉数据', 'fans');
-
-      // 4) 发布数据:近30日导出,解析「总发布趋势」→ user_day_statistics.works_count
-      await exportAndImport('发布数据', 'publish');
-
-      // 5) 粉丝数据页:打开粉丝数据、点击近30天,解析 overall_new 接口,将每日粉丝总数写入 user_day_statistics.fans_count
+      // 粉丝数据页:打开粉丝数据、点击近30天,解析 overall_new 接口,将每日粉丝总数写入 user_day_statistics.fans_count
       await this.importFansDataTrendFromPage(context, page, account);
 
       logger.info(`[XHS Import] Account all tabs done. accountId=${account.id}`);
@@ -522,6 +463,142 @@ export class XiaohongshuAccountOverviewImportService {
   }
 
   /**
+   * Awaits the account/base API response, merges the data.thirty *_list series by date into per-day records, and writes them to user_day_statistics.
+   * Field mapping: view_list→playCount, impl_count_list→exposureCount, comment_list→commentCount,
+   * like_list→likeCount, share_list→shareCount, collect_list→collectCount,
+   * net_rise_fans_count_list→fansIncrease, cover_click_rate_list→coverClickRate (formatted as "14%"),
+   * avg_view_time_list→avgWatchDuration ("12秒"), view_time_list→totalWatchDuration ("1866秒"),
+   * video_full_view_rate_list→completionRate ("15%"), publish_note_num_list→worksCount. Best-effort: a missed response, non-JSON body, missing data.thirty or zero parsed days is logged and skipped (nothing is thrown for those cases); `_page` is not used.
+   */
+  private async importFromAccountBaseApi(
+    responsePromise: Promise<import('playwright').Response>,
+    _page: Page,
+    account: PlatformAccount
+  ): Promise<void> {
+    let res: import('playwright').Response;
+    try {
+      res = await responsePromise;
+    } catch { // responsePromise rejected (presumably the caller's waitForResponse timed out — confirm at the call site)
+      logger.warn(`[XHS Import] account/base response not captured, skip. accountId=${account.id}`);
+      return;
+    }
+
+    const body = await res.json().catch(() => null); // a body that cannot be parsed as JSON becomes null
+    if (!body || typeof body !== 'object') {
+      logger.warn(`[XHS Import] account/base not valid JSON. accountId=${account.id}`);
+      return;
+    }
+
+    const data = (body as Record<string, unknown>).data as Record<string, unknown> | undefined;
+    const thirty = data?.thirty as Record<string, unknown> | undefined;
+    if (!thirty || typeof thirty !== 'object') {
+      logger.warn(`[XHS Import] account/base data.thirty missing. accountId=${account.id}`);
+      return;
+    }
+
+    const perDay = this.parseAccountBaseThirty(thirty);
+    if (perDay.size === 0) {
+      logger.info(`[XHS Import] account/base no days parsed. accountId=${account.id}`);
+      return;
+    }
+
+    let inserted = 0;
+    let updated = 0;
+    const today = new Date();
+    today.setHours(0, 0, 0, 0); // midnight today in the server's local time zone, compared against recordDate below
+
+    for (const v of perDay.values()) {
+      const { recordDate, ...patch } = v;
+      if (recordDate.getTime() === today.getTime() && patch.fansCount === undefined && account.fansCount != null && account.fansCount > 0) {
+        (patch as Record<string, unknown>).fansCount = account.fansCount; // today's row carries the account's current fansCount; the parser never sets this field itself
+      }
+      const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
+      inserted += r.inserted;
+      updated += r.updated;
+    }
+
+    logger.info(
+      `[XHS Import] account/base imported. accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
+    );
+  }
+
+  /**
+   * Parses data.thirty: each *_list item is { date: milliseconds, count[, count_with_double] }; items sharing a date are merged into one per-day record keyed by YYYY-MM-DD.
+   * Note: the API's date is the UTC timestamp of 00:00 of that day in the China time zone (Asia/Shanghai), so the calendar date is resolved in that zone. NOTE(review): recordDate is then rebuilt as midnight in the server's local zone — confirm the server runs in CST.
+   */
+  private parseAccountBaseThirty(thirty: Record<string, unknown>): Map<string, { recordDate: Date } & Record<string, unknown>> {
+    const map = new Map<string, { recordDate: Date } & Record<string, unknown>>();
+
+    // Use Intl.DateTimeFormat to obtain the year/month/day in the China time zone
+    const cstFormatter = new Intl.DateTimeFormat('en-CA', {
+      timeZone: 'Asia/Shanghai',
+      year: 'numeric',
+      month: '2-digit',
+      day: '2-digit',
+    });
+
+    const toKey = (ms: number): string => {
+      // Convert the UTC timestamp to a YYYY-MM-DD date string in the China time zone
+      return cstFormatter.format(new Date(ms));
+    };
+
+    const toRecordDate = (ms: number): Date => {
+      // Get the year/month/day in the China time zone
+      const parts = cstFormatter.formatToParts(new Date(ms));
+      const get = (type: string) => parts.find((p) => p.type === type)?.value ?? '0';
+      const y = parseInt(get('year'), 10);
+      const m = parseInt(get('month'), 10) - 1; // month is 1-12; the Date constructor expects 0-11
+      const d = parseInt(get('day'), 10);
+      // Build 00:00 of that day in the server's local time zone (equals China-time midnight only when the server runs in the China time zone)
+      return new Date(y, m, d, 0, 0, 0, 0);
+    };
+
+    const setFromList = (
+      listKey: string,
+      field: string,
+      formatter?: (n: number) => string | number
+    ) => {
+      const arr = thirty[listKey];
+      if (!Array.isArray(arr)) return; // a missing or non-array list is silently skipped
+      for (const item of arr) {
+        if (!item || typeof item !== 'object') continue;
+        const o = item as Record<string, unknown>;
+        const dateMs = o.date;
+        const countRaw = o.count;
+        if (dateMs == null || countRaw == null) continue;
+        const ts = typeof dateMs === 'number' ? dateMs : Number(dateMs); // also accepts numeric strings
+        if (!Number.isFinite(ts)) continue;
+        const key = toKey(ts);
+        if (!map.has(key)) {
+          map.set(key, { recordDate: toRecordDate(ts) });
+        } else {
+          (map.get(key)!.recordDate as Date) = toRecordDate(ts); // NOTE(review): the same key means the same China-time date, so this reassigns an equal value — looks redundant
+        }
+        const rec = map.get(key)!;
+        const n = typeof countRaw === 'number' ? countRaw : Number(countRaw);
+        if (!Number.isFinite(n)) continue; // NOTE(review): the day entry was already created above, so a day with only invalid counts still yields a record with no metrics
+        const val = formatter ? formatter(n) : n;
+        (rec as Record<string, unknown>)[field] = val; // a later item for the same day and field overwrites the earlier value
+      }
+    };
+
+    setFromList('view_list', 'playCount');
+    setFromList('impl_count_list', 'exposureCount');
+    setFromList('comment_list', 'commentCount');
+    setFromList('like_list', 'likeCount');
+    setFromList('share_list', 'shareCount');
+    setFromList('collect_list', 'collectCount');
+    setFromList('net_rise_fans_count_list', 'fansIncrease');
+    setFromList('cover_click_rate_list', 'coverClickRate', (n) => `${Math.round(n)}%`); // rounds to a whole number; assumes the API already reports percent units — TODO confirm
+    setFromList('avg_view_time_list', 'avgWatchDuration', (n) => `${Math.round(n)}秒`);
+    setFromList('view_time_list', 'totalWatchDuration', (n) => `${Math.round(n)}秒`);
+    setFromList('video_full_view_rate_list', 'completionRate', (n) => `${typeof n === 'number' ? Math.round(n) : n}%`); // NOTE(review): n is always a finite number here, so the typeof guard is always true
+    setFromList('publish_note_num_list', 'worksCount');
+
+    return map;
+  }
+
+  /**
    * 粉丝数据页:打开粉丝数据、点击「粉丝数据概览」近30天,监听 overall_new 接口响应,解析每日粉丝总数并写入 user_day_statistics.fans_count
    */
   private async importFansDataTrendFromPage(