|
|
@@ -386,8 +386,16 @@ export class XiaohongshuAccountOverviewImportService {
|
|
|
}
|
|
|
|
|
|
const page = await context.newPage();
|
|
|
+
|
|
|
+ // account/base 在页面加载时自动请求,先挂监听再访问
|
|
|
+ const accountBasePattern = /\/api\/galaxy\/v2\/creator\/datacenter\/account\/base/i;
|
|
|
+ const responsePromise = page.waitForResponse(
|
|
|
+ (r) => r.url().match(accountBasePattern) != null && r.request().method() === 'GET',
|
|
|
+ { timeout: 30_000 }
|
|
|
+ );
|
|
|
+
|
|
|
await page.goto('https://creator.xiaohongshu.com/statistics/account/v2', { waitUntil: 'domcontentloaded' });
|
|
|
- await page.waitForTimeout(1500);
|
|
|
+ await page.waitForTimeout(3000); // 等几秒,让页面发起 account/base 请求
|
|
|
|
|
|
if (page.url().includes('login')) {
|
|
|
// 第一次检测到登录失效时,尝试刷新账号
|
|
|
@@ -438,77 +446,10 @@ export class XiaohongshuAccountOverviewImportService {
|
|
|
throw new Error('小红书数据看板暂无访问权限/申请中,已通知用户');
|
|
|
}
|
|
|
|
|
|
- // 统一入口:账号概览 -> 笔记数据
|
|
|
- await page.getByText('账号概览', { exact: true }).first().click().catch(() => undefined);
|
|
|
- await page.getByText('笔记数据', { exact: true }).first().click();
|
|
|
-
|
|
|
- const exportAndImport = async (tabText: '观看数据' | '互动数据' | '涨粉数据' | '发布数据', mode: ExportMode) => {
|
|
|
- await page.getByText(tabText, { exact: true }).first().click();
|
|
|
- await page.getByText(/近\d+日/).first().click().catch(() => undefined);
|
|
|
- await page.getByText('近30日', { exact: true }).click();
|
|
|
- await page.waitForTimeout(1200);
|
|
|
-
|
|
|
- const [download] = await Promise.all([
|
|
|
- page.waitForEvent('download', { timeout: 60_000 }),
|
|
|
- page.getByText('导出数据', { exact: true }).first().click(),
|
|
|
- ]);
|
|
|
-
|
|
|
- const filename = `${account.id}_${Date.now()}_${download.suggestedFilename()}`;
|
|
|
- const filePath = path.join(this.downloadDir, filename);
|
|
|
- await download.saveAs(filePath);
|
|
|
-
|
|
|
- let perDay = new Map<string, { recordDate: Date } & Record<string, any>>();
|
|
|
- let inserted = 0;
|
|
|
- let updated = 0;
|
|
|
- try {
|
|
|
- perDay = parseXhsExcel(filePath, mode);
|
|
|
- for (const v of perDay.values()) {
|
|
|
- const { recordDate, ...patch } = v;
|
|
|
-
|
|
|
- // 修正:如果导入的数据是今天的,且没有粉丝总数(Excel只有涨粉数),则使用账号当前的粉丝数
|
|
|
- // 避免因为导入导致今天的粉丝数被重置为 0
|
|
|
- const today = new Date();
|
|
|
- today.setHours(0, 0, 0, 0);
|
|
|
-
|
|
|
- // 比较时间戳
|
|
|
- if (recordDate.getTime() === today.getTime()) {
|
|
|
- if ((patch as any).fansCount === undefined && account.fansCount !== undefined && account.fansCount > 0) {
|
|
|
- (patch as any).fansCount = account.fansCount;
|
|
|
- logger.info(`[XHS Import] Injected current fansCount=${account.fansCount} for today's record (accountId=${account.id})`);
|
|
|
- }
|
|
|
- }
|
|
|
+ // 直接监听 account/base,无需点击 账号概览/笔记数据
|
|
|
+ await this.importFromAccountBaseApi(responsePromise, page, account);
|
|
|
|
|
|
- const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
|
|
|
- inserted += r.inserted;
|
|
|
- updated += r.updated;
|
|
|
- }
|
|
|
-
|
|
|
- logger.info(
|
|
|
- `[XHS Import] ${tabText} imported. accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
|
|
|
- );
|
|
|
- } finally {
|
|
|
- // 默认导入后删除 Excel,避免磁盘堆积;仅在显式 KEEP_XHS_XLSX=true 时保留(用于调试)
|
|
|
- if (process.env.KEEP_XHS_XLSX === 'true') {
|
|
|
- logger.warn(`[XHS Import] KEEP_XHS_XLSX=true, keep file: ${filePath}`);
|
|
|
- } else {
|
|
|
- await fs.unlink(filePath).catch(() => undefined);
|
|
|
- }
|
|
|
- }
|
|
|
- };
|
|
|
-
|
|
|
- // 1) 观看数据:播放数 + 点击率/时长/完播率
|
|
|
- await exportAndImport('观看数据', 'watch');
|
|
|
-
|
|
|
- // 2) 互动数据:点赞/评论/收藏/分享
|
|
|
- await exportAndImport('互动数据', 'interaction');
|
|
|
-
|
|
|
- // 3) 涨粉数据:只取“净涨粉趋势”(解析器已过滤)
|
|
|
- await exportAndImport('涨粉数据', 'fans');
|
|
|
-
|
|
|
- // 4) 发布数据:近30日导出,解析「总发布趋势」→ user_day_statistics.works_count
|
|
|
- await exportAndImport('发布数据', 'publish');
|
|
|
-
|
|
|
- // 5) 粉丝数据页:打开粉丝数据、点击近30天,解析 overall_new 接口,将每日粉丝总数写入 user_day_statistics.fans_count
|
|
|
+ // 粉丝数据页:打开粉丝数据、点击近30天,解析 overall_new 接口,将每日粉丝总数写入 user_day_statistics.fans_count
|
|
|
await this.importFansDataTrendFromPage(context, page, account);
|
|
|
|
|
|
logger.info(`[XHS Import] Account all tabs done. accountId=${account.id}`);
|
|
|
@@ -522,6 +463,142 @@ export class XiaohongshuAccountOverviewImportService {
|
|
|
}
|
|
|
|
|
|
/**
 * Awaits the already-registered `account/base` response, merges the per-metric lists found
 * under `data.thirty` into one record per day and persists each day through
 * `userDayStatisticsService.saveStatisticsForDate`.
 *
 * Field mapping (performed by `parseAccountBaseThirty`):
 *   view_list → playCount, impl_count_list → exposureCount, comment_list → commentCount,
 *   like_list → likeCount, share_list → shareCount, collect_list → collectCount,
 *   net_rise_fans_count_list → fansIncrease, cover_click_rate_list → coverClickRate ("14%"),
 *   avg_view_time_list → avgWatchDuration ("12秒"), view_time_list → totalWatchDuration ("1866秒"),
 *   video_full_view_rate_list → completionRate ("15%"), publish_note_num_list → worksCount.
 *
 * Every "nothing to import" situation (response not captured, body not JSON, `data.thirty`
 * missing, no days parsed) is logged and the method returns without throwing.
 *
 * @param responsePromise Promise for the `account/base` response; the caller must create it
 *   before navigating so the automatic request is not missed.
 * @param _page Unused; kept so the signature stays stable for callers.
 * @param account Account whose daily statistics are written.
 */
private async importFromAccountBaseApi(
  responsePromise: Promise<import('playwright').Response>,
  _page: Page,
  account: PlatformAccount
): Promise<void> {
  let res: import('playwright').Response;
  try {
    res = await responsePromise;
  } catch (err: unknown) {
    // Keep the best-effort skip, but record why the capture failed (timeout, page closed, ...).
    const reason = err instanceof Error ? err.message : String(err);
    logger.warn(`[XHS Import] account/base response not captured, skip. accountId=${account.id} reason=${reason}`);
    return;
  }

  const body: unknown = await res.json().catch(() => null);
  if (!body || typeof body !== 'object') {
    logger.warn(`[XHS Import] account/base not valid JSON. accountId=${account.id}`);
    return;
  }

  const data = (body as Record<string, unknown>).data as Record<string, unknown> | undefined;
  const thirty = data?.thirty as Record<string, unknown> | undefined;
  if (!thirty || typeof thirty !== 'object') {
    logger.warn(`[XHS Import] account/base data.thirty missing. accountId=${account.id}`);
    return;
  }

  const perDay = this.parseAccountBaseThirty(thirty);
  if (perDay.size === 0) {
    logger.info(`[XHS Import] account/base no days parsed. accountId=${account.id}`);
    return;
  }

  // parseAccountBaseThirty builds each recordDate as "local midnight of the Asia/Shanghai
  // calendar date". Build "today" the same way so the comparison below also holds when the
  // server does not run in the China time zone.
  const cstParts = new Intl.DateTimeFormat('en-CA', {
    timeZone: 'Asia/Shanghai',
    year: 'numeric',
    month: '2-digit',
    day: '2-digit',
  }).formatToParts(new Date());
  const cstPart = (type: string): number =>
    parseInt(cstParts.find((p) => p.type === type)?.value ?? '0', 10);
  const today = new Date(cstPart('year'), cstPart('month') - 1, cstPart('day'), 0, 0, 0, 0);

  let inserted = 0;
  let updated = 0;

  for (const v of perDay.values()) {
    const { recordDate, ...patch } = v;
    // The response carries no total follower count; for today's record fall back to the
    // account's current fansCount so the stored total is not left empty.
    if (
      recordDate.getTime() === today.getTime() &&
      patch.fansCount === undefined &&
      account.fansCount != null &&
      account.fansCount > 0
    ) {
      (patch as Record<string, unknown>).fansCount = account.fansCount;
    }
    // Sequential on purpose: writes stay ordered and a failure propagates to the caller.
    const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
    inserted += r.inserted;
    updated += r.updated;
  }

  logger.info(
    `[XHS Import] account/base imported. accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
  );
}
|
|
|
+
|
|
|
/**
 * Parses the `data.thirty` object of the `account/base` response.
 *
 * Each `*_list` entry is expected to look like `{ date, count[, count_with_double] }`; only
 * `date` and `count` are read. Entries from all lists are merged by calendar day into a
 * single record per day.
 *
 * NOTE(review): per the original author's note, `date` is the epoch-millisecond timestamp of
 * 00:00 Asia/Shanghai for that day, so the calendar day is derived in that time zone.
 * `recordDate` is then built as midnight of that calendar day in the server's local time zone.
 *
 * Entries are skipped when `date` or `count` is missing, is not convertible to a finite
 * number, or when `date` lies outside the range a `Date` can represent.
 *
 * @param thirty The `data.thirty` object from the response body.
 * @returns Map keyed by `YYYY-MM-DD` (Asia/Shanghai) holding `recordDate` plus every metric
 *   field that had a valid value for that day.
 */
private parseAccountBaseThirty(thirty: Record<string, unknown>): Map<string, { recordDate: Date } & Record<string, unknown>> {
  const map = new Map<string, { recordDate: Date } & Record<string, unknown>>();

  // 'en-CA' renders dates as YYYY-MM-DD; the time zone pins the calendar day to China time.
  const cstFormatter = new Intl.DateTimeFormat('en-CA', {
    timeZone: 'Asia/Shanghai',
    year: 'numeric',
    month: '2-digit',
    day: '2-digit',
  });

  // Largest absolute time value a Date can hold; beyond it the Date is invalid and
  // Intl.DateTimeFormat.format() throws a RangeError.
  const maxDateMs = 8.64e15;

  // Local-midnight Date for the Asia/Shanghai calendar day that contains `ms`.
  const toRecordDate = (ms: number): Date => {
    const parts = cstFormatter.formatToParts(new Date(ms));
    const get = (type: string): string => parts.find((p) => p.type === type)?.value ?? '0';
    const y = parseInt(get('year'), 10);
    const m = parseInt(get('month'), 10) - 1; // formatter months are 1-12, Date expects 0-11
    const d = parseInt(get('day'), 10);
    return new Date(y, m, d, 0, 0, 0, 0);
  };

  // Copies every valid entry of `thirty[listKey]` into `field` of the matching day record.
  const setFromList = (
    listKey: string,
    field: string,
    formatter?: (n: number) => string | number
  ): void => {
    const arr = thirty[listKey];
    if (!Array.isArray(arr)) return;

    for (const item of arr) {
      if (!item || typeof item !== 'object') continue;
      const o = item as Record<string, unknown>;
      const dateMs = o.date;
      const countRaw = o.count;
      if (dateMs == null || countRaw == null) continue;

      const ts = Number(dateMs);
      if (!Number.isFinite(ts) || Math.abs(ts) > maxDateMs) continue;

      // Validate the value before touching the map so a day that only has unusable counts
      // does not end up as an empty record.
      const n = Number(countRaw);
      if (!Number.isFinite(n)) continue;

      const key = cstFormatter.format(new Date(ts));
      let rec = map.get(key);
      if (!rec) {
        rec = { recordDate: toRecordDate(ts) };
        map.set(key, rec);
      }
      (rec as Record<string, unknown>)[field] = formatter ? formatter(n) : n;
    }
  };

  const asPercent = (n: number): string => `${Math.round(n)}%`;
  const asSeconds = (n: number): string => `${Math.round(n)}秒`;

  setFromList('view_list', 'playCount');
  setFromList('impl_count_list', 'exposureCount');
  setFromList('comment_list', 'commentCount');
  setFromList('like_list', 'likeCount');
  setFromList('share_list', 'shareCount');
  setFromList('collect_list', 'collectCount');
  setFromList('net_rise_fans_count_list', 'fansIncrease');
  setFromList('cover_click_rate_list', 'coverClickRate', asPercent);
  setFromList('avg_view_time_list', 'avgWatchDuration', asSeconds);
  setFromList('view_time_list', 'totalWatchDuration', asSeconds);
  setFromList('video_full_view_rate_list', 'completionRate', asPercent);
  setFromList('publish_note_num_list', 'worksCount');

  return map;
}
|
|
|
+
|
|
|
+ /**
|
|
|
* 粉丝数据页:打开粉丝数据、点击「粉丝数据概览」近30天,监听 overall_new 接口响应,解析每日粉丝总数并写入 user_day_statistics.fans_count
|
|
|
*/
|
|
|
private async importFansDataTrendFromPage(
|