|
@@ -28,11 +28,18 @@ type PlaywrightCookie = {
|
|
|
|
|
|
|
|
/**
 * Metric identifiers that a worksheet of an exported workbook can be mapped to
 * (see `detectMetricKind`).
 */
type MetricKind =
  // Watch-data export
  | 'playCount'
  // Interaction-data export
  | 'likeCount'
  | 'commentCount'
  | 'shareCount'
  | 'collectCount'
  // Fans-data export (net follower growth)
  | 'fansIncrease'
  // Watch-data export (rates and durations)
  | 'coverClickRate'
  | 'avgWatchDuration'
  | 'totalWatchDuration'
  | 'completionRate';
|
|
|
|
|
|
|
|
|
|
/**
 * Which dashboard export a workbook came from. The parser uses it to ignore
 * worksheets that do not belong to that export.
 */
type ExportMode = 'watch' | 'interaction' | 'fans';
|
|
|
|
|
+
|
|
|
/**
 * Creates directory `p`, including any missing parent directories.
 *
 * @param p Path of the directory to create.
 * @returns Whatever `fs.mkdir` returns for a recursive create
 *          (presumably the promise-based `fs` API — confirm against the file's imports).
 */
function ensureDir(p: string) {
  return fs.mkdir(p, { recursive: true });
}
|
|
@@ -87,12 +94,21 @@ function parseChineseNumberLike(input: unknown): number | null {
|
|
|
|
|
|
|
|
/**
 * Maps a worksheet name from an exported workbook to the metric it carries.
 *
 * Matching is substring-based on the trimmed name, and the first rule that
 * matches wins.
 *
 * @param sheetName Worksheet name as it appears in the workbook.
 * @returns The matching metric kind, or `null` when the sheet is not recognised.
 */
function detectMetricKind(sheetName: string): MetricKind | null {
  const n = sheetName.trim();

  // Watch data: the sheet may be named 「观看趋势」 or 「观看数趋势」.
  if (n.includes('观看趋势') || n.includes('观看数')) return 'playCount';
  if (n.includes('封面点击率')) return 'coverClickRate';
  if (n.includes('平均观看时长')) return 'avgWatchDuration';
  if (n.includes('观看总时长')) return 'totalWatchDuration';
  if (n.includes('完播率')) return 'completionRate';

  // Interaction data: the name must contain both the metric word and 「趋势」.
  if (n.includes('点赞') && n.includes('趋势')) return 'likeCount';
  if (n.includes('评论') && n.includes('趋势')) return 'commentCount';
  if (n.includes('分享') && n.includes('趋势')) return 'shareCount';
  if (n.includes('收藏') && n.includes('趋势')) return 'collectCount';

  // Fans data: only the net-growth trend sheet is used.
  if (n.includes('净涨粉') && n.includes('趋势')) return 'fansIncrease';

  return null;
}
|
|
|
|
|
|
|
@@ -172,15 +188,31 @@ async function createBrowserForAccount(proxy: ProxyConfig | null): Promise<{ bro
|
|
|
return { browser, shouldClose: false };
|
|
return { browser, shouldClose: false };
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-function parseXhsExcel(filePath: string): Map<string, { recordDate: Date } & Record<string, any>> {
|
|
|
|
|
|
|
+function parseXhsExcel(
|
|
|
|
|
+ filePath: string,
|
|
|
|
|
+ mode: ExportMode
|
|
|
|
|
+): Map<string, { recordDate: Date } & Record<string, any>> {
|
|
|
const wb = XLSX.readFile(filePath);
|
|
const wb = XLSX.readFile(filePath);
|
|
|
const result = new Map<string, { recordDate: Date } & Record<string, any>>();
|
|
const result = new Map<string, { recordDate: Date } & Record<string, any>>();
|
|
|
|
|
|
|
|
- logger.info(`[XHS Import] Excel loaded. file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`);
|
|
|
|
|
|
|
+ logger.info(
|
|
|
|
|
+ `[XHS Import] Excel loaded. mode=${mode} file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`
|
|
|
|
|
+ );
|
|
|
|
|
|
|
|
for (const sheetName of wb.SheetNames) {
|
|
for (const sheetName of wb.SheetNames) {
|
|
|
const kind = detectMetricKind(sheetName);
|
|
const kind = detectMetricKind(sheetName);
|
|
|
if (!kind) continue;
|
|
if (!kind) continue;
|
|
|
|
|
+
|
|
|
|
|
+ // 按导出类型过滤不相关子表,避免误写字段
|
|
|
|
|
+ if (
|
|
|
|
|
+ (mode === 'watch' &&
|
|
|
|
|
+ !['playCount', 'coverClickRate', 'avgWatchDuration', 'totalWatchDuration', 'completionRate'].includes(kind)) ||
|
|
|
|
|
+ (mode === 'interaction' && !['likeCount', 'commentCount', 'shareCount', 'collectCount'].includes(kind)) ||
|
|
|
|
|
+ (mode === 'fans' && kind !== 'fansIncrease')
|
|
|
|
|
+ ) {
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
const sheet = wb.Sheets[sheetName];
|
|
const sheet = wb.Sheets[sheetName];
|
|
|
const rows = XLSX.utils.sheet_to_json<Record<string, any>>(sheet, { defval: '' });
|
|
const rows = XLSX.utils.sheet_to_json<Record<string, any>>(sheet, { defval: '' });
|
|
|
|
|
|
|
@@ -200,9 +232,16 @@ function parseXhsExcel(filePath: string): Map<string, { recordDate: Date } & Rec
|
|
|
if (!result.has(key)) result.set(key, { recordDate: d });
|
|
if (!result.has(key)) result.set(key, { recordDate: d });
|
|
|
const obj = result.get(key)!;
|
|
const obj = result.get(key)!;
|
|
|
|
|
|
|
|
- if (kind === 'playCount') {
|
|
|
|
|
|
|
+ if (kind === 'playCount' || kind === 'likeCount' || kind === 'commentCount' || kind === 'shareCount' || kind === 'collectCount' || kind === 'fansIncrease') {
|
|
|
const n = parseChineseNumberLike(valueVal);
|
|
const n = parseChineseNumberLike(valueVal);
|
|
|
- if (typeof n === 'number') obj.playCount = n;
|
|
|
|
|
|
|
+ if (typeof n === 'number') {
|
|
|
|
|
+ if (kind === 'playCount') obj.playCount = n;
|
|
|
|
|
+ if (kind === 'likeCount') obj.likeCount = n;
|
|
|
|
|
+ if (kind === 'commentCount') obj.commentCount = n;
|
|
|
|
|
+ if (kind === 'shareCount') obj.shareCount = n;
|
|
|
|
|
+ if (kind === 'collectCount') obj.collectCount = n;
|
|
|
|
|
+ if (kind === 'fansIncrease') obj.fansIncrease = n; // 允许负数
|
|
|
|
|
+ }
|
|
|
} else {
|
|
} else {
|
|
|
const s = String(valueVal ?? '').trim();
|
|
const s = String(valueVal ?? '').trim();
|
|
|
if (kind === 'coverClickRate') obj.coverClickRate = s || '0';
|
|
if (kind === 'coverClickRate') obj.coverClickRate = s || '0';
|
|
@@ -347,62 +386,56 @@ export class XiaohongshuAccountOverviewImportService {
|
|
|
throw new Error('小红书数据看板暂无访问权限/申请中,已标记 expired 并通知用户');
|
|
throw new Error('小红书数据看板暂无访问权限/申请中,已标记 expired 并通知用户');
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // 尽量按用户描述进入:数据看板 -> 账号概览 -> 笔记数据 -> 观看数据 -> 近30日
|
|
|
|
|
- // 页面结构可能会变,这里用“文本定位 + 容错”策略
|
|
|
|
|
|
|
+ // 统一入口:账号概览 -> 笔记数据
|
|
|
await page.getByText('账号概览', { exact: true }).first().click().catch(() => undefined);
|
|
await page.getByText('账号概览', { exact: true }).first().click().catch(() => undefined);
|
|
|
await page.getByText('笔记数据', { exact: true }).first().click();
|
|
await page.getByText('笔记数据', { exact: true }).first().click();
|
|
|
- await page.getByText('观看数据', { exact: true }).first().click();
|
|
|
|
|
-
|
|
|
|
|
- // 选择近30日:先点开时间范围,再点“近30日”
|
|
|
|
|
- await page.getByText(/近\d+日/).first().click().catch(() => undefined);
|
|
|
|
|
- await page.getByText('近30日', { exact: true }).click();
|
|
|
|
|
-
|
|
|
|
|
- // 等待数据刷新完成(避免导出到全 0)
|
|
|
|
|
- // 以页面上“观看数”卡片出现非 0 数字作为信号(页面文本会包含类似 8,077 / 4.8万)
|
|
|
|
|
- await page
|
|
|
|
|
- .waitForFunction(() => {
|
|
|
|
|
- const t = document.body?.innerText || '';
|
|
|
|
|
- if (!t.includes('观看数')) return false;
|
|
|
|
|
- // 匹配“观看数”后出现非 0 的数值(允许逗号/万/亿)
|
|
|
|
|
- return /观看数[\s\S]{0,50}([1-9]\d{0,2}(,\d{3})+|[1-9]\d*|[1-9]\d*(\.\d+)?\s*[万亿])/.test(t);
|
|
|
|
|
- }, { timeout: 30_000 })
|
|
|
|
|
- .catch(() => {
|
|
|
|
|
- logger.warn('[XHS Import] Wait for non-zero watch count timed out. Continue export anyway.');
|
|
|
|
|
- });
|
|
|
|
|
-
|
|
|
|
|
- // 导出数据
|
|
|
|
|
- const [download] = await Promise.all([
|
|
|
|
|
- page.waitForEvent('download', { timeout: 60_000 }),
|
|
|
|
|
- page.getByText('导出数据', { exact: true }).first().click(),
|
|
|
|
|
- ]);
|
|
|
|
|
-
|
|
|
|
|
- const filename = `${account.id}_${Date.now()}_${download.suggestedFilename()}`;
|
|
|
|
|
- const filePath = path.join(this.downloadDir, filename);
|
|
|
|
|
- await download.saveAs(filePath);
|
|
|
|
|
-
|
|
|
|
|
- // 解析并入库
|
|
|
|
|
- const perDay = parseXhsExcel(filePath);
|
|
|
|
|
- let inserted = 0;
|
|
|
|
|
- let updated = 0;
|
|
|
|
|
-
|
|
|
|
|
- // 每天一条:accountId + date
|
|
|
|
|
- for (const v of perDay.values()) {
|
|
|
|
|
- const { recordDate, ...patch } = v;
|
|
|
|
|
- const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
|
|
|
|
|
- inserted += r.inserted;
|
|
|
|
|
- updated += r.updated;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- // 删除 Excel(默认删除;设置 KEEP_XHS_XLSX=1 可保留用于排查)
|
|
|
|
|
- if (process.env.KEEP_XHS_XLSX === '1') {
|
|
|
|
|
- logger.warn(`[XHS Import] KEEP_XHS_XLSX=1, keep file: ${filePath}`);
|
|
|
|
|
- } else {
|
|
|
|
|
- await fs.unlink(filePath).catch(() => undefined);
|
|
|
|
|
- }
|
|
|
|
|
|
|
|
|
|
- logger.info(
|
|
|
|
|
- `[XHS Import] Account done. accountId=${account.id} days=${perDay.size} inserted=${inserted} updated=${updated}`
|
|
|
|
|
- );
|
|
|
|
|
|
|
/**
 * Exports one tab of the note-data dashboard for the last 30 days and imports
 * the downloaded workbook into the per-day statistics.
 *
 * Steps: open the tab, select 「近30日」, click 「导出数据」, save the download
 * under `this.downloadDir`, parse it with `parseXhsExcel` and persist one
 * record per day via `userDayStatisticsService.saveStatisticsForDate`.
 *
 * The downloaded file is removed afterwards even when saving, parsing or
 * persisting fails, unless `KEEP_XHS_XLSX=1` is set to keep it for
 * troubleshooting.
 *
 * @param tabText Exact label of the dashboard tab to open.
 * @param mode    Export type handed to the parser so unrelated sheets are ignored.
 */
const exportAndImport = async (
  tabText: '观看数据' | '互动数据' | '涨粉数据',
  mode: ExportMode
): Promise<void> => {
  await page.getByText(tabText, { exact: true }).first().click();

  // Open the date-range picker (best-effort: a failed click is ignored), then pick the last 30 days.
  await page.getByText(/近\d+日/).first().click().catch(() => undefined);
  await page.getByText('近30日', { exact: true }).click();

  // NOTE(review): fixed delay to let the page refresh after the range change.
  // A fixed sleep can still export stale or all-zero data on a slow page;
  // consider waiting on a tab-specific, data-dependent signal instead.
  await page.waitForTimeout(1200);

  // Start listening for the download before clicking so the event is not missed.
  const [download] = await Promise.all([
    page.waitForEvent('download', { timeout: 60_000 }),
    page.getByText('导出数据', { exact: true }).first().click(),
  ]);

  const filename = `${account.id}_${Date.now()}_${download.suggestedFilename()}`;
  const filePath = path.join(this.downloadDir, filename);

  let days = 0;
  let inserted = 0;
  let updated = 0;

  try {
    await download.saveAs(filePath);

    const perDay = parseXhsExcel(filePath, mode);
    days = perDay.size;

    // One record per day, keyed by account id + date.
    for (const v of perDay.values()) {
      const { recordDate, ...patch } = v;
      const r = await this.userDayStatisticsService.saveStatisticsForDate(account.id, recordDate, patch);
      inserted += r.inserted;
      updated += r.updated;
    }
  } finally {
    // Clean up on failure as well, so a broken export does not leave workbooks behind.
    if (process.env.KEEP_XHS_XLSX === '1') {
      logger.warn(`[XHS Import] KEEP_XHS_XLSX=1, keep file: ${filePath}`);
    } else {
      await fs.unlink(filePath).catch(() => undefined);
    }
  }

  logger.info(
    `[XHS Import] ${tabText} imported. accountId=${account.id} days=${days} inserted=${inserted} updated=${updated}`
  );
};
|
|
|
|
|
+
|
|
|
|
|
+ // 1) 观看数据:播放数 + 点击率/时长/完播率
|
|
|
|
|
+ await exportAndImport('观看数据', 'watch');
|
|
|
|
|
+
|
|
|
|
|
+ // 2) 互动数据:点赞/评论/收藏/分享
|
|
|
|
|
+ await exportAndImport('互动数据', 'interaction');
|
|
|
|
|
+
|
|
|
|
|
+ // 3) 涨粉数据:只取“净涨粉趋势”(解析器已过滤)
|
|
|
|
|
+ await exportAndImport('涨粉数据', 'fans');
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(`[XHS Import] Account all tabs done. accountId=${account.id}`);
|
|
|
|
|
|
|
|
await context.close();
|
|
await context.close();
|
|
|
} finally {
|
|
} finally {
|