|
@@ -3,7 +3,6 @@ import { AppDataSource, PlatformAccount, Work } from '../models/index.js';
|
|
|
import { logger } from '../utils/logger.js';
|
|
import { logger } from '../utils/logger.js';
|
|
|
import { WorkDayStatisticsService } from './WorkDayStatisticsService.js';
|
|
import { WorkDayStatisticsService } from './WorkDayStatisticsService.js';
|
|
|
import { AccountService } from './AccountService.js';
|
|
import { AccountService } from './AccountService.js';
|
|
|
-import { getPythonServiceBaseUrl } from './PythonServiceConfigService.js';
|
|
|
|
|
import type { ProxyConfig } from '@media-manager/shared';
|
|
import type { ProxyConfig } from '@media-manager/shared';
|
|
|
import { BrowserManager } from '../automation/browser.js';
|
|
import { BrowserManager } from '../automation/browser.js';
|
|
|
import { In } from 'typeorm';
|
|
import { In } from 'typeorm';
|
|
@@ -98,32 +97,41 @@ function normalizeSameSite(value: unknown): 'Strict' | 'Lax' | 'None' | undefine
|
|
|
return undefined;
|
|
return undefined;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+/** 与进入后台/每日用户数据一致:每条 cookie 仅保留 url 或 domain 其一,且 domain 仅在有有效值时设置,避免 Playwright addCookies 报错 */
|
|
|
function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
|
|
function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
|
|
|
if (!cookieData) return [];
|
|
if (!cookieData) return [];
|
|
|
const raw = cookieData.trim();
|
|
const raw = cookieData.trim();
|
|
|
if (!raw) return [];
|
|
if (!raw) return [];
|
|
|
|
|
|
|
|
- try {
|
|
|
|
|
- const parsed = JSON.parse(raw) as (PlaywrightCookie & { sameSite?: unknown })[];
|
|
|
|
|
- if (Array.isArray(parsed)) {
|
|
|
|
|
- return parsed.map((c) => {
|
|
|
|
|
- const sameSite = normalizeSameSite(c.sameSite);
|
|
|
|
|
- const out: PlaywrightCookie = {
|
|
|
|
|
- name: String(c.name ?? '').trim(),
|
|
|
|
|
- value: String(c.value ?? '').trim(),
|
|
|
|
|
- url: c.url || 'https://creator.xiaohongshu.com',
|
|
|
|
|
- };
|
|
|
|
|
- if (c.domain != null) out.domain = String(c.domain);
|
|
|
|
|
- if (c.path != null) out.path = String(c.path);
|
|
|
|
|
- if (c.expires != null && Number.isFinite(Number(c.expires))) out.expires = Number(c.expires);
|
|
|
|
|
- if (typeof c.httpOnly === 'boolean') out.httpOnly = c.httpOnly;
|
|
|
|
|
- if (typeof c.secure === 'boolean') out.secure = c.secure;
|
|
|
|
|
- if (sameSite) out.sameSite = sameSite;
|
|
|
|
|
- return out;
|
|
|
|
|
- }).filter((c) => c.name.length > 0);
|
|
|
|
|
|
|
+ if (raw.startsWith('[') || raw.startsWith('{')) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ const parsed = JSON.parse(raw);
|
|
|
|
|
+ const arr = Array.isArray(parsed) ? parsed : (parsed?.cookies ? parsed.cookies : []);
|
|
|
|
|
+ if (!Array.isArray(arr)) return [];
|
|
|
|
|
+ return arr
|
|
|
|
|
+ .map((c: Record<string, unknown>) => {
|
|
|
|
|
+ const name = String(c?.name ?? '').trim();
|
|
|
|
|
+ const value = String(c?.value ?? '').trim();
|
|
|
|
|
+ if (!name) return null;
|
|
|
|
|
+ const domain = c?.domain ? String(c.domain).trim() : undefined;
|
|
|
|
|
+ const pathVal = c?.path ? String(c.path) : '/';
|
|
|
|
|
+ const url = !domain ? 'https://creator.xiaohongshu.com' : undefined;
|
|
|
|
|
+ const sameSite = normalizeSameSite(c?.sameSite);
|
|
|
|
|
+ return {
|
|
|
|
|
+ name,
|
|
|
|
|
+ value,
|
|
|
|
|
+ ...(domain ? { domain } : { url }),
|
|
|
|
|
+ path: pathVal,
|
|
|
|
|
+ expires: typeof c?.expires === 'number' ? c.expires : undefined,
|
|
|
|
|
+ httpOnly: typeof c?.httpOnly === 'boolean' ? c.httpOnly : undefined,
|
|
|
|
|
+ secure: typeof c?.secure === 'boolean' ? c.secure : undefined,
|
|
|
|
|
+ ...(sameSite ? { sameSite } : {}),
|
|
|
|
|
+ } as PlaywrightCookie;
|
|
|
|
|
+ })
|
|
|
|
|
+ .filter(Boolean) as PlaywrightCookie[];
|
|
|
|
|
+ } catch {
|
|
|
|
|
+ // fallthrough
|
|
|
}
|
|
}
|
|
|
- } catch {
|
|
|
|
|
- // fallthrough
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
const pairs = raw.split(';').map((p) => p.trim()).filter(Boolean);
|
|
const pairs = raw.split(';').map((p) => p.trim()).filter(Boolean);
|
|
@@ -367,6 +375,9 @@ export class XiaohongshuWorkNoteStatisticsImportService {
|
|
|
let totalInserted = 0;
|
|
let totalInserted = 0;
|
|
|
let totalUpdated = 0;
|
|
let totalUpdated = 0;
|
|
|
const total = works.length;
|
|
const total = works.length;
|
|
|
|
|
+ let skippedNoNoteId = 0;
|
|
|
|
|
+ let skippedNoData = 0;
|
|
|
|
|
+ let skippedNoPatches = 0;
|
|
|
|
|
|
|
|
for (let i = 0; i < works.length; i++) {
|
|
for (let i = 0; i < works.length; i++) {
|
|
|
const work = works[i];
|
|
const work = works[i];
|
|
@@ -374,15 +385,22 @@ export class XiaohongshuWorkNoteStatisticsImportService {
|
|
|
options.onProgress({ index: i + 1, total, work });
|
|
options.onProgress({ index: i + 1, total, work });
|
|
|
}
|
|
}
|
|
|
const noteId = (work.platformVideoId || '').trim();
|
|
const noteId = (work.platformVideoId || '').trim();
|
|
|
- if (!noteId) continue;
|
|
|
|
|
|
|
+ if (!noteId) {
|
|
|
|
|
+ skippedNoNoteId++;
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
try {
|
|
try {
|
|
|
- let data: NoteBaseData | null = await this.fetchNoteBaseViaPython(account, noteId);
|
|
|
|
|
|
|
+ // 直接走浏览器(无 Python 前置),保持与「进入后台 / 作品同步」完全一致的登录方式
|
|
|
|
|
+ await ensureBrowser();
|
|
|
|
|
+ const data: NoteBaseData | null = await this.fetchNoteBaseData(page!, noteId);
|
|
|
if (!data) {
|
|
if (!data) {
|
|
|
- await ensureBrowser();
|
|
|
|
|
- data = await this.fetchNoteBaseData(page!, noteId);
|
|
|
|
|
|
|
+ skippedNoData++;
|
|
|
|
|
+ logger.warn(
|
|
|
|
|
+ `[XHS WorkStats] Python 与浏览器均未拿到 note/base 数据,跳过。 accountId=${account.id} workId=${work.id} noteId=${noteId}`
|
|
|
|
|
+ );
|
|
|
|
|
+ continue;
|
|
|
}
|
|
}
|
|
|
- if (!data) continue;
|
|
|
|
|
|
|
|
|
|
// 同步 base 顶层“汇总指标”到 works 表(用于作品列表/总览等按 work 累计口径展示)
|
|
// 同步 base 顶层“汇总指标”到 works 表(用于作品列表/总览等按 work 累计口径展示)
|
|
|
await this.applyWorkSnapshotFromBaseData(work.id, data).catch((e) => {
|
|
await this.applyWorkSnapshotFromBaseData(work.id, data).catch((e) => {
|
|
@@ -401,7 +419,14 @@ export class XiaohongshuWorkNoteStatisticsImportService {
|
|
|
lastAllowed.setDate(lastAllowed.getDate() + 13); // 发布当日 + 13 天 = 共 14 天
|
|
lastAllowed.setDate(lastAllowed.getDate() + 13); // 发布当日 + 13 天 = 共 14 天
|
|
|
patches = patches.filter((p) => p.recordDate.getTime() <= lastAllowed.getTime());
|
|
patches = patches.filter((p) => p.recordDate.getTime() <= lastAllowed.getTime());
|
|
|
}
|
|
}
|
|
|
- if (!patches.length) continue;
|
|
|
|
|
|
|
+ if (!patches.length) {
|
|
|
|
|
+ skippedNoPatches++;
|
|
|
|
|
+ const dayKeys = data?.day && typeof data.day === 'object' ? Object.keys(data.day) : [];
|
|
|
|
|
+ logger.warn(
|
|
|
|
|
+ `[XHS WorkStats] note/base 无日维度数据,跳过入库。 accountId=${account.id} workId=${work.id} noteId=${noteId} data.day keys=[${dayKeys.join(',')}]`
|
|
|
|
|
+ );
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
const result = await this.workDayStatisticsService.saveStatisticsForDateBatch(
|
|
const result = await this.workDayStatisticsService.saveStatisticsForDateBatch(
|
|
|
patches.map((p) => ({
|
|
patches.map((p) => ({
|
|
@@ -477,6 +502,11 @@ export class XiaohongshuWorkNoteStatisticsImportService {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ if (skippedNoNoteId > 0 || skippedNoData > 0 || skippedNoPatches > 0) {
|
|
|
|
|
+ logger.info(
|
|
|
|
|
+ `[XHS WorkStats] accountId=${account.id} 跳过统计: 无 noteId=${skippedNoNoteId} 无数据=${skippedNoData} 无日维度=${skippedNoPatches}`
|
|
|
|
|
+ );
|
|
|
|
|
+ }
|
|
|
logger.info(
|
|
logger.info(
|
|
|
`[XHS WorkStats] accountId=${account.id} completed. inserted=${totalInserted}, updated=${totalUpdated}`
|
|
`[XHS WorkStats] accountId=${account.id} completed. inserted=${totalInserted}, updated=${totalUpdated}`
|
|
|
);
|
|
);
|
|
@@ -492,7 +522,10 @@ export class XiaohongshuWorkNoteStatisticsImportService {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- /** 通过 Python 调用 note/base(登录与打开后台一致:使用账号已存 Cookie) */
|
|
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 通过 Python 调用 note/base(与进入后台/每日用户数据一致:仅用账号已存 Cookie,不启浏览器)。
|
|
|
|
|
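+ * Returns null on failure; typical causes: the Python service is not running or its base URL is wrong, the stored Cookie has expired (creator endpoint answers 401), or the response structure has changed. NOTE(review): the import loop shown in this change now goes straight to the browser and no longer calls this method first, so a null here does not trigger a browser fallback in that loop — confirm whether any caller still relies on it.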
|
|
|
|
|
+ */
|
|
|
private async fetchNoteBaseViaPython(
|
|
private async fetchNoteBaseViaPython(
|
|
|
account: PlatformAccount,
|
|
account: PlatformAccount,
|
|
|
noteId: string
|
|
noteId: string
|
|
@@ -500,7 +533,10 @@ export class XiaohongshuWorkNoteStatisticsImportService {
|
|
|
const base = (await getPythonServiceBaseUrl()).replace(/\/$/, '');
|
|
const base = (await getPythonServiceBaseUrl()).replace(/\/$/, '');
|
|
|
const url = `${base}/xiaohongshu/note_base`;
|
|
const url = `${base}/xiaohongshu/note_base`;
|
|
|
const cookie = String(account.cookieData || '').trim();
|
|
const cookie = String(account.cookieData || '').trim();
|
|
|
- if (!cookie) return null;
|
|
|
|
|
|
|
+ if (!cookie) {
|
|
|
|
|
+ logger.warn(`[XHS WorkStats] fetchNoteBaseViaPython: accountId=${account.id} cookie 为空`);
|
|
|
|
|
+ return null;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
const controller = new AbortController();
|
|
const controller = new AbortController();
|
|
|
const timeoutId = setTimeout(() => controller.abort(), 35_000);
|
|
const timeoutId = setTimeout(() => controller.abort(), 35_000);
|
|
@@ -512,12 +548,36 @@ export class XiaohongshuWorkNoteStatisticsImportService {
|
|
|
body: JSON.stringify({ cookie, note_id: noteId }),
|
|
body: JSON.stringify({ cookie, note_id: noteId }),
|
|
|
});
|
|
});
|
|
|
const text = await res.text();
|
|
const text = await res.text();
|
|
|
- const body = text ? (JSON.parse(text) as { data?: unknown; code?: number }) : null;
|
|
|
|
|
- if (!body || typeof body !== 'object') return null;
|
|
|
|
|
- const data = body.data;
|
|
|
|
|
- if (!data || typeof data !== 'object') return null;
|
|
|
|
|
|
|
+ if (!res.ok) {
|
|
|
|
|
+ logger.warn(
|
|
|
|
|
+ `[XHS WorkStats] fetchNoteBaseViaPython: Python 返回 HTTP ${res.status} accountId=${account.id} noteId=${noteId} body=${text.slice(0, 200)}`
|
|
|
|
|
+ );
|
|
|
|
|
+ return null;
|
|
|
|
|
+ }
|
|
|
|
|
+ const body = text ? (JSON.parse(text) as { data?: unknown; code?: number; msg?: string }) : null;
|
|
|
|
|
+ if (!body || typeof body !== 'object') {
|
|
|
|
|
+ logger.warn(`[XHS WorkStats] fetchNoteBaseViaPython: 响应非 JSON accountId=${account.id} noteId=${noteId}`);
|
|
|
|
|
+ return null;
|
|
|
|
|
+ }
|
|
|
|
|
+ const code = body.code;
|
|
|
|
|
+ let data = body.data;
|
|
|
|
|
+ if (!data || typeof data !== 'object') {
|
|
|
|
|
+ logger.warn(
|
|
|
|
|
+ `[XHS WorkStats] fetchNoteBaseViaPython: 无 data 或 data 非对象 accountId=${account.id} noteId=${noteId} code=${code} msg=${(body as any).msg ?? ''}`
|
|
|
|
|
+ );
|
|
|
|
|
+ return null;
|
|
|
|
|
+ }
|
|
|
|
|
+ // 兼容接口返回 data.data(如 { data: { data: { day, view_count, ... } } })
|
|
|
|
|
+ const inner = (data as Record<string, unknown>).data;
|
|
|
|
|
+ if (inner && typeof inner === 'object' && (inner as Record<string, unknown>).day !== undefined) {
|
|
|
|
|
+ data = inner;
|
|
|
|
|
+ }
|
|
|
return data as NoteBaseData;
|
|
return data as NoteBaseData;
|
|
|
- } catch {
|
|
|
|
|
|
|
+ } catch (e) {
|
|
|
|
|
+ logger.warn(
|
|
|
|
|
+ `[XHS WorkStats] fetchNoteBaseViaPython: 请求异常 accountId=${account.id} noteId=${noteId}`,
|
|
|
|
|
+ e instanceof Error ? e.message : e
|
|
|
|
|
+ );
|
|
|
return null;
|
|
return null;
|
|
|
} finally {
|
|
} finally {
|
|
|
clearTimeout(timeoutId);
|
|
clearTimeout(timeoutId);
|