Ethanfly 1 napja
szülő
commit
33929b61e2

+ 1 - 18
server/python/platforms/xiaohongshu.py

@@ -2009,7 +2009,7 @@ class XiaohongshuPublisher(BasePublisher):
     async def get_note_base(self, cookies: str, note_id: str) -> dict:
         """
         调用创作者中心「笔记数据- note/base」接口,用于每日作品数据同步。
-        登录方式与打开后台一致:使用账号已存 Cookie,不启浏览器。
+        使用账号已存 Cookie,不启浏览器,直接带 Referer 调 note/base
         """
         import aiohttp
 
@@ -2020,13 +2020,6 @@ class XiaohongshuPublisher(BasePublisher):
         cookie_list = self.parse_cookies(cookies)
         cookie_dict = {c.get("name") or "": c.get("value") or "" for c in cookie_list if c.get("name")}
 
-        session_headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
-            "Accept-Encoding": "gzip, deflate, br",
-            "Connection": "keep-alive",
-        }
         api_headers = {
             "Accept": "application/json, text/plain, */*",
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
@@ -2036,19 +2029,9 @@ class XiaohongshuPublisher(BasePublisher):
             "Connection": "keep-alive",
         }
 
-        warmup_url = "https://creator.xiaohongshu.com/statistics/account/v2"
         api_url = f"https://creator.xiaohongshu.com/api/galaxy/creator/datacenter/note/base?note_id={note_id}"
 
         async with aiohttp.ClientSession(cookies=cookie_dict) as session:
-            try:
-                await session.get(
-                    warmup_url,
-                    headers=session_headers,
-                    timeout=aiohttp.ClientTimeout(total=20),
-                )
-            except Exception as e:
-                print(f"[{self.platform_name}] warmup statistics/account/v2 failed (non-fatal): {e}")
-
             async with session.get(
                 api_url,
                 headers=api_headers,

+ 94 - 34
server/src/services/XiaohongshuWorkNoteStatisticsImportService.ts

@@ -3,7 +3,6 @@ import { AppDataSource, PlatformAccount, Work } from '../models/index.js';
 import { logger } from '../utils/logger.js';
 import { WorkDayStatisticsService } from './WorkDayStatisticsService.js';
 import { AccountService } from './AccountService.js';
-import { getPythonServiceBaseUrl } from './PythonServiceConfigService.js';
 import type { ProxyConfig } from '@media-manager/shared';
 import { BrowserManager } from '../automation/browser.js';
 import { In } from 'typeorm';
@@ -98,32 +97,41 @@ function normalizeSameSite(value: unknown): 'Strict' | 'Lax' | 'None' | undefine
   return undefined;
 }
 
+/** 与进入后台/每日用户数据一致:每条 cookie 仅保留 url 或 domain 其一,且 domain 仅在有有效值时设置,避免 Playwright addCookies 报错 */
 function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
   if (!cookieData) return [];
   const raw = cookieData.trim();
   if (!raw) return [];
 
-  try {
-    const parsed = JSON.parse(raw) as (PlaywrightCookie & { sameSite?: unknown })[];
-    if (Array.isArray(parsed)) {
-      return parsed.map((c) => {
-        const sameSite = normalizeSameSite(c.sameSite);
-        const out: PlaywrightCookie = {
-          name: String(c.name ?? '').trim(),
-          value: String(c.value ?? '').trim(),
-          url: c.url || 'https://creator.xiaohongshu.com',
-        };
-        if (c.domain != null) out.domain = String(c.domain);
-        if (c.path != null) out.path = String(c.path);
-        if (c.expires != null && Number.isFinite(Number(c.expires))) out.expires = Number(c.expires);
-        if (typeof c.httpOnly === 'boolean') out.httpOnly = c.httpOnly;
-        if (typeof c.secure === 'boolean') out.secure = c.secure;
-        if (sameSite) out.sameSite = sameSite;
-        return out;
-      }).filter((c) => c.name.length > 0);
+  if (raw.startsWith('[') || raw.startsWith('{')) {
+    try {
+      const parsed = JSON.parse(raw);
+      const arr = Array.isArray(parsed) ? parsed : (parsed?.cookies ? parsed.cookies : []);
+      if (!Array.isArray(arr)) return [];
+      return arr
+        .map((c: Record<string, unknown>) => {
+          const name = String(c?.name ?? '').trim();
+          const value = String(c?.value ?? '').trim();
+          if (!name) return null;
+          const domain = c?.domain ? String(c.domain).trim() : undefined;
+          const pathVal = c?.path ? String(c.path) : '/';
+          const url = !domain ? 'https://creator.xiaohongshu.com' : undefined;
+          const sameSite = normalizeSameSite(c?.sameSite);
+          return {
+            name,
+            value,
+            ...(domain ? { domain } : { url }),
+            path: pathVal,
+            expires: typeof c?.expires === 'number' ? c.expires : undefined,
+            httpOnly: typeof c?.httpOnly === 'boolean' ? c.httpOnly : undefined,
+            secure: typeof c?.secure === 'boolean' ? c.secure : undefined,
+            ...(sameSite ? { sameSite } : {}),
+          } as PlaywrightCookie;
+        })
+        .filter(Boolean) as PlaywrightCookie[];
+    } catch {
+      // fallthrough
     }
-  } catch {
-    // fallthrough
   }
 
   const pairs = raw.split(';').map((p) => p.trim()).filter(Boolean);
@@ -367,6 +375,9 @@ export class XiaohongshuWorkNoteStatisticsImportService {
       let totalInserted = 0;
       let totalUpdated = 0;
       const total = works.length;
+      let skippedNoNoteId = 0;
+      let skippedNoData = 0;
+      let skippedNoPatches = 0;
 
       for (let i = 0; i < works.length; i++) {
         const work = works[i];
@@ -374,15 +385,22 @@ export class XiaohongshuWorkNoteStatisticsImportService {
           options.onProgress({ index: i + 1, total, work });
         }
         const noteId = (work.platformVideoId || '').trim();
-        if (!noteId) continue;
+        if (!noteId) {
+          skippedNoNoteId++;
+          continue;
+        }
 
         try {
-          let data: NoteBaseData | null = await this.fetchNoteBaseViaPython(account, noteId);
+          // 直接走浏览器(无 Python 前置),保持与「进入后台 / 作品同步」完全一致的登录方式
+          await ensureBrowser();
+          const data: NoteBaseData | null = await this.fetchNoteBaseData(page!, noteId);
           if (!data) {
-            await ensureBrowser();
-            data = await this.fetchNoteBaseData(page!, noteId);
+            skippedNoData++;
+            logger.warn(
+              `[XHS WorkStats] Python 与浏览器均未拿到 note/base 数据,跳过。 accountId=${account.id} workId=${work.id} noteId=${noteId}`
+            );
+            continue;
           }
-          if (!data) continue;
 
           // 同步 base 顶层“汇总指标”到 works 表(用于作品列表/总览等按 work 累计口径展示)
           await this.applyWorkSnapshotFromBaseData(work.id, data).catch((e) => {
@@ -401,7 +419,14 @@ export class XiaohongshuWorkNoteStatisticsImportService {
             lastAllowed.setDate(lastAllowed.getDate() + 13); // 发布当日 + 13 天 = 共 14 天
             patches = patches.filter((p) => p.recordDate.getTime() <= lastAllowed.getTime());
           }
-          if (!patches.length) continue;
+          if (!patches.length) {
+            skippedNoPatches++;
+            const dayKeys = data?.day && typeof data.day === 'object' ? Object.keys(data.day) : [];
+            logger.warn(
+              `[XHS WorkStats] note/base 无日维度数据,跳过入库。 accountId=${account.id} workId=${work.id} noteId=${noteId} data.day keys=[${dayKeys.join(',')}]`
+            );
+            continue;
+          }
 
           const result = await this.workDayStatisticsService.saveStatisticsForDateBatch(
             patches.map((p) => ({
@@ -477,6 +502,11 @@ export class XiaohongshuWorkNoteStatisticsImportService {
         }
       }
 
+      if (skippedNoNoteId > 0 || skippedNoData > 0 || skippedNoPatches > 0) {
+        logger.info(
+          `[XHS WorkStats] accountId=${account.id} 跳过统计: 无 noteId=${skippedNoNoteId} 无数据=${skippedNoData} 无日维度=${skippedNoPatches}`
+        );
+      }
       logger.info(
         `[XHS WorkStats] accountId=${account.id} completed. inserted=${totalInserted}, updated=${totalUpdated}`
       );
@@ -492,7 +522,10 @@ export class XiaohongshuWorkNoteStatisticsImportService {
     }
   }
 
-  /** 通过 Python 调用 note/base(登录与打开后台一致:使用账号已存 Cookie) */
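+  /**
+   * Fetch note/base through the Python service endpoint `/xiaohongshu/note_base`, sending only the account's stored cookie and the note id (no browser is launched here). Returns null when the cookie is empty, the service answers with a non-OK HTTP status, the body is empty or not a JSON object, `data` is missing or not an object, or the request throws.
+   * NOTE(review): the import loop in this change now fetches via the browser directly and no longer calls this method first, so a null return no longer triggers a browser fallback there — confirm whether any caller remains (the `getPythonServiceBaseUrl` import is also removed in this change).
+   */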
   private async fetchNoteBaseViaPython(
     account: PlatformAccount,
     noteId: string
@@ -500,7 +533,10 @@ export class XiaohongshuWorkNoteStatisticsImportService {
     const base = (await getPythonServiceBaseUrl()).replace(/\/$/, '');
     const url = `${base}/xiaohongshu/note_base`;
     const cookie = String(account.cookieData || '').trim();
-    if (!cookie) return null;
+    if (!cookie) {
+      logger.warn(`[XHS WorkStats] fetchNoteBaseViaPython: accountId=${account.id} cookie 为空`);
+      return null;
+    }
 
     const controller = new AbortController();
     const timeoutId = setTimeout(() => controller.abort(), 35_000);
@@ -512,12 +548,36 @@ export class XiaohongshuWorkNoteStatisticsImportService {
         body: JSON.stringify({ cookie, note_id: noteId }),
       });
       const text = await res.text();
-      const body = text ? (JSON.parse(text) as { data?: unknown; code?: number }) : null;
-      if (!body || typeof body !== 'object') return null;
-      const data = body.data;
-      if (!data || typeof data !== 'object') return null;
+      if (!res.ok) {
+        logger.warn(
+          `[XHS WorkStats] fetchNoteBaseViaPython: Python 返回 HTTP ${res.status} accountId=${account.id} noteId=${noteId} body=${text.slice(0, 200)}`
+        );
+        return null;
+      }
+      const body = text ? (JSON.parse(text) as { data?: unknown; code?: number; msg?: string }) : null;
+      if (!body || typeof body !== 'object') {
+        logger.warn(`[XHS WorkStats] fetchNoteBaseViaPython: 响应非 JSON accountId=${account.id} noteId=${noteId}`);
+        return null;
+      }
+      const code = body.code;
+      let data = body.data;
+      if (!data || typeof data !== 'object') {
+        logger.warn(
+          `[XHS WorkStats] fetchNoteBaseViaPython: 无 data 或 data 非对象 accountId=${account.id} noteId=${noteId} code=${code} msg=${(body as any).msg ?? ''}`
+        );
+        return null;
+      }
+      // 兼容接口返回 data.data(如 { data: { data: { day, view_count, ... } } })
+      const inner = (data as Record<string, unknown>).data;
+      if (inner && typeof inner === 'object' && (inner as Record<string, unknown>).day !== undefined) {
+        data = inner;
+      }
       return data as NoteBaseData;
-    } catch {
+    } catch (e) {
+      logger.warn(
+        `[XHS WorkStats] fetchNoteBaseViaPython: 请求异常 accountId=${account.id} noteId=${noteId}`,
+        e instanceof Error ? e.message : e
+      );
       return null;
     } finally {
       clearTimeout(timeoutId);