소스 검색

小红书用户数据

Ethanfly 1 일 전
부모
커밋
3d8e1a333f
3개의 변경된 파일, 165개의 추가 및 12개의 삭제
  1. 26 0
      server/python/app.py
  2. 61 0
      server/python/platforms/xiaohongshu.py
  3. 78 12
      server/src/services/XiaohongshuWorkNoteStatisticsImportService.ts

+ 26 - 0
server/python/app.py

@@ -1736,6 +1736,32 @@ def baijiahao_fans_basic_info():
         return jsonify({"success": False, "error": str(e), "errno": -1}), 500
 
 
@app.route("/xiaohongshu/note_base", methods=["POST"])
def xiaohongshu_note_base():
    """
    Proxy a single-note lookup to the Xiaohongshu creator-center ``note/base``
    endpoint (used for the daily per-work statistics sync).

    Authentication uses the cookie already stored for the account, the same
    credential that is used when opening the creator backend.

    Request body (JSON object): { "cookie": "...", "note_id": "xxx" }

    Responses:
        400 -- ``cookie`` or ``note_id`` is missing/empty (or the body is not
               a JSON object).
        500 -- any exception raised while handling the request.
        otherwise -- the dict returned by ``publisher.get_note_base`` as JSON.
    """
    try:
        # silent=True: a malformed or non-JSON body yields None instead of
        # raising, so it is reported below as a missing parameter (400)
        # rather than being caught by the generic handler and turned into 500.
        payload = request.get_json(silent=True)
        data = payload if isinstance(payload, dict) else {}

        cookie_str = data.get("cookie") or ""
        # str(): a numeric note_id in the JSON body must not crash on .strip().
        note_id = str(data.get("note_id") or "").strip()

        if not cookie_str:
            return jsonify({"data": None, "code": 400, "error": "缺少 cookie 参数"}), 400
        if not note_id:
            return jsonify({"data": None, "code": 400, "error": "缺少 note_id 参数"}), 400

        PublisherClass = get_publisher("xiaohongshu")
        publisher = PublisherClass(headless=HEADLESS_MODE)
        # The publisher method is a coroutine; this handler is synchronous,
        # so run it to completion on a fresh event loop.
        result = asyncio.run(publisher.get_note_base(cookie_str, note_id))
        return jsonify(result)
    except Exception as e:
        traceback.print_exc()
        return jsonify({"data": None, "code": 500, "error": str(e)}), 500
+
+
 # ==================== 健康检查 ====================
 
 @app.route("/health", methods=["GET"])

+ 61 - 0
server/python/platforms/xiaohongshu.py

@@ -2005,3 +2005,64 @@ class XiaohongshuPublisher(BasePublisher):
             'work_comments': all_work_comments,
             'total': len(all_work_comments)
         }
+
+    async def get_note_base(self, cookies: str, note_id: str) -> dict:
+        """
+        调用创作者中心「笔记数据- note/base」接口,用于每日作品数据同步。
+        登录方式与打开后台一致:使用账号已存 Cookie,不启浏览器。
+        """
+        import aiohttp
+
+        note_id = (note_id or "").strip()
+        if not note_id:
+            return {"data": None, "code": -1, "msg": "missing note_id"}
+
+        cookie_list = self.parse_cookies(cookies)
+        cookie_dict = {c.get("name") or "": c.get("value") or "" for c in cookie_list if c.get("name")}
+
+        session_headers = {
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "Accept-Encoding": "gzip, deflate, br",
+            "Connection": "keep-alive",
+        }
+        api_headers = {
+            "Accept": "application/json, text/plain, */*",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+            "Referer": f"https://creator.xiaohongshu.com/statistics/note-detail?noteId={note_id}",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "Accept-Encoding": "gzip, deflate, br",
+            "Connection": "keep-alive",
+        }
+
+        warmup_url = "https://creator.xiaohongshu.com/statistics/account/v2"
+        api_url = f"https://creator.xiaohongshu.com/api/galaxy/creator/datacenter/note/base?note_id={note_id}"
+
+        async with aiohttp.ClientSession(cookies=cookie_dict) as session:
+            try:
+                await session.get(
+                    warmup_url,
+                    headers=session_headers,
+                    timeout=aiohttp.ClientTimeout(total=20),
+                )
+            except Exception as e:
+                print(f"[{self.platform_name}] warmup statistics/account/v2 failed (non-fatal): {e}")
+
+            async with session.get(
+                api_url,
+                headers=api_headers,
+                timeout=aiohttp.ClientTimeout(total=30),
+            ) as resp:
+                status = resp.status
+                try:
+                    data = await resp.json()
+                except Exception:
+                    text = await resp.text()
+                    print(f"[{self.platform_name}] note/base non-JSON: {text[:500]}")
+                    return {"data": None, "code": status, "msg": "invalid response"}
+
+        if status != 200:
+            return {"data": None, "code": status, "msg": data.get("msg") if isinstance(data, dict) else "request failed"}
+
+        return data if isinstance(data, dict) else {"data": None, "code": -1, "msg": "invalid response"}

+ 78 - 12
server/src/services/XiaohongshuWorkNoteStatisticsImportService.ts

@@ -3,6 +3,7 @@ import { AppDataSource, PlatformAccount, Work } from '../models/index.js';
 import { logger } from '../utils/logger.js';
 import { WorkDayStatisticsService } from './WorkDayStatisticsService.js';
 import { AccountService } from './AccountService.js';
+import { getPythonServiceBaseUrl } from './PythonServiceConfigService.js';
 import type { ProxyConfig } from '@media-manager/shared';
 import { BrowserManager } from '../automation/browser.js';
 import { In } from 'typeorm';
@@ -87,18 +88,39 @@ interface DailyWorkStatPatch {
   twoSecondExitRate?: string;
 }
 
/**
 * Normalize a cookie's sameSite value for Playwright, which only accepts
 * 'Strict' | 'Lax' | 'None'. Browser exports may use lowercase spellings or
 * 'no_restriction'; anything unrecognized (or null/undefined) maps to undefined.
 */
function normalizeSameSite(value: unknown): 'Strict' | 'Lax' | 'None' | undefined {
  if (value == null) return undefined;

  switch (String(value).trim().toLowerCase()) {
    case 'strict':
      return 'Strict';
    case 'lax':
      return 'Lax';
    case 'none':
    case 'no_restriction':
      return 'None';
    default:
      return undefined;
  }
}
+
 function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
   if (!cookieData) return [];
   const raw = cookieData.trim();
   if (!raw) return [];
 
   try {
-    const parsed = JSON.parse(raw) as PlaywrightCookie[];
+    const parsed = JSON.parse(raw) as (PlaywrightCookie & { sameSite?: unknown })[];
     if (Array.isArray(parsed)) {
-      return parsed.map((c) => ({
-        ...c,
-        url: c.url || 'https://creator.xiaohongshu.com',
-      }));
+      return parsed.map((c) => {
+        const sameSite = normalizeSameSite(c.sameSite);
+        const out: PlaywrightCookie = {
+          name: String(c.name ?? '').trim(),
+          value: String(c.value ?? '').trim(),
+          url: c.url || 'https://creator.xiaohongshu.com',
+        };
+        if (c.domain != null) out.domain = String(c.domain);
+        if (c.path != null) out.path = String(c.path);
+        if (c.expires != null && Number.isFinite(Number(c.expires))) out.expires = Number(c.expires);
+        if (typeof c.httpOnly === 'boolean') out.httpOnly = c.httpOnly;
+        if (typeof c.secure === 'boolean') out.secure = c.secure;
+        if (sameSite) out.sameSite = sameSite;
+        return out;
+      }).filter((c) => c.name.length > 0);
     }
   } catch {
     // fallthrough
@@ -318,10 +340,17 @@ export class XiaohongshuWorkNoteStatisticsImportService {
       }
     }
 
-    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
+    let browser: Browser | null = null;
+    let shouldClose = false;
     let context: BrowserContext | null = null;
+    let page: Page | null = null;
     let closedDueToLoginExpired = false;
-    try {
+
+    const ensureBrowser = async (): Promise<void> => {
+      if (page) return;
+      const x = await createBrowserForAccount(account.proxyConfig);
+      browser = x.browser;
+      shouldClose = x.shouldClose;
       context = await browser.newContext({
         viewport: { width: 1920, height: 1080 },
         locale: 'zh-CN',
@@ -331,13 +360,14 @@ export class XiaohongshuWorkNoteStatisticsImportService {
       });
       await context.addCookies(cookies as any);
       context.setDefaultTimeout(60_000);
+      page = await context.newPage();
+    };
 
-      const page = await context.newPage();
-
+    try {
       let totalInserted = 0;
       let totalUpdated = 0;
-
       const total = works.length;
+
       for (let i = 0; i < works.length; i++) {
         const work = works[i];
         if (options?.onProgress) {
@@ -347,7 +377,11 @@ export class XiaohongshuWorkNoteStatisticsImportService {
         if (!noteId) continue;
 
         try {
-          const data = await this.fetchNoteBaseData(page, noteId);
+          let data: NoteBaseData | null = await this.fetchNoteBaseViaPython(account, noteId);
+          if (!data) {
+            await ensureBrowser();
+            data = await this.fetchNoteBaseData(page!, noteId);
+          }
           if (!data) continue;
 
           // 同步 base 顶层“汇总指标”到 works 表(用于作品列表/总览等按 work 累计口径展示)
@@ -451,13 +485,45 @@ export class XiaohongshuWorkNoteStatisticsImportService {
         if (context) {
           await context.close().catch(() => undefined);
         }
-        if (shouldClose) {
+        if (shouldClose && browser) {
           await browser.close().catch(() => undefined);
         }
       }
     }
   }
 
+  /** 通过 Python 调用 note/base(登录与打开后台一致:使用账号已存 Cookie) */
+  private async fetchNoteBaseViaPython(
+    account: PlatformAccount,
+    noteId: string
+  ): Promise<NoteBaseData | null> {
+    const base = (await getPythonServiceBaseUrl()).replace(/\/$/, '');
+    const url = `${base}/xiaohongshu/note_base`;
+    const cookie = String(account.cookieData || '').trim();
+    if (!cookie) return null;
+
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), 35_000);
+    try {
+      const res = await fetch(url, {
+        method: 'POST',
+        signal: controller.signal,
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ cookie, note_id: noteId }),
+      });
+      const text = await res.text();
+      const body = text ? (JSON.parse(text) as { data?: unknown; code?: number }) : null;
+      if (!body || typeof body !== 'object') return null;
+      const data = body.data;
+      if (!data || typeof data !== 'object') return null;
+      return data as NoteBaseData;
+    } catch {
+      return null;
+    } finally {
+      clearTimeout(timeoutId);
+    }
+  }
+
   private async fetchNoteBaseData(page: Page, noteId: string): Promise<NoteBaseData | null> {
     const noteUrl = `https://creator.xiaohongshu.com/statistics/note-detail?noteId=${encodeURIComponent(
       noteId