1 月之前 · 76851ea99f
--- a/server/python/platforms/__pycache__/xiaohongshu.cpython-313.pyc
+++ b/server/python/platforms/__pycache__/xiaohongshu.cpython-313.pyc
--- a/server/python/platforms/xiaohongshu.py
+++ b/server/python/platforms/xiaohongshu.py
@@ -579,6 +579,82 @@ class XiaohongshuPublisher(BasePublisher):
 
															             status='success'
														
 
															         )
														
 
															+    async def get_account_info(self, cookies: str) -> dict:
														
 
															+        """获取账号信息"""
														
 
															+        print(f"\n{'='*60}")
														
 
															+        print(f"[{self.platform_name}] 获取账号信息")
														
 
															+        print(f"{'='*60}")
														
 
															+        
														
 
															+        captured_info = {}
														
 
															+        
														
 
															+        try:
														
 
															+            await self.init_browser()
														
 
															+            cookie_list = self.parse_cookies(cookies)
														
 
															+            await self.set_cookies(cookie_list)
														
 
															+            
														
 
															+            if not self.page:
														
 
															+                raise Exception("Page not initialized")
														
 
															+            
														
 
															+            # 监听个人信息 API
														
 
															+            async def handle_response(response):
														
 
															+                nonlocal captured_info
														
 
															+                if 'api/galaxy/creator/home/personal_info' in response.url:
														
 
															+                    try:
														
 
															+                        json_data = await response.json()
														
 
															+                        print(f"[{self.platform_name}] 捕获个人信息 API", flush=True)
														
 
															+                        if json_data.get('success') or json_data.get('code') == 0:
														
 
															+                            data = json_data.get('data', {})
														
 
															+                            captured_info = {
														
 
															+                                "account_id": f"xhs_{data.get('red_num', '')}",
														
 
															+                                "account_name": data.get('name', ''),
														
 
															+                                "avatar_url": data.get('avatar', ''),
														
 
															+                                "fans_count": data.get('fans_count', 0),
														
 
															+                                "works_count": 0  # 暂时无法直接获取准确的作品数，需要从作品列表获取
														
 
															+                            }
														
 
															+                    except Exception as e:
														
 
															+                        print(f"[{self.platform_name}] 解析个人信息失败: {e}", flush=True)
														
 
															+            
														
 
															+            self.page.on('response', handle_response)
														
 
															+            
														
 
															+            # 访问首页
														
 
															+            print(f"[{self.platform_name}] 访问创作者首页...", flush=True)
														
 
															+            await self.page.goto("https://creator.xiaohongshu.com/new/home", wait_until="domcontentloaded")
														
 
															+            
														
 
															+            # 等待 API 响应
														
 
															+            for _ in range(10):
														
 
															+                if captured_info:
														
 
															+                    break
														
 
															+                await asyncio.sleep(1)
														
 
															+            
														
 
															+            if not captured_info:
														
 
															+                print(f"[{self.platform_name}] 未捕获到个人信息，尝试刷新...", flush=True)
														
 
															+                await self.page.reload()
														
 
															+                for _ in range(10):
														
 
															+                    if captured_info:
														
 
															+                        break
														
 
															+                    await asyncio.sleep(1)
														
 
															+            
														
 
															+            if not captured_info:
														
 
															+                raise Exception("无法获取账号信息")
														
 
															+            
														
 
															+            # 尝试获取作品数（从首页或其他地方）
														
 
															+            # 或者简单地返回已获取的信息，作品数由 get_works 更新
														
 
															+            
														
 
															+            return {
														
 
															+                "success": True,
														
 
															+                **captured_info
														
 
															+            }
														
 
															+            
														
 
															+        except Exception as e:
														
 
															+            import traceback
														
 
															+            traceback.print_exc()
														
 
															+            return {
														
 
															+                "success": False,
														
 
															+                "error": str(e)
														
 
															+            }
														
 
															+        finally:
														
 
															+            await self.close_browser()
														
 
															+
														
 
															     async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
														
 
															         """获取小红书作品列表 - 通过监听页面网络响应获取数据"""
														
 
															         print(f"\n{'='*60}", flush=True)
														
@@ -705,21 +781,26 @@ class XiaohongshuPublisher(BasePublisher):
 
															                 works.extend(parse_notes(notes))
														
 
															                 # 分页抓取剩余页面：不依赖 data.page（有些情况下会误报 -1），直到拿不到新数据为止
														
 
															-                max_pages = 30
														
 
															+                max_pages = 100  # 增加最大页数限制，确保能抓取更多作品
														
 
															                 page_num = 1  # 已经拿了 page=0
														
 
															                 seen_note_ids = set([w.work_id for w in works])
														
 
															                 has_more = True
														
 
															                 while has_more and page_num < max_pages:
														
 
															+                    print(f"[{self.platform_name}] 正在抓取第 {page_num} 页...", flush=True)
														
 
															                     try:
														
 
															                         next_resp = await self.page.evaluate(
														
 
															                             """async (p) => {
														
 
															-                                const res = await fetch(`https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=${p}`, {
														
 
															-                                    method: 'GET',
														
 
															-                                    credentials: 'include',
														
 
															-                                    headers: { 'Accept': 'application/json' }
														
 
															-                                });
														
 
															-                                return await res.json();
														
 
															+                                try {
														
 
															+                                    const res = await fetch(`https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=${p}`, {
														
 
															+                                        method: 'GET',
														
 
															+                                        credentials: 'include',
														
 
															+                                        headers: { 'Accept': 'application/json' }
														
 
															+                                    });
														
 
															+                                    return await res.json();
														
 
															+                                } catch (e) {
														
 
															+                                    return { success: false, error: e.toString() };
														
 
															+                                }
														
 
															                             }""",
														
 
															                             page_num
														
 
															                         )
														
@@ -728,25 +809,31 @@ class XiaohongshuPublisher(BasePublisher):
 
															                         break
														
 
															                     if not next_resp:
														
 
															+                        print(f"[{self.platform_name}] 第 {page_num} 页无响应", flush=True)
														
 
															                         break
														
 
															                     if not (next_resp.get('success') or next_resp.get('code') == 0):
														
 
															+                        print(f"[{self.platform_name}] 第 {page_num} 页请求失败: {next_resp.get('msg')}", flush=True)
														
 
															                         break
														
 
															                     next_data = next_resp.get('data', {})
														
 
															                     next_notes = next_data.get('notes', []) or []
														
 
															                     if not next_notes:
														
 
															+                        print(f"[{self.platform_name}] 第 {page_num} 页无作品，停止抓取", flush=True)
														
 
															                         has_more = False
														
 
															                         break
														
 
															                     parsed_next = parse_notes(next_notes)
														
 
															                     new_items = [w for w in parsed_next if w.work_id and w.work_id not in seen_note_ids]
														
 
															+                    
														
 
															                     if not new_items:
														
 
															                         # 没有新数据，停止
														
 
															+                        print(f"[{self.platform_name}] 第 {page_num} 页无新数据，停止抓取", flush=True)
														
 
															                         has_more = False
														
 
															                         break
														
 
															-
														
 
															+                    
														
 
															+                    print(f"[{self.platform_name}] 第 {page_num} 页获取到 {len(new_items)} 条新数据", flush=True)
														
 
															                     for w in new_items:
														
 
															                         seen_note_ids.add(w.work_id)
														
 
															                     works.extend(new_items)
														
@@ -759,10 +846,14 @@ class XiaohongshuPublisher(BasePublisher):
 
															                                 break
														
 
															                     page_num += 1
														
 
															+                    # 增加一点延迟，避免请求过快
														
 
															+                    await asyncio.sleep(1)
														
 
															                 # 分页完毕，has_more 表示是否还存在更多（以最后一页标记为准）
														
 
															                 if not has_more:
														
 
															                     print(f"[{self.platform_name}] 已抓取所有分页，共 {len(works)} 条", flush=True)
														
 
															+                else:
														
 
															+                    print(f"[{self.platform_name}] 达到最大页数限制 {max_pages}，共 {len(works)} 条", flush=True)
														
 
															             else:
														
 
															                 print(f"[{self.platform_name}] 未能捕获到 API 数据", flush=True)
														
--- a/server/python/weixin_private_msg_266653.png
+++ b/server/python/weixin_private_msg_266653.png
--- a/server/src/services/HeadlessBrowserService.ts
+++ b/server/src/services/HeadlessBrowserService.ts
@@ -637,7 +637,20 @@ class HeadlessBrowserService {
 
															             logger.info(`[Python API] Successfully fetched ${worksList.length} works for ${platform}`);
														
 
															             try {
														
 
															-              const accountInfo = await this.fetchAccountInfoWithPlaywright(platform, cookies);
														
 
															+              let accountInfo: AccountInfo;
														
 
															+
														
 
															+              // 对于小红书，优先使用 Python API 获取账号信息（因为 Node 端 Playwright 可能无法捕获到 API）
														
 
															+              if (platform === 'xiaohongshu') {
														
 
															+                try {
														
 
															+                  accountInfo = await this.fetchAccountInfoViaPython(platform, cookies);
														
 
															+                } catch (e) {
														
 
															+                  logger.warn(`[Python API] Failed to fetch account info for ${platform}, falling back to Playwright:`, e);
														
 
															+                  accountInfo = await this.fetchAccountInfoWithPlaywright(platform, cookies);
														
 
															+                }
														
 
															+              } else {
														
 
															+                accountInfo = await this.fetchAccountInfoWithPlaywright(platform, cookies);
														
 
															+              }
														
 
															+
														
 
															               accountInfo.worksList = worksList;
														
 
															               // 直接使用 Python API 获取的作品数量（最准确，排除了已删除/私密视频）
														
 
															               accountInfo.worksCount = worksList.length;