před 1 měsícem · 76851ea99f
--- a/server/python/platforms/__pycache__/xiaohongshu.cpython-313.pyc
+++ b/server/python/platforms/__pycache__/xiaohongshu.cpython-313.pyc
--- a/server/python/platforms/xiaohongshu.py
+++ b/server/python/platforms/xiaohongshu.py
@@ -579,6 +579,82 @@ class XiaohongshuPublisher(BasePublisher):
 
				             status='success'
			
 
				         )
			
 
				     
			
 
    async def get_account_info(self, cookies: str) -> dict:
        """Fetch basic profile information for the logged-in creator account.

        Loads the creator home page with the supplied cookies and captures the
        ``personal_info`` API response that the page requests, rather than
        reading anything from the DOM. If nothing is captured within ten
        seconds the page is reloaded once and the wait is repeated.

        Args:
            cookies: Cookie string for the account; it is passed through
                ``self.parse_cookies`` and the result to ``self.set_cookies``.

        Returns:
            On success a dict with ``success=True`` plus ``account_id``
            (``"xhs_" + red_num``), ``account_name``, ``avatar_url``,
            ``fans_count`` and ``works_count``. ``works_count`` is always 0
            here because this endpoint does not provide it. On failure a dict
            with ``success=False`` and ``error`` holding the exception text;
            errors raised inside the ``try`` body are reported this way
            instead of being propagated.
        """
        print(f"\n{'='*60}")
        print(f"[{self.platform_name}] 获取账号信息")
        print(f"{'='*60}")

        captured_info = {}
        # Set by the response listener so the waits below wake up as soon as
        # the data arrives instead of polling once per second.
        info_ready = asyncio.Event()

        async def handle_response(response):
            """Record the account fields when the personal-info API responds."""
            if 'api/galaxy/creator/home/personal_info' not in response.url:
                return
            try:
                json_data = await response.json()
                print(f"[{self.platform_name}] 捕获个人信息 API", flush=True)
                if not (json_data.get('success') or json_data.get('code') == 0):
                    return

                data = json_data.get('data')
                if not isinstance(data, dict) or not data:
                    # A "successful" response without a payload must not be
                    # reported as an account with id "xhs_" and empty fields.
                    print(f"[{self.platform_name}] 个人信息响应缺少 data 字段", flush=True)
                    return

                if not data.get('red_num'):
                    # The id is still built (unchanged behaviour), but it will
                    # not be unique, so make that visible in the log.
                    print(f"[{self.platform_name}] 个人信息缺少 red_num，account_id 可能不唯一", flush=True)

                captured_info.update({
                    "account_id": f"xhs_{data.get('red_num', '')}",
                    "account_name": data.get('name', ''),
                    "avatar_url": data.get('avatar', ''),
                    "fans_count": data.get('fans_count', 0),
                    # Not available from this endpoint; the caller is expected
                    # to fill it in from the works list.
                    "works_count": 0,
                })
                info_ready.set()
            except Exception as e:
                # Best effort: a malformed response must not break page loading.
                print(f"[{self.platform_name}] 解析个人信息失败: {e}", flush=True)

        async def wait_for_info(timeout: float = 10) -> bool:
            """Wait up to ``timeout`` seconds for the listener to capture data."""
            try:
                await asyncio.wait_for(info_ready.wait(), timeout)
            except asyncio.TimeoutError:
                return False
            return True

        try:
            await self.init_browser()
            cookie_list = self.parse_cookies(cookies)
            await self.set_cookies(cookie_list)

            if not self.page:
                raise Exception("Page not initialized")

            # Register the listener before navigating so the response that is
            # fired during page load cannot be missed.
            self.page.on('response', handle_response)

            print(f"[{self.platform_name}] 访问创作者首页...", flush=True)
            await self.page.goto("https://creator.xiaohongshu.com/new/home", wait_until="domcontentloaded")

            if not await wait_for_info():
                # One retry: reloading makes the page request the API again.
                print(f"[{self.platform_name}] 未捕获到个人信息，尝试刷新...", flush=True)
                await self.page.reload()
                await wait_for_info()

            if not captured_info:
                raise Exception("无法获取账号信息")

            return {
                "success": True,
                **captured_info
            }

        except Exception as e:
            import traceback
            traceback.print_exc()
            return {
                "success": False,
                "error": str(e)
            }
        finally:
            # Always release the browser, on success and on failure alike.
            await self.close_browser()
			
 
				+
			
 
				     async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
			
 
				         """获取小红书作品列表 - 通过监听页面网络响应获取数据"""
			
 
				         print(f"\n{'='*60}", flush=True)
			
@@ -705,21 +781,26 @@ class XiaohongshuPublisher(BasePublisher):
 
				                 works.extend(parse_notes(notes))
			
 
				 
			
 
				                 # 分页抓取剩余页面：不依赖 data.page（有些情况下会误报 -1），直到拿不到新数据为止
			
 
				-                max_pages = 30
			
 
				+                max_pages = 100  # 增加最大页数限制，确保能抓取更多作品
			
 
				                 page_num = 1  # 已经拿了 page=0
			
 
				                 seen_note_ids = set([w.work_id for w in works])
			
 
				                 has_more = True
			
 
				 
			
 
				                 while has_more and page_num < max_pages:
			
 
				+                    print(f"[{self.platform_name}] 正在抓取第 {page_num} 页...", flush=True)
			
 
				                     try:
			
 
				                         next_resp = await self.page.evaluate(
			
 
				                             """async (p) => {
			
 
				-                                const res = await fetch(`https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=${p}`, {
			
 
				-                                    method: 'GET',
			
 
				-                                    credentials: 'include',
			
 
				-                                    headers: { 'Accept': 'application/json' }
			
 
				-                                });
			
 
				-                                return await res.json();
			
 
				+                                try {
			
 
				+                                    const res = await fetch(`https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=${p}`, {
			
 
				+                                        method: 'GET',
			
 
				+                                        credentials: 'include',
			
 
				+                                        headers: { 'Accept': 'application/json' }
			
 
				+                                    });
			
 
				+                                    return await res.json();
			
 
				+                                } catch (e) {
			
 
				+                                    return { success: false, error: e.toString() };
			
 
				+                                }
			
 
				                             }""",
			
 
				                             page_num
			
 
				                         )
			
@@ -728,25 +809,31 @@ class XiaohongshuPublisher(BasePublisher):
 
				                         break
			
 
				 
			
 
				                     if not next_resp:
			
 
				+                        print(f"[{self.platform_name}] 第 {page_num} 页无响应", flush=True)
			
 
				                         break
			
 
				 
			
 
				                     if not (next_resp.get('success') or next_resp.get('code') == 0):
			
 
				+                        print(f"[{self.platform_name}] 第 {page_num} 页请求失败: {next_resp.get('msg')}", flush=True)
			
 
				                         break
			
 
				 
			
 
				                     next_data = next_resp.get('data', {})
			
 
				                     next_notes = next_data.get('notes', []) or []
			
 
				 
			
 
				                     if not next_notes:
			
 
				+                        print(f"[{self.platform_name}] 第 {page_num} 页无作品，停止抓取", flush=True)
			
 
				                         has_more = False
			
 
				                         break
			
 
				 
			
 
				                     parsed_next = parse_notes(next_notes)
			
 
				                     new_items = [w for w in parsed_next if w.work_id and w.work_id not in seen_note_ids]
			
 
				+                    
			
 
				                     if not new_items:
			
 
				                         # 没有新数据，停止
			
 
				+                        print(f"[{self.platform_name}] 第 {page_num} 页无新数据，停止抓取", flush=True)
			
 
				                         has_more = False
			
 
				                         break
			
 
				-
			
 
				+                    
			
 
				+                    print(f"[{self.platform_name}] 第 {page_num} 页获取到 {len(new_items)} 条新数据", flush=True)
			
 
				                     for w in new_items:
			
 
				                         seen_note_ids.add(w.work_id)
			
 
				                     works.extend(new_items)
			
@@ -759,10 +846,14 @@ class XiaohongshuPublisher(BasePublisher):
 
				                                 break
			
 
				 
			
 
				                     page_num += 1
			
 
				+                    # 增加一点延迟，避免请求过快
			
 
				+                    await asyncio.sleep(1)
			
 
				                 
			
 
				                 # 分页完毕，has_more 表示是否还存在更多（以最后一页标记为准）
			
 
				                 if not has_more:
			
 
				                     print(f"[{self.platform_name}] 已抓取所有分页，共 {len(works)} 条", flush=True)
			
 
				+                else:
			
 
				+                    print(f"[{self.platform_name}] 达到最大页数限制 {max_pages}，共 {len(works)} 条", flush=True)
			
 
				             else:
			
 
				                 print(f"[{self.platform_name}] 未能捕获到 API 数据", flush=True)
			
 
				             
			
--- a/server/python/weixin_private_msg_266653.png
+++ b/server/python/weixin_private_msg_266653.png
--- a/server/src/services/HeadlessBrowserService.ts
+++ b/server/src/services/HeadlessBrowserService.ts
@@ -637,7 +637,20 @@ class HeadlessBrowserService {
 
				             logger.info(`[Python API] Successfully fetched ${worksList.length} works for ${platform}`);
			
 
				 
			
 
				             try {
			
 
				-              const accountInfo = await this.fetchAccountInfoWithPlaywright(platform, cookies);
			
 
				+              let accountInfo: AccountInfo;
			
 
				+
			
 
				+              // 对于小红书，优先使用 Python API 获取账号信息（因为 Node 端 Playwright 可能无法捕获到 API）
			
 
				+              if (platform === 'xiaohongshu') {
			
 
				+                try {
			
 
				+                  accountInfo = await this.fetchAccountInfoViaPython(platform, cookies);
			
 
				+                } catch (e) {
			
 
				+                  logger.warn(`[Python API] Failed to fetch account info for ${platform}, falling back to Playwright:`, e);
			
 
				+                  accountInfo = await this.fetchAccountInfoWithPlaywright(platform, cookies);
			
 
				+                }
			
 
				+              } else {
			
 
				+                accountInfo = await this.fetchAccountInfoWithPlaywright(platform, cookies);
			
 
				+              }
			
 
				+
			
 
				               accountInfo.worksList = worksList;
			
 
				               // 直接使用 Python API 获取的作品数量（最准确，排除了已删除/私密视频）
			
 
				               accountInfo.worksCount = worksList.length;