22 horas atrás · 81ce2daf7d
--- a/server/python/platforms/xiaohongshu.py
+++ b/server/python/platforms/xiaohongshu.py
@@ -15,6 +15,9 @@ from .base import (
 
				     BasePublisher, PublishParams, PublishResult,
			
 
				     WorkItem, WorksResult, CommentItem, CommentsResult
			
 
				 )
			
 
				+from playwright.async_api import async_playwright
			
 
				+
			
 
				+
			
 
				 
			
 
				 # 添加 matrix 项目路径，用于导入签名脚本
			
 
				 MATRIX_PATH = Path(__file__).parent.parent.parent.parent / "matrix"
			
@@ -1554,150 +1557,194 @@ class XiaohongshuPublisher(BasePublisher):
 
				             debug_info=debug_info
			
 
				         )
			
 
				     
			
 
				+
			
 
				     async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
			
 
				-        """获取小红书作品评论 - 通过创作者后台评论管理页面"""
			
 
				-        print(f"\n{'='*60}")
			
 
				-        print(f"[{self.platform_name}] 获取作品评论")
			
 
				-        print(f"[{self.platform_name}] work_id={work_id}, cursor={cursor}")
			
 
				-        print(f"{'='*60}")
			
 
				-        
			
 
				-        comments: List[CommentItem] = []
			
 
				-        total = 0
			
 
				+        """
			
 
				+        获取账号下所有作品的评论 —— 完全复刻 get_xiaohongshu_work_comments.py 的7步流程。
			
 
				+        """
			
 
				+        all_comments: List[CommentItem] = []
			
 
				+        total_comments = 0
			
 
				         has_more = False
			
 
				-        next_cursor = ""
			
 
				-        captured_data = {}
			
 
				-        
			
 
				+        browser = None
			
 
				+        print(222222222222222222222222222222222222)
			
 
				+        print(work_id)
			
 
				+
			
 
				         try:
			
 
				-            await self.init_browser()
			
 
				+            # --- Step 1: 初始化浏览器和 Cookie ---
			
 
				             cookie_list = self.parse_cookies(cookies)
			
 
				-            await self.set_cookies(cookie_list)
			
 
				-            
			
 
				-            if not self.page:
			
 
				-                raise Exception("Page not initialized")
			
 
				-            
			
 
				-            # 设置 API 响应监听器
			
 
				-            async def handle_response(response):
			
 
				-                nonlocal captured_data
			
 
				-                url = response.url
			
 
				-                # 监听评论相关 API - 创作者后台和普通页面的 API
			
 
				-                if '/comment/' in url and ('page' in url or 'list' in url):
			
 
				-                    try:
			
 
				-                        json_data = await response.json()
			
 
				-                        print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
			
 
				-                        if json_data.get('success') or json_data.get('code') == 0:
			
 
				-                            data = json_data.get('data', {})
			
 
				-                            comment_list = data.get('comments') or data.get('list') or []
			
 
				-                            if comment_list:
			
 
				-                                captured_data = json_data
			
 
				-                                print(f"[{self.platform_name}] 评论 API 响应成功，comments={len(comment_list)}", flush=True)
			
 
				-                            else:
			
 
				-                                print(f"[{self.platform_name}] 评论 API 响应成功但无评论", flush=True)
			
 
				-                    except Exception as e:
			
 
				-                        print(f"[{self.platform_name}] 解析评论响应失败: {e}", flush=True)
			
 
				-            
			
 
				-            self.page.on('response', handle_response)
			
 
				-            print(f"[{self.platform_name}] 已注册评论 API 响应监听器", flush=True)
			
 
				-            
			
 
				-            # 访问创作者后台评论管理页面
			
 
				-            comment_url = "https://creator.xiaohongshu.com/creator/comment"
			
 
				-            print(f"[{self.platform_name}] 访问评论管理页面: {comment_url}", flush=True)
			
 
				-            await self.page.goto(comment_url, wait_until="domcontentloaded", timeout=30000)
			
 
				-            await asyncio.sleep(5)
			
 
				-            
			
 
				-            # 检查是否被重定向到登录页
			
 
				-            current_url = self.page.url
			
 
				-            print(f"[{self.platform_name}] 当前页面 URL: {current_url}", flush=True)
			
 
				-            if "login" in current_url:
			
 
				-                raise Exception("Cookie 已过期，请重新登录")
			
 
				-            
			
 
				-            # 等待评论加载
			
 
				-            if not captured_data:
			
 
				-                print(f"[{self.platform_name}] 等待评论 API 响应...", flush=True)
			
 
				-                # 尝试滚动页面触发评论加载
			
 
				-                await self.page.evaluate('window.scrollBy(0, 500)')
			
 
				-                await asyncio.sleep(3)
			
 
				-            
			
 
				-            if not captured_data:
			
 
				-                # 再等待一会，可能评论 API 加载较慢
			
 
				-                print(f"[{self.platform_name}] 继续等待评论加载...", flush=True)
			
 
				-                await asyncio.sleep(5)
			
 
				-            
			
 
				-            # 移除监听器
			
 
				-            self.page.remove_listener('response', handle_response)
			
 
				-            
			
 
				-            # 解析评论数据
			
 
				-            if captured_data:
			
 
				-                data = captured_data.get('data', {})
			
 
				-                comment_list = data.get('comments') or data.get('list') or []
			
 
				-                has_more = data.get('has_more', False)
			
 
				-                next_cursor = data.get('cursor', '')
			
 
				-                
			
 
				-                print(f"[{self.platform_name}] 解析评论: has_more={has_more}, comments={len(comment_list)}", flush=True)
			
 
				-                
			
 
				-                for comment in comment_list:
			
 
				-                    cid = comment.get('id', '')
			
 
				-                    if not cid:
			
 
				+            playwright = await async_playwright().start()
			
 
				+            browser = await playwright.chromium.launch(headless=False) 
			
 
				+            context = await browser.new_context(
			
 
				+                viewport={"width": 1400, "height": 900},
			
 
				+                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
			
 
				+            )
			
 
				+            await context.add_cookies(cookie_list)
			
 
				+            page = await context.new_page()
			
 
				+
			
 
				+            # --- Step 2: 打开小红书主页 ---
			
 
				+            await page.goto("https://www.xiaohongshu.com", wait_until="domcontentloaded")
			
 
				+            await asyncio.sleep(1.5)
			
 
				+
			
 
				+            # --- Step 3: 检查并处理登录弹窗 ---
			
 
				+            try:
			
 
				+                if await page.is_visible(".login-container", timeout=3000):
			
 
				+                    await page.wait_for_selector(".login-container", state="hidden", timeout=120000)
			
 
				+            except Exception as e:
			
 
				+                pass  # 忽略超时，继续执行
			
 
				+
			
 
				+            # --- 提取 User ID ---
			
 
				+            user_id = None
			
 
				+            for cookie in cookie_list:
			
 
				+                if cookie.get('name') == 'x-user-id-creator.xiaohongshu.com':
			
 
				+                    user_id = cookie.get('value')
			
 
				+                    break
			
 
				+            if not user_id:
			
 
				+                raise ValueError("无法从 Cookie 中提取 user_id")
			
 
				+
			
 
				+            # --- Step 4: 跳转到用户主页 ---
			
 
				+            profile_url = f"https://www.xiaohongshu.com/user/profile/{user_id}"
			
 
				+            await page.goto(profile_url, wait_until="domcontentloaded")
			
 
				+            await asyncio.sleep(2)
			
 
				+
			
 
				+            # --- 等待笔记区域加载 ---
			
 
				+            try:
			
 
				+                await page.wait_for_selector("#userPostedFeeds .note-item", timeout=20000)
			
 
				+            except:
			
 
				+                raise Exception("笔记区域未加载，请检查账号是否公开或 Cookie 是否有效")
			
 
				+
			
 
				+            # --- Step 5: 滚动到底部加载全部笔记 ---
			
 
				+            last_height = None
			
 
				+            while True:
			
 
				+                await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
			
 
				+                await asyncio.sleep(2)
			
 
				+                new_height = await page.evaluate("document.body.scrollHeight")
			
 
				+                if new_height == last_height:
			
 
				+                    break
			
 
				+                last_height = new_height
			
 
				+
			
 
				+            # --- 获取所有封面图 ---
			
 
				+            note_imgs = await page.query_selector_all("#userPostedFeeds .note-item .cover img")
			
 
				+            print(f"共找到 {len(note_imgs)} 张封面图")
			
 
				+
			
 
				+            # --- Step 6 & 7: 依次点击封面图，捕获评论并结构化 ---
			
 
				+            for i, img in enumerate(note_imgs):
			
 
				+                try:
			
 
				+                    await img.scroll_into_view_if_needed()
			
 
				+                    await asyncio.sleep(0.5)
			
 
				+
			
 
				+                    comment_resp = None
			
 
				+                    def handle_response(response):
			
 
				+                        nonlocal comment_resp
			
 
				+                        if "edith.xiaohongshu.com/api/sns/web/v2/comment/page" in response.url:
			
 
				+                            comment_resp = response
			
 
				+
			
 
				+                    page.on("response", handle_response)
			
 
				+                    await img.click()
			
 
				+                    await asyncio.sleep(1.5)
			
 
				+                    page.remove_listener("response", handle_response)
			
 
				+
			
 
				+                    if not comment_resp:
			
 
				+                        await page.keyboard.press("Escape")
			
 
				                         continue
			
 
				-                    
			
 
				-                    user_info = comment.get('user_info', {})
			
 
				-                    
			
 
				-                    # 解析子评论
			
 
				-                    replies = []
			
 
				-                    sub_comments = comment.get('sub_comments', []) or []
			
 
				-                    for sub in sub_comments:
			
 
				-                        sub_user = sub.get('user_info', {})
			
 
				-                        replies.append(CommentItem(
			
 
				-                            comment_id=sub.get('id', ''),
			
 
				+
			
 
				+                    json_data = await comment_resp.json()
			
 
				+                    if not (json_data.get("success") or json_data.get("code") == 0):
			
 
				+                        await page.keyboard.press("Escape")
			
 
				+                        continue
			
 
				+
			
 
				+                    data = json_data.get("data", {})
			
 
				+                    raw_comments = data.get("comments", [])
			
 
				+                    note_id = data.get("note_id", "")
			
 
				+
			
 
				+                    for main_cmt in raw_comments:
			
 
				+                        # 主评论
			
 
				+                        user_info = main_cmt.get("user_info", {})
			
 
				+                        all_comments.append(CommentItem(
			
 
				+                            comment_id=main_cmt["id"],
			
 
				+                            parent_comment_id=None,
			
 
				                             work_id=work_id,
			
 
				-                            content=sub.get('content', ''),
			
 
				-                            author_id=sub_user.get('user_id', ''),
			
 
				-                            author_name=sub_user.get('nickname', ''),
			
 
				-                            author_avatar=sub_user.get('image', ''),
			
 
				-                            like_count=sub.get('like_count', 0),
			
 
				-                            create_time=sub.get('create_time', ''),
			
 
				-                        ))
			
 
				-                    
			
 
				-                    comments.append(CommentItem(
			
 
				-                        comment_id=cid,
			
 
				-                        work_id=work_id,
			
 
				-                        content=comment.get('content', ''),
			
 
				-                        author_id=user_info.get('user_id', ''),
			
 
				-                        author_name=user_info.get('nickname', ''),
			
 
				-                        author_avatar=user_info.get('image', ''),
			
 
				-                        like_count=comment.get('like_count', 0),
			
 
				-                        reply_count=comment.get('sub_comment_count', 0),
			
 
				-                        create_time=comment.get('create_time', ''),
			
 
				-                        replies=replies,
			
 
				-                    ))
			
 
				-                
			
 
				-                total = len(comments)
			
 
				-                print(f"[{self.platform_name}] 解析到 {total} 条评论", flush=True)
			
 
				-            else:
			
 
				-                print(f"[{self.platform_name}] 未捕获到评论 API 响应", flush=True)
			
 
				-            
			
 
				+                            content=main_cmt["content"],
			
 
				+                            author_id=user_info.get("user_id", ""),
			
 
				+                            author_name=user_info.get("nickname", ""),
			
 
				+                            author_avatar=user_info.get("image", ""),
			
 
				+                            like_count=int(main_cmt.get("like_count", 0)),
			
 
				+                            reply_count=main_cmt.get("sub_comment_count", 0),
			
 
				+                            create_time=self._timestamp_to_readable(main_cmt.get("create_time", 0)),
			
 
				+                            ))
			
 
				+
			
 
				+                        # 子评论
			
 
				+                        for sub_cmt in main_cmt.get("sub_comments", []):
			
 
				+                            sub_user = sub_cmt.get("user_info", {})
			
 
				+                            all_comments.append(CommentItem(
			
 
				+                                comment_id=sub_cmt["id"],
			
 
				+                                parent_comment_id=main_cmt["id"],
			
 
				+                                work_id=work_id,
			
 
				+                                content=sub_cmt["content"],
			
 
				+                                author_id=sub_user.get("user_id", ""),
			
 
				+                                author_name=sub_user.get("nickname", ""),
			
 
				+                                author_avatar=sub_user.get("image", ""),
			
 
				+                                like_count=int(sub_cmt.get("like_count", 0)),
			
 
				+                                reply_count=0,
			
 
				+                                create_time=self._timestamp_to_readable(sub_cmt.get("create_time", 0)),
			
 
				+                            ))
			
 
				+
			
 
				+                    # 关闭弹窗
			
 
				+                    await page.keyboard.press("Escape")
			
 
				+                    await asyncio.sleep(1)
			
 
				+
			
 
				+                except Exception as e:
			
 
				+                    # 出错也尝试关闭弹窗
			
 
				+                    try:
			
 
				+                        await page.keyboard.press("Escape")
			
 
				+                        await asyncio.sleep(0.5)
			
 
				+                    except:
			
 
				+                        pass
			
 
				+                    continue
			
 
				+
			
 
				+            # --- 返回结果 ---
			
 
				+            total_comments = len(all_comments)
			
 
				+            # return {
			
 
				+            #     'success': True,
			
 
				+            #     'platform': self.platform_name,
			
 
				+            #     'work_comments': all_comments,  # 注意：此处为扁平列表，如需按作品分组可在外层处理
			
 
				+            #     'total': total_comments
			
 
				+            # }
			
 
				+            return CommentsResult(
			
 
				+                success=True, 
			
 
				+                platform=self.platform_name, 
			
 
				+                work_id=work_id, 
			
 
				+                comments=all_comments, 
			
 
				+                total=total_comments,
			
 
				+                has_more=has_more
			
 
				+            )
			
 
				+
			
 
				+
			
 
				         except Exception as e:
			
 
				             import traceback
			
 
				             traceback.print_exc()
			
 
				             return CommentsResult(
			
 
				-                success=False,
			
 
				-                platform=self.platform_name,
			
 
				-                work_id=work_id,
			
 
				-                error=str(e)
			
 
				+                success=True, 
			
 
				+                platform=self.platform_name, 
			
 
				+                work_id=work_id, 
			
 
				+                total=0
			
 
				             )
			
 
				         finally:
			
 
				-            await self.close_browser()
			
 
				-        
			
 
				-        result = CommentsResult(
			
 
				-            success=True,
			
 
				-            platform=self.platform_name,
			
 
				-            work_id=work_id,
			
 
				-            comments=comments,
			
 
				-            total=total,
			
 
				-            has_more=has_more
			
 
				-        )
			
 
				-        result.__dict__['cursor'] = next_cursor
			
 
				-        return result
			
 
				-    
			
 
				+            if browser:
			
 
				+                await browser.close()
			
 
				+
			
 
				+
			
 
				+
			
 
				+    def _timestamp_to_readable(self, ts_ms: int) -> str:
			
 
				+        """将毫秒时间戳转换为可读格式"""
			
 
				+        from datetime import datetime
			
 
				+        if not ts_ms:
			
 
				+            return ""
			
 
				+        try:
			
 
				+            return datetime.fromtimestamp(ts_ms / 1000).strftime("%Y-%m-%d %H:%M:%S")
			
 
				+        except Exception:
			
 
				+            return ""
			
 
				+
			
 
				+
			
 
				     async def get_all_comments(self, cookies: str) -> dict:
			
 
				         """获取所有作品的评论 - 通过评论管理页面"""
			
 
				         print(f"\n{'='*60}")