Ver código fonte

子评论相关代码

swortect 1 dia atrás
pai
commit
a23999ffc3

+ 2 - 0
server/python/platforms/base.py

@@ -84,6 +84,7 @@ class WorkItem:
 class CommentItem:
     """评论数据"""
     comment_id: str
+    parent_comment_id: str
     work_id: str
     content: str
     author_id: str = ""
@@ -98,6 +99,7 @@ class CommentItem:
     def to_dict(self) -> Dict[str, Any]:
         return {
             "comment_id": self.comment_id,
+            "parent_comment_id": self.parent_comment_id,
             "work_id": self.work_id,
             "content": self.content,
             "author_id": self.author_id,

+ 231 - 116
server/python/platforms/weixin.py

@@ -326,7 +326,8 @@ class WeixinPublisher(BasePublisher):
         # 如果没有安装 Chrome,则使用默认 Chromium
         try:
             self.browser = await playwright.chromium.launch(
-                headless=self.headless,
+                # headless=self.headless,
+                headless=False,
                 channel="chrome"  # 使用系统 Chrome
             )
             print(f"[{self.platform_name}] 使用系统 Chrome 浏览器")
@@ -1070,12 +1071,12 @@ class WeixinPublisher(BasePublisher):
             if not self.page:
                 raise Exception("Page not initialized")
             
-            await self.page.goto("https://channels.weixin.qq.com/micro/content/post/list", timeout=30000)
+            await self.page.goto("https://channels.weixin.qq.com/platform/post/list", timeout=30000)
             await asyncio.sleep(3)
             
             current_url = self.page.url
             if "login" in current_url:
-                raise Exception("Cookie 已过期,请重新登录")
+                raise Exception("Cookie 已过期,请重新登录") 
             
             api_url = "https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_list"
             req_body = {
@@ -1102,7 +1103,7 @@ class WeixinPublisher(BasePublisher):
                             headers: {
                                 'Content-Type': 'application/json',
                                 'Accept': '*/*',
-                                'Referer': 'https://channels.weixin.qq.com/micro/content/post/list'
+                                'Referer': 'https://channels.weixin.qq.com/platform/post/list'
                             },
                             body: bodyStr
                         });
@@ -1201,8 +1202,12 @@ class WeixinPublisher(BasePublisher):
         
         return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more, next_page=next_page)
     
+
     async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
-        """获取视频号作品评论"""
+        """
+        获取视频号作品评论(完全参考 get_weixin_work_comments.py 的接口监听逻辑)
+        支持递归提取二级评论,正确处理 parent_comment_id
+        """
         print(f"\n{'='*60}")
         print(f"[{self.platform_name}] 获取作品评论")
         print(f"[{self.platform_name}] work_id={work_id}")
@@ -1221,142 +1226,252 @@ class WeixinPublisher(BasePublisher):
                 raise Exception("Page not initialized")
             
             # 访问评论管理页面
-            await self.page.goto("https://channels.weixin.qq.com/platform/interaction/comment")
-            await asyncio.sleep(3)
+            print(f"[{self.platform_name}] 正在打开评论页面...")
+            await self.page.goto("https://channels.weixin.qq.com/platform/interaction/comment", timeout=30000)
+            await asyncio.sleep(2)
             
             # 检查登录状态
             current_url = self.page.url
             if "login" in current_url:
                 raise Exception("Cookie 已过期,请重新登录")
             
-            # 等待左侧作品列表加载
+            # === 步骤1: 监听 post_list 接口获取作品列表 ===
+            posts = []
             try:
-                await self.page.wait_for_selector('div.comment-feed-wrap', timeout=15000)
-            except:
-                print(f"[{self.platform_name}] 未找到作品列表")
-                return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)
-            
-            print(f"[{self.platform_name}] 查找 work_id={work_id} 对应的作品")
-            
-            # 点击左侧作品项(根据 work_id 匹配)
-            feed_items = self.page.locator('div.comment-feed-wrap')
-            item_count = await feed_items.count()
-            print(f"[{self.platform_name}] 左侧共 {item_count} 个作品")
-            
-            clicked = False
-            for i in range(item_count):
-                feed = feed_items.nth(i)
-                title_el = feed.locator('div.feed-title').first
-                if await title_el.count() > 0:
-                    title_text = await title_el.text_content() or ''
-                    title_text = title_text.strip()
-                    
-                    # 检查是否包含 work_id(标题)
-                    if work_id in title_text or title_text in work_id:
-                        print(f"[{self.platform_name}] 找到匹配作品: {title_text}")
-                        await feed.click()
-                        await asyncio.sleep(2)
-                        clicked = True
-                        break
-            
-            if not clicked:
-                # 如果没找到匹配的,点击第一个
-                print(f"[{self.platform_name}] 未找到匹配作品,点击第一个")
-                if item_count > 0:
-                    await feed_items.nth(0).click()
-                    await asyncio.sleep(2)
+                async with self.page.expect_response(
+                    lambda res: "/post/post_list" in res.url,
+                    timeout=20000
+                ) as post_resp_info:
+                    await self.page.wait_for_selector('.scroll-list .comment-feed-wrap', timeout=15000)
+                
+                post_resp = await post_resp_info.value
+                post_data = await post_resp.json()
+                
+                if post_data.get("errCode") == 0:
+                    posts = post_data.get("data", {}).get("list", [])
+                    print(f"[{self.platform_name}] ✅ 获取 {len(posts)} 个作品")
                 else:
-                    return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)
+                    err_msg = post_data.get("errMsg", "未知错误")
+                    print(f"[{self.platform_name}] ❌ post_list 业务错误: {err_msg}")
+                    return CommentsResult(
+                        success=False, 
+                        platform=self.platform_name, 
+                        work_id=work_id,
+                        error=f"post_list 业务错误: {err_msg}"
+                    )
+            except Exception as e:
+                print(f"[{self.platform_name}] ❌ 获取 post_list 失败: {e}")
+                return CommentsResult(
+                    success=False, 
+                    platform=self.platform_name, 
+                    work_id=work_id,
+                    error=f"获取 post_list 失败: {e}"
+                )
             
-            # 等待右侧评论详情加载
-            try:
-                await self.page.wait_for_selector('div.comment-item', timeout=5000)
-            except:
-                print(f"[{self.platform_name}] 该作品暂无评论")
-                return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)
+            # === 步骤2: 在 DOM 中查找目标作品 ===
+            feed_wraps = await self.page.query_selector_all('.scroll-list .comment-feed-wrap')
+            target_feed = None
+            target_post = None
+            target_index = -1
             
-            # 获取评论总数
-            total_text_el = self.page.locator('div.comment-count__tips')
-            if await total_text_el.count() > 0:
-                total_text = await total_text_el.text_content() or ''
-                # 提取数字(如 "共 1 条评论")
-                import re
-                match = re.search(r'(\d+)', total_text)
-                if match:
-                    total = int(match.group(1))
+            for i, feed in enumerate(feed_wraps):
+                if i >= len(posts):
+                    break
+                
+                post = posts[i]
+                object_nonce = post.get("objectNonce", "")
+                post_work_id = post.get("objectId", "") or object_nonce
+                
+                # 匹配 work_id(支持 objectId 或 objectNonce 匹配)
+                if work_id in [post_work_id, object_nonce] or post_work_id in work_id or object_nonce in work_id:
+                    target_feed = feed
+                    target_post = post
+                    target_index = i
+                    work_title = post.get("desc", {}).get("description", "无标题")
+                    print(f"[{self.platform_name}] ✅ 找到目标作品: {work_title}")
+                    break
             
-            print(f"[{self.platform_name}] 评论总数: {total}")
+            if not target_feed or not target_post:
+                print(f"[{self.platform_name}] ❌ 未找到 work_id={work_id} 对应的作品")
+                return CommentsResult(
+                    success=True, 
+                    platform=self.platform_name, 
+                    work_id=work_id,
+                    comments=[],
+                    total=0,
+                    has_more=False
+                )
             
-            # 获取右侧评论列表
-            comment_items = self.page.locator('div.comment-item')
-            item_count = await comment_items.count()
+            # 准备作品信息(用于递归函数)
+            object_nonce = target_post.get("objectNonce", f"nonce_{target_index}")
+            work_title = target_post.get("desc", {}).get("description", f"作品{target_index+1}")
             
-            print(f"[{self.platform_name}] 当前加载 {item_count} 条评论")
+            work_info = {
+                "work_id": object_nonce,
+                "work_title": work_title
+            }
             
-            for i in range(item_count):
-                try:
-                    item = comment_items.nth(i)
-                    
-                    # 获取作者昵称(加 .first 防 strict mode)
-                    author_name = ''
-                    name_el = item.locator('span.comment-user-name').first
-                    if await name_el.count() > 0:
-                        author_name = await name_el.text_content() or ''
-                        author_name = author_name.strip()
-                    
-                    # 获取头像
-                    author_avatar = ''
-                    avatar_el = item.locator('img.comment-avatar').first
-                    if await avatar_el.count() > 0:
-                        author_avatar = await avatar_el.get_attribute('src') or ''
-                    
-                    # 获取评论内容(加 .first 防 strict mode)
-                    content = ''
-                    content_el = item.locator('span.comment-content').first
-                    if await content_el.count() > 0:
-                        content = await content_el.text_content() or ''
-                        content = content.strip()
-                    
-                    # 获取评论时间(加 .first 防 strict mode)
-                    create_time = ''
-                    time_el = item.locator('span.comment-time').first
-                    if await time_el.count() > 0:
-                        create_time = await time_el.text_content() or ''
-                        create_time = create_time.strip()
-                    
-                    if not content:
-                        continue
-                    
-                    # 生成评论 ID
-                    comment_id = f"weixin_comment_{i}_{abs(hash(content))}"
+            # === 步骤3: 点击作品触发 comment_list 接口 ===
+            content_wrap = await target_feed.query_selector('.feed-content') or target_feed
+            
+            try:
+                async with self.page.expect_response(
+                    lambda res: "/comment/comment_list" in res.url,
+                    timeout=15000
+                ) as comment_resp_info:
+                    await content_wrap.click()
+                    await asyncio.sleep(0.8)
+                
+                comment_resp = await comment_resp_info.value
+                comment_data = await comment_resp.json()
+                
+                if comment_data.get("errCode") != 0:
+                    err_msg = comment_data.get("errMsg", "未知错误")
+                    print(f"[{self.platform_name}] ❌ 评论接口错误: {err_msg}")
+                    return CommentsResult(
+                        success=False, 
+                        platform=self.platform_name, 
+                        work_id=work_id,
+                        error=f"评论接口错误: {err_msg}"
+                    )
+                
+                raw_comments = comment_data.get("data", {}).get("comment", [])
+                total = comment_data.get("data", {}).get("totalCount", len(raw_comments))
+                
+                print(f"[{self.platform_name}] 📊 原始评论数: {len(raw_comments)}, 总数: {total}")
+                
+                # === 步骤4: 递归提取所有评论(含子评论)===
+                extracted = self._extract_comments(raw_comments, parent_id="", work_info=work_info)
+                
+                # === 步骤5: 转换为 CommentItem 列表(保留 weixin.py 的数据结构)===
+                for c in extracted:
+                    # 使用接口返回的 comment_id
+                    comment_id = c.get("comment_id", "")
+                    parent_comment_id = c.get("parent_comment_id", "")
                     
-                    comments.append(CommentItem(
+                    # 构建 CommentItem(保留原有数据结构用于数据库入库)
+                    comment_item = CommentItem(
                         comment_id=comment_id,
+                        parent_comment_id=parent_comment_id,
                         work_id=work_id,
-                        content=content,
-                        author_id='',
-                        author_name=author_name,
-                        author_avatar=author_avatar,
-                        like_count=0,
+                        content=c.get("content", ""),
+                        author_id=c.get("username", ""),  # 使用 username 作为 author_id
+                        author_name=c.get("nickname", ""),
+                        author_avatar=c.get("avatar", ""),
+                        like_count=c.get("like_count", 0),
                         reply_count=0,
-                        create_time=create_time,
-                    ))
+                        create_time=c.get("create_time", ""),
+                    )
                     
-                    print(f"[{self.platform_name}] 评论 {i+1}: {author_name} - {content[:20]}...")
+                    # 添加扩展字段(用于数据库存储和后续处理)
+                    # comment_item.parent_comment_id = c.get("parent_comment_id", "")
+                    comment_item.is_author = c.get("is_author", False)
+                    comment_item.create_time_unix = c.get("create_time_unix", 0)
+                    comment_item.work_title = c.get("work_title", "")
+                    print(comment_item)
+                    comments.append(comment_item) 
                     
-                except Exception as e:
-                    print(f"[{self.platform_name}] 解析评论 {i} 失败: {e}")
-                    continue
-            
-            print(f"[{self.platform_name}] 成功获取 {len(comments)} 条评论")
+                    # 打印日志
+                    author_tag = " 👤(作者)" if c.get("is_author") else ""
+                    parent_tag = f" [回复: {c.get('parent_comment_id', '')}]" if c.get("parent_comment_id") else ""
+                    print(f"[{self.platform_name}]   - [{c.get('nickname', '')}] {c.get('content', '')[:30]}... "
+                          f"({c.get('create_time', '')}){author_tag}{parent_tag}")
+                
+                # 判断是否还有更多(优先使用接口返回的 continueFlag,否则根据数量判断)
+                has_more = comment_data.get("data", {}).get("continueFlag", False) or len(extracted) < total
+                
+                print(f"[{self.platform_name}] ✅ 共提取 {len(comments)} 条评论(含子评论)")
+                
+            except Exception as e:
+                print(f"[{self.platform_name}] ❌ 获取评论失败: {e}")
+                import traceback
+                traceback.print_exc()
+                return CommentsResult(
+                    success=False, 
+                    platform=self.platform_name, 
+                    work_id=work_id,
+                    error=f"获取评论失败: {e}"
+                )
             
         except Exception as e:
             import traceback
             traceback.print_exc()
-            return CommentsResult(success=False, platform=self.platform_name, work_id=work_id, error=str(e))
+            return CommentsResult(
+                success=False, 
+                platform=self.platform_name, 
+                work_id=work_id, 
+                error=str(e)
+            )
         
-        return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=comments, total=total, has_more=has_more)
-    
+        return CommentsResult(
+            success=True, 
+            platform=self.platform_name, 
+            work_id=work_id, 
+            comments=comments, 
+            total=total, 
+            has_more=has_more
+        )
+
+    def _extract_comments(self, comment_list: list, parent_id: str = "", work_info: dict = None) -> list:
+        """
+        Flatten raw comments and their ``levelTwoComment`` replies into one list.
+
+        Args:
+            comment_list: Raw comment dicts; ``None`` and non-dict items are skipped.
+            parent_id: Value stored as ``parent_comment_id`` on each entry ("" for top level).
+            work_info: Optional dict supplying ``work_id`` / ``work_title`` for every entry.
+
+        Returns:
+            list: Entry dicts in depth-first order, each parent followed by its replies.
+        """
+        # Imported locally so this helper does not rely on module-level imports.
+        import os
+        from datetime import datetime
+
+        info = work_info or {}
+        # A commenter whose username equals this value is flagged as the author.
+        my_username = getattr(self, 'my_username', '') or os.environ.get('WEIXIN_MY_USERNAME', '')
+
+        result = []
+        for cmt in comment_list or []:
+            if not isinstance(cmt, dict):
+                continue
+            # A malformed or out-of-range timestamp must not abort the whole extraction.
+            try:
+                create_ts = int(cmt.get("commentCreatetime", 0) or 0)
+            except (TypeError, ValueError):
+                create_ts = 0
+            try:
+                readable_time = datetime.fromtimestamp(create_ts).strftime('%Y-%m-%d %H:%M:%S') if create_ts > 0 else ""
+            except (OverflowError, OSError, ValueError):
+                readable_time = ""
+
+            # Normalize a missing/null id to "" so an entry and its replies agree on it.
+            comment_id = cmt.get("commentId") or ""
+            username = cmt.get("username", "") or ""
+            result.append({
+                "work_id": info.get("work_id", ""),
+                "work_title": info.get("work_title", ""),
+                "comment_id": comment_id,
+                "parent_comment_id": parent_id,
+                "username": username,
+                "nickname": cmt.get("commentNickname", ""),
+                "avatar": cmt.get("commentHeadurl", ""),
+                "content": cmt.get("commentContent", ""),
+                "create_time_unix": create_ts,
+                "create_time": readable_time,
+                "is_author": my_username != "" and username == my_username,
+                "like_count": cmt.get("commentLikeCount", 0) or 0,
+            })
+
+            # Replies point back at the comment that was just emitted.
+            replies = cmt.get("levelTwoComment")
+            if isinstance(replies, list) and replies:
+                result.extend(self._extract_comments(replies, parent_id=comment_id, work_info=work_info))
+
+        return result
+
+
     async def auto_reply_private_messages(self, cookies: str) -> dict:
         """自动回复私信 - 集成自 pw3.py"""
         print(f"\n{'='*60}")

+ 3 - 0
server/src/services/CommentService.ts

@@ -283,6 +283,8 @@ export class CommentService {
         } else if (account.platform === 'weixin_video') {
           workComments = await headlessBrowserService.fetchWeixinVideoCommentsViaApi(cookies);
         }
+
+
         
         // 获取该账号的所有作品,用于关联
         const workRepository = AppDataSource.getRepository(Work);
@@ -447,6 +449,7 @@ export class CommentService {
                   platform: account.platform as PlatformType,
                   videoId: workComment.videoId,
                   commentId: comment.commentId,
+                  parentCommentId: comment.parentCommentId,
                   authorId: comment.authorId,
                   authorName: comment.authorName,
                   authorAvatar: comment.authorAvatar,

+ 2 - 0
server/src/services/HeadlessBrowserService.ts

@@ -3984,6 +3984,7 @@ class HeadlessBrowserService {
 
         const comments: CommentItem[] = (commentsResult.comments || []).map((c: {
           comment_id: string;
+          parent_comment_id: string;
           author_id: string;
           author_name: string;
           author_avatar: string;
@@ -3993,6 +3994,7 @@ class HeadlessBrowserService {
           reply_count?: number;
         }) => ({
           commentId: c.comment_id,
+          parentCommentId: c.parent_comment_id,
           authorId: c.author_id,
           authorName: c.author_name,
           authorAvatar: c.author_avatar,