|
|
@@ -326,7 +326,8 @@ class WeixinPublisher(BasePublisher):
|
|
|
# 如果没有安装 Chrome,则使用默认 Chromium
|
|
|
try:
|
|
|
self.browser = await playwright.chromium.launch(
|
|
|
- headless=self.headless,
|
|
|
+ # headless=self.headless,
|
|
|
+ headless=False,
|
|
|
channel="chrome" # 使用系统 Chrome
|
|
|
)
|
|
|
print(f"[{self.platform_name}] 使用系统 Chrome 浏览器")
|
|
|
@@ -1070,12 +1071,12 @@ class WeixinPublisher(BasePublisher):
|
|
|
if not self.page:
|
|
|
raise Exception("Page not initialized")
|
|
|
|
|
|
- await self.page.goto("https://channels.weixin.qq.com/micro/content/post/list", timeout=30000)
|
|
|
+ await self.page.goto("https://channels.weixin.qq.com/platform/post/list", timeout=30000)
|
|
|
await asyncio.sleep(3)
|
|
|
|
|
|
current_url = self.page.url
|
|
|
if "login" in current_url:
|
|
|
- raise Exception("Cookie 已过期,请重新登录")
|
|
|
+ raise Exception("Cookie 已过期,请重新登录")
|
|
|
|
|
|
api_url = "https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_list"
|
|
|
req_body = {
|
|
|
@@ -1102,7 +1103,7 @@ class WeixinPublisher(BasePublisher):
|
|
|
headers: {
|
|
|
'Content-Type': 'application/json',
|
|
|
'Accept': '*/*',
|
|
|
- 'Referer': 'https://channels.weixin.qq.com/micro/content/post/list'
|
|
|
+ 'Referer': 'https://channels.weixin.qq.com/platform/post/list'
|
|
|
},
|
|
|
body: bodyStr
|
|
|
});
|
|
|
@@ -1201,8 +1202,12 @@ class WeixinPublisher(BasePublisher):
|
|
|
|
|
|
return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more, next_page=next_page)
|
|
|
|
|
|
+
|
|
|
async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
|
|
|
- """获取视频号作品评论"""
|
|
|
+ """
|
|
|
+ 获取视频号作品评论(完全参考 get_weixin_work_comments.py 的接口监听逻辑)
|
|
|
+ 支持递归提取二级评论,正确处理 parent_comment_id
|
|
|
+ """
|
|
|
print(f"\n{'='*60}")
|
|
|
print(f"[{self.platform_name}] 获取作品评论")
|
|
|
print(f"[{self.platform_name}] work_id={work_id}")
|
|
|
@@ -1221,142 +1226,252 @@ class WeixinPublisher(BasePublisher):
|
|
|
raise Exception("Page not initialized")
|
|
|
|
|
|
# 访问评论管理页面
|
|
|
- await self.page.goto("https://channels.weixin.qq.com/platform/interaction/comment")
|
|
|
- await asyncio.sleep(3)
|
|
|
+ print(f"[{self.platform_name}] 正在打开评论页面...")
|
|
|
+ await self.page.goto("https://channels.weixin.qq.com/platform/interaction/comment", timeout=30000)
|
|
|
+ await asyncio.sleep(2)
|
|
|
|
|
|
# 检查登录状态
|
|
|
current_url = self.page.url
|
|
|
if "login" in current_url:
|
|
|
raise Exception("Cookie 已过期,请重新登录")
|
|
|
|
|
|
- # 等待左侧作品列表加载
|
|
|
+ # === 步骤1: 监听 post_list 接口获取作品列表 ===
|
|
|
+ posts = []
|
|
|
try:
|
|
|
- await self.page.wait_for_selector('div.comment-feed-wrap', timeout=15000)
|
|
|
- except:
|
|
|
- print(f"[{self.platform_name}] 未找到作品列表")
|
|
|
- return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)
|
|
|
-
|
|
|
- print(f"[{self.platform_name}] 查找 work_id={work_id} 对应的作品")
|
|
|
-
|
|
|
- # 点击左侧作品项(根据 work_id 匹配)
|
|
|
- feed_items = self.page.locator('div.comment-feed-wrap')
|
|
|
- item_count = await feed_items.count()
|
|
|
- print(f"[{self.platform_name}] 左侧共 {item_count} 个作品")
|
|
|
-
|
|
|
- clicked = False
|
|
|
- for i in range(item_count):
|
|
|
- feed = feed_items.nth(i)
|
|
|
- title_el = feed.locator('div.feed-title').first
|
|
|
- if await title_el.count() > 0:
|
|
|
- title_text = await title_el.text_content() or ''
|
|
|
- title_text = title_text.strip()
|
|
|
-
|
|
|
- # 检查是否包含 work_id(标题)
|
|
|
- if work_id in title_text or title_text in work_id:
|
|
|
- print(f"[{self.platform_name}] 找到匹配作品: {title_text}")
|
|
|
- await feed.click()
|
|
|
- await asyncio.sleep(2)
|
|
|
- clicked = True
|
|
|
- break
|
|
|
-
|
|
|
- if not clicked:
|
|
|
- # 如果没找到匹配的,点击第一个
|
|
|
- print(f"[{self.platform_name}] 未找到匹配作品,点击第一个")
|
|
|
- if item_count > 0:
|
|
|
- await feed_items.nth(0).click()
|
|
|
- await asyncio.sleep(2)
|
|
|
+ async with self.page.expect_response(
|
|
|
+ lambda res: "/post/post_list" in res.url,
|
|
|
+ timeout=20000
|
|
|
+ ) as post_resp_info:
|
|
|
+ await self.page.wait_for_selector('.scroll-list .comment-feed-wrap', timeout=15000)
|
|
|
+
|
|
|
+ post_resp = await post_resp_info.value
|
|
|
+ post_data = await post_resp.json()
|
|
|
+
|
|
|
+ if post_data.get("errCode") == 0:
|
|
|
+ posts = post_data.get("data", {}).get("list", [])
|
|
|
+ print(f"[{self.platform_name}] ✅ 获取 {len(posts)} 个作品")
|
|
|
else:
|
|
|
- return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)
|
|
|
+ err_msg = post_data.get("errMsg", "未知错误")
|
|
|
+ print(f"[{self.platform_name}] ❌ post_list 业务错误: {err_msg}")
|
|
|
+ return CommentsResult(
|
|
|
+ success=False,
|
|
|
+ platform=self.platform_name,
|
|
|
+ work_id=work_id,
|
|
|
+ error=f"post_list 业务错误: {err_msg}"
|
|
|
+ )
|
|
|
+ except Exception as e:
|
|
|
+ print(f"[{self.platform_name}] ❌ 获取 post_list 失败: {e}")
|
|
|
+ return CommentsResult(
|
|
|
+ success=False,
|
|
|
+ platform=self.platform_name,
|
|
|
+ work_id=work_id,
|
|
|
+ error=f"获取 post_list 失败: {e}"
|
|
|
+ )
|
|
|
|
|
|
- # 等待右侧评论详情加载
|
|
|
- try:
|
|
|
- await self.page.wait_for_selector('div.comment-item', timeout=5000)
|
|
|
- except:
|
|
|
- print(f"[{self.platform_name}] 该作品暂无评论")
|
|
|
- return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)
|
|
|
+ # === 步骤2: 在 DOM 中查找目标作品 ===
|
|
|
+ feed_wraps = await self.page.query_selector_all('.scroll-list .comment-feed-wrap')
|
|
|
+ target_feed = None
|
|
|
+ target_post = None
|
|
|
+ target_index = -1
|
|
|
|
|
|
- # 获取评论总数
|
|
|
- total_text_el = self.page.locator('div.comment-count__tips')
|
|
|
- if await total_text_el.count() > 0:
|
|
|
- total_text = await total_text_el.text_content() or ''
|
|
|
- # 提取数字(如 "共 1 条评论")
|
|
|
- import re
|
|
|
- match = re.search(r'(\d+)', total_text)
|
|
|
- if match:
|
|
|
- total = int(match.group(1))
|
|
|
+ for i, feed in enumerate(feed_wraps):
|
|
|
+ if i >= len(posts):
|
|
|
+ break
|
|
|
+
|
|
|
+ post = posts[i]
|
|
|
+ object_nonce = post.get("objectNonce", "")
|
|
|
+ post_work_id = post.get("objectId", "") or object_nonce
|
|
|
+
|
|
|
+ # 匹配 work_id(支持 objectId 或 objectNonce 匹配)
|
|
|
+ if work_id in [post_work_id, object_nonce] or post_work_id in work_id or object_nonce in work_id:
|
|
|
+ target_feed = feed
|
|
|
+ target_post = post
|
|
|
+ target_index = i
|
|
|
+ work_title = post.get("desc", {}).get("description", "无标题")
|
|
|
+ print(f"[{self.platform_name}] ✅ 找到目标作品: {work_title}")
|
|
|
+ break
|
|
|
|
|
|
- print(f"[{self.platform_name}] 评论总数: {total}")
|
|
|
+ if not target_feed or not target_post:
|
|
|
+ print(f"[{self.platform_name}] ❌ 未找到 work_id={work_id} 对应的作品")
|
|
|
+ return CommentsResult(
|
|
|
+ success=True,
|
|
|
+ platform=self.platform_name,
|
|
|
+ work_id=work_id,
|
|
|
+ comments=[],
|
|
|
+ total=0,
|
|
|
+ has_more=False
|
|
|
+ )
|
|
|
|
|
|
- # 获取右侧评论列表
|
|
|
- comment_items = self.page.locator('div.comment-item')
|
|
|
- item_count = await comment_items.count()
|
|
|
+ # 准备作品信息(用于递归函数)
|
|
|
+ object_nonce = target_post.get("objectNonce", f"nonce_{target_index}")
|
|
|
+ work_title = target_post.get("desc", {}).get("description", f"作品{target_index+1}")
|
|
|
|
|
|
- print(f"[{self.platform_name}] 当前加载 {item_count} 条评论")
|
|
|
+ work_info = {
|
|
|
+ "work_id": object_nonce,
|
|
|
+ "work_title": work_title
|
|
|
+ }
|
|
|
|
|
|
- for i in range(item_count):
|
|
|
- try:
|
|
|
- item = comment_items.nth(i)
|
|
|
-
|
|
|
- # 获取作者昵称(加 .first 防 strict mode)
|
|
|
- author_name = ''
|
|
|
- name_el = item.locator('span.comment-user-name').first
|
|
|
- if await name_el.count() > 0:
|
|
|
- author_name = await name_el.text_content() or ''
|
|
|
- author_name = author_name.strip()
|
|
|
-
|
|
|
- # 获取头像
|
|
|
- author_avatar = ''
|
|
|
- avatar_el = item.locator('img.comment-avatar').first
|
|
|
- if await avatar_el.count() > 0:
|
|
|
- author_avatar = await avatar_el.get_attribute('src') or ''
|
|
|
-
|
|
|
- # 获取评论内容(加 .first 防 strict mode)
|
|
|
- content = ''
|
|
|
- content_el = item.locator('span.comment-content').first
|
|
|
- if await content_el.count() > 0:
|
|
|
- content = await content_el.text_content() or ''
|
|
|
- content = content.strip()
|
|
|
-
|
|
|
- # 获取评论时间(加 .first 防 strict mode)
|
|
|
- create_time = ''
|
|
|
- time_el = item.locator('span.comment-time').first
|
|
|
- if await time_el.count() > 0:
|
|
|
- create_time = await time_el.text_content() or ''
|
|
|
- create_time = create_time.strip()
|
|
|
-
|
|
|
- if not content:
|
|
|
- continue
|
|
|
-
|
|
|
- # 生成评论 ID
|
|
|
- comment_id = f"weixin_comment_{i}_{abs(hash(content))}"
|
|
|
+ # === 步骤3: 点击作品触发 comment_list 接口 ===
|
|
|
+ content_wrap = await target_feed.query_selector('.feed-content') or target_feed
|
|
|
+
|
|
|
+ try:
|
|
|
+ async with self.page.expect_response(
|
|
|
+ lambda res: "/comment/comment_list" in res.url,
|
|
|
+ timeout=15000
|
|
|
+ ) as comment_resp_info:
|
|
|
+ await content_wrap.click()
|
|
|
+ await asyncio.sleep(0.8)
|
|
|
+
|
|
|
+ comment_resp = await comment_resp_info.value
|
|
|
+ comment_data = await comment_resp.json()
|
|
|
+
|
|
|
+ if comment_data.get("errCode") != 0:
|
|
|
+ err_msg = comment_data.get("errMsg", "未知错误")
|
|
|
+ print(f"[{self.platform_name}] ❌ 评论接口错误: {err_msg}")
|
|
|
+ return CommentsResult(
|
|
|
+ success=False,
|
|
|
+ platform=self.platform_name,
|
|
|
+ work_id=work_id,
|
|
|
+ error=f"评论接口错误: {err_msg}"
|
|
|
+ )
|
|
|
+
|
|
|
+ raw_comments = comment_data.get("data", {}).get("comment", [])
|
|
|
+ total = comment_data.get("data", {}).get("totalCount", len(raw_comments))
|
|
|
+
|
|
|
+ print(f"[{self.platform_name}] 📊 原始评论数: {len(raw_comments)}, 总数: {total}")
|
|
|
+
|
|
|
+ # === 步骤4: 递归提取所有评论(含子评论)===
|
|
|
+ extracted = self._extract_comments(raw_comments, parent_id="", work_info=work_info)
|
|
|
+
|
|
|
+ # === 步骤5: 转换为 CommentItem 列表(保留 weixin.py 的数据结构)===
|
|
|
+ for c in extracted:
|
|
|
+ # 使用接口返回的 comment_id
|
|
|
+ comment_id = c.get("comment_id", "")
|
|
|
+ parent_comment_id = c.get("parent_comment_id", "")
|
|
|
|
|
|
- comments.append(CommentItem(
|
|
|
+ # 构建 CommentItem(保留原有数据结构用于数据库入库)
|
|
|
+ comment_item = CommentItem(
|
|
|
comment_id=comment_id,
|
|
|
+ parent_comment_id=parent_comment_id,
|
|
|
work_id=work_id,
|
|
|
- content=content,
|
|
|
- author_id='',
|
|
|
- author_name=author_name,
|
|
|
- author_avatar=author_avatar,
|
|
|
- like_count=0,
|
|
|
+ content=c.get("content", ""),
|
|
|
+ author_id=c.get("username", ""), # 使用 username 作为 author_id
|
|
|
+ author_name=c.get("nickname", ""),
|
|
|
+ author_avatar=c.get("avatar", ""),
|
|
|
+ like_count=c.get("like_count", 0),
|
|
|
reply_count=0,
|
|
|
- create_time=create_time,
|
|
|
- ))
|
|
|
+ create_time=c.get("create_time", ""),
|
|
|
+ )
|
|
|
|
|
|
- print(f"[{self.platform_name}] 评论 {i+1}: {author_name} - {content[:20]}...")
|
|
|
+ # 添加扩展字段(用于数据库存储和后续处理)
|
|
|
+ # comment_item.parent_comment_id = c.get("parent_comment_id", "")
|
|
|
+ comment_item.is_author = c.get("is_author", False)
|
|
|
+ comment_item.create_time_unix = c.get("create_time_unix", 0)
|
|
|
+ comment_item.work_title = c.get("work_title", "")
|
|
|
+ print(comment_item)
|
|
|
+ comments.append(comment_item)
|
|
|
|
|
|
- except Exception as e:
|
|
|
- print(f"[{self.platform_name}] 解析评论 {i} 失败: {e}")
|
|
|
- continue
|
|
|
-
|
|
|
- print(f"[{self.platform_name}] 成功获取 {len(comments)} 条评论")
|
|
|
+ # 打印日志
|
|
|
+ author_tag = " 👤(作者)" if c.get("is_author") else ""
|
|
|
+ parent_tag = f" [回复: {c.get('parent_comment_id', '')}]" if c.get("parent_comment_id") else ""
|
|
|
+ print(f"[{self.platform_name}] - [{c.get('nickname', '')}] {c.get('content', '')[:30]}... "
|
|
|
+ f"({c.get('create_time', '')}){author_tag}{parent_tag}")
|
|
|
+
|
|
|
+ # 判断是否还有更多(优先使用接口返回的 continueFlag,否则根据数量判断)
|
|
|
+ has_more = comment_data.get("data", {}).get("continueFlag", False) or len(extracted) < total
|
|
|
+
|
|
|
+ print(f"[{self.platform_name}] ✅ 共提取 {len(comments)} 条评论(含子评论)")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ print(f"[{self.platform_name}] ❌ 获取评论失败: {e}")
|
|
|
+ import traceback
|
|
|
+ traceback.print_exc()
|
|
|
+ return CommentsResult(
|
|
|
+ success=False,
|
|
|
+ platform=self.platform_name,
|
|
|
+ work_id=work_id,
|
|
|
+ error=f"获取评论失败: {e}"
|
|
|
+ )
|
|
|
|
|
|
except Exception as e:
|
|
|
import traceback
|
|
|
traceback.print_exc()
|
|
|
- return CommentsResult(success=False, platform=self.platform_name, work_id=work_id, error=str(e))
|
|
|
+ return CommentsResult(
|
|
|
+ success=False,
|
|
|
+ platform=self.platform_name,
|
|
|
+ work_id=work_id,
|
|
|
+ error=str(e)
|
|
|
+ )
|
|
|
|
|
|
- return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=comments, total=total, has_more=has_more)
|
|
|
-
|
|
|
+ return CommentsResult(
|
|
|
+ success=True,
|
|
|
+ platform=self.platform_name,
|
|
|
+ work_id=work_id,
|
|
|
+ comments=comments,
|
|
|
+ total=total,
|
|
|
+ has_more=has_more
|
|
|
+ )
|
|
|
+
|
|
|
def _extract_comments(self, comment_list: list, parent_id: str = "", work_info: dict = None) -> list:
    """Flatten first- and second-level comments from the raw API payload.

    Each raw comment becomes one dict; replies found under its
    ``levelTwoComment`` key are emitted right after it, with
    ``parent_comment_id`` set to the enclosing comment's id.

    Args:
        comment_list: Raw comment dicts as returned by the comment-list
            endpoint. ``None`` and non-dict entries are skipped.
        parent_id: Id recorded as ``parent_comment_id`` for the entries of
            ``comment_list`` itself ("" for top-level comments).
        work_info: Optional dict whose ``work_id`` / ``work_title`` values
            are copied onto every emitted entry.

    Returns:
        A flat list of dicts with the keys ``work_id``, ``work_title``,
        ``comment_id``, ``parent_comment_id``, ``username``, ``nickname``,
        ``avatar``, ``content``, ``create_time_unix``, ``create_time``,
        ``is_author`` and ``like_count``.
    """
    # Imported locally so this helper does not rely on module-level imports
    # that are not visible from this part of the file.
    import os
    from datetime import datetime

    # Resolved once: the instance attribute wins, then the environment.
    my_username = getattr(self, 'my_username', '') or os.environ.get('WEIXIN_MY_USERNAME', '')

    work_info = work_info or {}
    work_id = work_info.get("work_id", "")
    work_title = work_info.get("work_title", "")

    flattened = []

    def _collect(items, parent):
        """Append entries for ``items`` (and their replies) to ``flattened``."""
        for cmt in items or []:
            if not isinstance(cmt, dict):
                # A malformed entry must not abort the whole extraction.
                continue

            # A missing, empty or non-numeric timestamp degrades to 0.
            try:
                create_ts = int(cmt.get("commentCreatetime", 0) or 0)
            except (TypeError, ValueError):
                create_ts = 0

            readable_time = ""
            if create_ts > 0:
                try:
                    readable_time = datetime.fromtimestamp(create_ts).strftime('%Y-%m-%d %H:%M:%S')
                except (OverflowError, OSError, ValueError):
                    # Out-of-range value (for example a millisecond
                    # timestamp): keep the raw number, leave the text empty.
                    readable_time = ""

            username = cmt.get("username", "") or ""
            # Only meaningful when an own username has been configured.
            is_author = bool(my_username) and username == my_username

            # Normalised to "" so it matches the parent-id convention below.
            comment_id = cmt.get("commentId") or ""

            flattened.append({
                "work_id": work_id,
                "work_title": work_title,
                "comment_id": comment_id,
                "parent_comment_id": parent,
                "username": username,
                "nickname": cmt.get("commentNickname", ""),
                "avatar": cmt.get("commentHeadurl", ""),
                "content": cmt.get("commentContent", ""),
                "create_time_unix": create_ts,
                "create_time": readable_time,
                "is_author": is_author,
                "like_count": cmt.get("commentLikeCount", 0) or 0,
            })

            # Replies inherit the current comment's id as their parent id.
            replies = cmt.get("levelTwoComment")
            if isinstance(replies, list) and replies:
                _collect(replies, comment_id)

    _collect(comment_list, parent_id)
    return flattened
|
|
|
+
|
|
|
+
|
|
|
async def auto_reply_private_messages(self, cookies: str) -> dict:
|
|
|
"""自动回复私信 - 集成自 pw3.py"""
|
|
|
print(f"\n{'='*60}")
|