|
|
@@ -3314,11 +3314,197 @@ class BaijiahaoPublisher(BasePublisher):
|
|
|
return await super().check_login_status(cookies)
|
|
|
|
|
|
async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
    """Fetch comments for a Baijiahao work by scraping the comment-management page.

    The page opened here only offers an "all comments" stream, whose outer
    DOM structure is:

        div.client_pages_newComment_comment_all_list
          > div.cheetah-ui-pro-scroll-view
            > div.list-container
              > div.client_pages_newComment_comment_all_listItem

    No filtering by work is performed: every comment found on the page is
    returned and tagged with the ``work_id`` that was passed in. The title of
    the work each comment belongs to is exposed as ``work_title`` on the item
    so that callers can filter themselves.

    Args:
        cookies: Raw cookie payload, handed to ``self.parse_cookies``.
        work_id: Identifier copied onto every returned comment and onto the
            result; it is not used to select comments.
        cursor: Accepted for interface compatibility; not used.

    Returns:
        A ``CommentsResult``. On success it carries the scraped comments,
        ``total`` equal to their count and ``has_more=False`` (the page gives
        no indication of whether more comments exist). On any failure it
        carries ``success=False`` and an ``error`` message; no exception
        propagates to the caller.
    """
    import hashlib

    # Single source of truth for the list container; it is passed into the
    # page scripts as an argument instead of being repeated in each of them.
    container_selector = (
        ".client_pages_newComment_comment_all_list "
        ".cheetah-ui-pro-scroll-view .list-container"
    )

    # Scrolls the list container to its bottom, or the window when the
    # container is missing.
    scroll_script = """
    (selector) => {
        const container = document.querySelector(selector);
        if (container) {
            container.scrollTop = container.scrollHeight;
        } else {
            window.scrollBy(0, 600);
        }
    }
    """

    # Clicks the "load more" control if it is present.
    load_more_script = """
    () => {
        const more = document.querySelector('.client_pages_newComment_components_loadMore');
        if (more) {
            (more.querySelector('.more-arrow') || more).click();
        }
    }
    """

    # Collects one plain object per comment card. A card that fails to parse
    # is skipped so that one bad entry does not lose the whole page.
    extract_script = """
    (selector) => {
        const result = [];
        const listRoot = document.querySelector(selector);
        if (!listRoot) {
            return result;
        }

        const items = listRoot.querySelectorAll('.client_pages_newComment_comment_all_listItem');
        items.forEach((item) => {
            try {
                const avatarImg = item.querySelector('.comment-card-avatar-wrapper-card-avatar img');
                const nameEl = item.querySelector('.content-wrapper .user-container .name');
                const titleContentEl = item.querySelector('.title-wrapper .title-content');
                const contentEl = item.querySelector('.content-wrapper .content .content-w-highlight, .content-wrapper .content');
                const infoWrapper = item.querySelector('.content-wrapper .info-wrapper');
                const timeEl = infoWrapper ? infoWrapper.querySelector('span:nth-child(1)') : null;
                const replyInfoEl = infoWrapper ? infoWrapper.querySelector('span:nth-child(2)') : null;

                const workTitle = titleContentEl ? titleContentEl.textContent.trim() : '';
                const authorName = nameEl ? nameEl.textContent.trim() : '';
                const avatar = avatarImg ? avatarImg.src : '';
                const content = contentEl ? contentEl.textContent.trim() : '';
                const timeText = timeEl ? timeEl.textContent.trim() : '';
                const replyText = replyInfoEl ? replyInfoEl.textContent.trim() : '';

                const replyMatch = replyText.match(/(\\d+)/);
                const replyCount = replyMatch ? parseInt(replyMatch[1], 10) : 0;

                result.push({
                    parent_comment_id: "",
                    work_title: workTitle,
                    content,
                    username: authorName,
                    nickname: authorName,
                    avatar,
                    like_count: 0,
                    reply_count: replyCount,
                    create_time: timeText,
                    is_author: false,
                    create_time_unix: Date.now() / 1000,
                });
            } catch (e) {
                // Ignore a single card that cannot be parsed.
            }
        });

        return result;
    }
    """

    print(f"\n{'='*60}")
    print(f"[{self.platform_name}] 获取作品评论")
    print(f"[{self.platform_name}] work_id={work_id}")
    print(f"{'='*60}")

    comments: List[CommentItem] = []

    # NOTE(review): the browser started by init_browser() is not closed in
    # this method on any path — confirm the base class or the caller owns
    # its teardown.
    try:
        # Start the browser and install the cookies (BasePublisher helpers).
        await self.init_browser()
        cookie_list = self.parse_cookies(cookies)
        await self.set_cookies(cookie_list)

        if not self.page:
            raise Exception("Page not initialized")

        page = self.page

        # Open the comment-management page (full comment list).
        comment_url = "https://baijiahao.baidu.com/builder/rc/commentmanage/comment/all"
        print(f"[{self.platform_name}] 正在打开评论页面: {comment_url}")
        await page.goto(comment_url, timeout=30000)
        await asyncio.sleep(3)

        # A redirect to a login URL means the cookies are no longer valid.
        current_url = page.url
        if "login" in current_url or "passport.baidu.com" in current_url:
            raise Exception("Cookie 已过期,请重新登录")

        # Wait for the comment list container to be rendered.
        print(f"[{self.platform_name}] 等待评论列表 DOM 加载...")
        try:
            await page.wait_for_selector(container_selector, timeout=15000)
        except Exception as e:
            print(f"[{self.platform_name}] 等待评论列表失败: {e}")
            return CommentsResult(
                success=False,
                platform=self.platform_name,
                work_id=work_id,
                error=f"无法加载评论列表: {e}",
            )

        # Best effort: scroll and click "load more" a fixed number of times.
        # A failure here is not fatal; whatever is already rendered is used.
        try:
            for _ in range(5):
                await page.evaluate(scroll_script, container_selector)
                await asyncio.sleep(1)
                await page.evaluate(load_more_script)
                await asyncio.sleep(1.2)
        except Exception as e:
            print(f"[{self.platform_name}] 下拉/加载更多异常(非致命): {e}")

        # Extract the comment data from the DOM.
        print(f"[{self.platform_name}] 从 DOM 提取评论数据...")
        raw_comments = await page.evaluate(extract_script, container_selector)

        if not isinstance(raw_comments, list):
            raw_comments = []

        print(f"[{self.platform_name}] 共抓取到原始评论条目: {len(raw_comments)}")

        # TODO: once a reliable work_id -> title mapping exists, filter by
        # work_id here. Until then nothing is filtered, to avoid dropping data.

        # The page exposes no comment ID, so a deterministic one is derived
        # from the comment's own fields. That keeps the ID of a given comment
        # the same across fetches. The displayed time text is left out of the
        # fingerprint (presumably it can be rendered relative to "now" —
        # verify); an occurrence counter separates otherwise identical
        # comments.
        seen_fingerprints: dict = {}
        for raw in raw_comments:
            fingerprint = "\x1f".join(
                (
                    str(raw.get("work_title", "")),
                    str(raw.get("username", "")),
                    str(raw.get("content", "")),
                )
            )
            occurrence = seen_fingerprints.get(fingerprint, 0)
            seen_fingerprints[fingerprint] = occurrence + 1
            digest = hashlib.sha256(
                f"{fingerprint}\x1f{occurrence}".encode("utf-8")
            ).hexdigest()[:16]

            comment_item = CommentItem(
                comment_id=f"bjh_{digest}",
                parent_comment_id=raw.get("parent_comment_id", ""),
                work_id=work_id,
                content=raw.get("content", ""),
                author_id=raw.get("username", ""),
                author_name=raw.get("nickname", ""),
                author_avatar=raw.get("avatar", ""),
                like_count=raw.get("like_count", 0),
                reply_count=raw.get("reply_count", 0),
                create_time=raw.get("create_time", ""),
            )
            comment_item.is_author = raw.get("is_author", False)
            # NOTE(review): this is the scrape time produced in the page
            # script, not the time the comment was posted.
            comment_item.create_time_unix = raw.get("create_time_unix", 0)
            comment_item.work_title = raw.get("work_title", "")
            comments.append(comment_item)

        print(f"[{self.platform_name}] ✅ 最终整理出评论条数: {len(comments)}")

    except Exception as e:
        import traceback

        traceback.print_exc()
        return CommentsResult(
            success=False,
            platform=self.platform_name,
            work_id=work_id,
            error=str(e),
        )

    return CommentsResult(
        success=True,
        platform=self.platform_name,
        work_id=work_id,
        comments=comments,
        total=len(comments),
        # Whether more comments exist cannot be determined from the page.
        has_more=False,
    )
|