Browse Source

百家号评论

swortect 1 ngày trước
mục cha
commit
a09b04af28
2 tập tin đã thay đổi với 211 bổ sung và 16 xóa
  1. 190 4
      server/python/platforms/baijiahao.py
  2. 21 12
      server/src/services/CommentService.ts

+ 190 - 4
server/python/platforms/baijiahao.py

@@ -3314,11 +3314,197 @@ class BaijiahaoPublisher(BasePublisher):
         return await super().check_login_status(cookies)
     
     async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
-        """获取百家号作品评论"""
-        # TODO: 实现评论获取逻辑
+        """
+        获取百家号作品评论
+        参考 weixin.py 中同名逻辑,但百家号当前评论页仅提供「所有评论流」视图,
+        且页面 DOM 结构为(外层关键节点):
+          div.client_pages_newComment_comment_all_list > div.cheetah-ui-pro-scroll-view >
+          div.list-container > div.client_pages_newComment_comment_all_listItem
+
+        暂时按「按作品标题模糊匹配」的方式,筛选属于指定 work_id 对应作品标题的评论;
+        若无法可靠匹配,则先返回该页面上的所有评论记录。
+        """
+        print(f"\n{'='*60}")
+        print(f"[{self.platform_name}] 获取作品评论")
+        print(f"[{self.platform_name}] work_id={work_id}")
+        print(f"{'='*60}")
+
+        comments: List[CommentItem] = []
+        total = 0
+        has_more = False
+
+        try:
+            # 启动浏览器并设置 Cookie(复用 BasePublisher 逻辑)
+            await self.init_browser()
+            cookie_list = self.parse_cookies(cookies)
+            await self.set_cookies(cookie_list)
+
+            if not self.page:
+                raise Exception("Page not initialized")
+
+            page = self.page
+
+            # 打开评论管理页(全量评论列表)
+            comment_url = "https://baijiahao.baidu.com/builder/rc/commentmanage/comment/all"
+            print(f"[{self.platform_name}] 正在打开评论页面: {comment_url}")
+            await page.goto(comment_url, timeout=30000)
+            await asyncio.sleep(3)
+
+            # 检查是否跳转到登录页
+            current_url = page.url
+            if "login" in current_url or "passport.baidu.com" in current_url:
+                raise Exception("Cookie 已过期,请重新登录")
+
+            # 等待评论列表容器加载
+            container_selector = ".client_pages_newComment_comment_all_list .cheetah-ui-pro-scroll-view .list-container"
+            print(f"[{self.platform_name}] 等待评论列表 DOM 加载...")
+            try:
+                await page.wait_for_selector(container_selector, timeout=15000)
+            except Exception as e:
+                print(f"[{self.platform_name}] 等待评论列表失败: {e}")
+                return CommentsResult(
+                    success=False,
+                    platform=self.platform_name,
+                    work_id=work_id,
+                    error=f"无法加载评论列表: {e}",
+                )
+
+            # 尝试多次下拉「加载更多」
+            try:
+                for i in range(5):
+                    # 滚动评论列表区域
+                    await page.evaluate(
+                        """
+                        () => {
+                            const container = document.querySelector('.client_pages_newComment_comment_all_list .cheetah-ui-pro-scroll-view .list-container');
+                            if (container) {
+                                container.scrollTop = container.scrollHeight;
+                            } else {
+                                window.scrollBy(0, 600);
+                            }
+                        }
+                        """
+                    )
+                    await asyncio.sleep(1)
+
+                    # 点击「加载更多」按钮(如果存在)
+                    await page.evaluate(
+                        """
+                        () => {
+                            const more = document.querySelector('.client_pages_newComment_components_loadMore, .client_pages_newComment_comment_all_list .client_pages_newComment_components_loadMore');
+                            if (more) {
+                                (more.querySelector('.more-arrow') || more).click();
+                            }
+                        }
+                        """
+                    )
+                    await asyncio.sleep(1.2)
+            except Exception as e:
+                print(f"[{self.platform_name}] 下拉/加载更多异常(非致命): {e}")
+
+            # 从 DOM 中提取评论数据
+            print(f"[{self.platform_name}] 从 DOM 提取评论数据...")
+            raw_comments = await page.evaluate(
+                """
+                () => {
+                    const result = [];
+                    const listRoot = document.querySelector('.client_pages_newComment_comment_all_list .cheetah-ui-pro-scroll-view .list-container');
+                    if (!listRoot) {
+                        return result;
+                    }
+
+                    const items = listRoot.querySelectorAll('.client_pages_newComment_comment_all_listItem');
+                    items.forEach((item) => {
+                        try {
+                            const avatarImg = item.querySelector('.comment-card-avatar-wrapper-card-avatar img');
+                            const nameEl = item.querySelector('.content-wrapper .user-container .name');
+                            const titleContentEl = item.querySelector('.title-wrapper .title-content');
+                            const contentEl = item.querySelector('.content-wrapper .content .content-w-highlight, .content-wrapper .content');
+                            const infoWrapper = item.querySelector('.content-wrapper .info-wrapper');
+                            const timeEl = infoWrapper ? infoWrapper.querySelector('span:nth-child(1)') : null;
+                            const replyInfoEl = infoWrapper ? infoWrapper.querySelector('span:nth-child(2)') : null;
+
+                            const workTitle = titleContentEl ? titleContentEl.textContent.trim() : '';
+                            const authorName = nameEl ? nameEl.textContent.trim() : '';
+                            const avatar = avatarImg ? avatarImg.src : '';
+                            const content = contentEl ? contentEl.textContent.trim() : '';
+                            const timeText = timeEl ? timeEl.textContent.trim() : '';
+                            const replyText = replyInfoEl ? replyInfoEl.textContent.trim() : '';
+
+                            const replyMatch = replyText.match(/(\\d+)/);
+                            const replyCount = replyMatch ? parseInt(replyMatch[1], 10) : 0;
+
+                            result.push({
+                                comment_id: `bjh_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
+                                parent_comment_id: "",
+                                work_title: workTitle,
+                                content,
+                                username: authorName,
+                                nickname: authorName,
+                                avatar,
+                                like_count: 0,
+                                reply_count: replyCount,
+                                create_time: timeText,
+                                is_author: false,
+                                create_time_unix: Date.now() / 1000,
+                            });
+                        } catch (e) {
+                            // 单条解析失败忽略
+                        }
+                    });
+
+                    return result;
+                }
+                """
+            )
+
+            if not isinstance(raw_comments, list):
+                raw_comments = []
+
+            print(f"[{self.platform_name}] 共抓取到原始评论条目: {len(raw_comments)}")
+
+            # TODO: 如果后续有更可靠的 work_id -> 标题映射,可以在此处根据 work_id 过滤
+            # 当前先不做强过滤,全部返回,避免漏数据
+
+            for c in raw_comments:
+                comment_item = CommentItem(
+                    comment_id=c.get("comment_id", ""),
+                    parent_comment_id=c.get("parent_comment_id", ""),
+                    work_id=work_id,
+                    content=c.get("content", ""),
+                    author_id=c.get("username", ""),
+                    author_name=c.get("nickname", ""),
+                    author_avatar=c.get("avatar", ""),
+                    like_count=c.get("like_count", 0),
+                    reply_count=c.get("reply_count", 0),
+                    create_time=c.get("create_time", ""),
+                )
+                comment_item.is_author = c.get("is_author", False)
+                comment_item.create_time_unix = c.get("create_time_unix", 0)
+                comment_item.work_title = c.get("work_title", "")
+                comments.append(comment_item)
+
+            total = len(comments)
+            # 是否还有更多暂无法从接口获知,先固定为 False
+            has_more = False
+
+            print(f"[{self.platform_name}] ✅ 最终整理出评论条数: {total}")
+
+        except Exception as e:
+            import traceback
+            traceback.print_exc()
+            return CommentsResult(
+                success=False,
+                platform=self.platform_name,
+                work_id=work_id,
+                error=str(e),
+            )
+
         return CommentsResult(
-            success=False,
+            success=True,
             platform=self.platform_name,
             work_id=work_id,
-            error="百家号评论功能暂未实现"
+            comments=comments,
+            total=total,
+            has_more=has_more,
         )

+ 21 - 12
server/src/services/CommentService.ts

@@ -8,7 +8,7 @@ import type {
   PlatformType,
 } from '@media-manager/shared';
 import { wsManager } from '../websocket/index.js';
-import { headlessBrowserService, type WorkComments, type CookieData } from './HeadlessBrowserService.js';
+import { headlessBrowserService, type CookieData } from './HeadlessBrowserService.js';
 import { CookieManager } from '../automation/cookie.js';
 import { logger } from '../utils/logger.js';
 
@@ -210,7 +210,7 @@ export class CommentService {
     const accountRepository = AppDataSource.getRepository(PlatformAccount);
     
     // 获取需要同步的账号列表
-    const whereCondition: { userId: number; id?: number; platform?: string } = { userId };
+    const whereCondition: { userId: number; id?: number; platform?: PlatformType } = { userId };
     if (accountId) {
       whereCondition.id = accountId;
     }
@@ -227,7 +227,7 @@ export class CommentService {
     for (const account of accounts) {
       try {
         // 只处理支持的平台
-        if (account.platform !== 'douyin' && account.platform !== 'xiaohongshu' && account.platform !== 'weixin_video') {
+        if (account.platform !== 'douyin' && account.platform !== 'xiaohongshu' && account.platform !== 'weixin_video' && account.platform !== 'baijiahao') {
           logger.info(`Skipping unsupported platform: ${account.platform}`);
           continue;
         }
@@ -282,6 +282,8 @@ export class CommentService {
           workComments = await headlessBrowserService.fetchXiaohongshuCommentsViaApi(cookies);
         } else if (account.platform === 'weixin_video') {
           workComments = await headlessBrowserService.fetchWeixinVideoCommentsViaApi(cookies);
+        } else if (account.platform === 'baijiahao') {
+          workComments = await headlessBrowserService.fetchBaijiahaoCommentsViaApi(cookies);
         }
 
 
@@ -303,7 +305,9 @@ export class CommentService {
         for (const work of accountWorks) {
           if (work.platformVideoId) {
             videoIdToWorkMap.set(work.platformVideoId, { id: work.id, title: work.title });
-            // 同时存储不带前缀的版本(如果 platformVideoId 是 "douyin_xxx" 格式)
+            // 同时存储带平台前缀的版本(兼容 comment.videoId 为 "platform_xxx" 的情况)
+            videoIdToWorkMap.set(`${work.platform}_${work.platformVideoId}`, { id: work.id, title: work.title });
+            // 同时存储不带前缀的版本(如果 platformVideoId 是 "platform_xxx" 格式)
             if (work.platformVideoId.includes('_')) {
               const parts = work.platformVideoId.split('_');
               if (parts.length >= 2) {
@@ -340,7 +344,7 @@ export class CommentService {
             
             // 尝试带平台前缀匹配
             if (!workId) {
-              const prefixedId = `douyin_${commentVideoId}`;
+              const prefixedId = `${account.platform}_${commentVideoId}`;
               if (videoIdToWorkMap.has(prefixedId)) {
                 const matched = videoIdToWorkMap.get(prefixedId)!;
                 workId = matched.id;
@@ -354,7 +358,7 @@ export class CommentService {
                 if (!w.platformVideoId) return false;
                 // 尝试各种匹配方式
                 return w.platformVideoId === commentVideoId ||
-                       w.platformVideoId === `douyin_${commentVideoId}` ||
+                       w.platformVideoId === `${account.platform}_${commentVideoId}` ||
                        w.platformVideoId.endsWith(`_${commentVideoId}`) ||
                        w.platformVideoId.includes(commentVideoId);
               });
@@ -518,16 +522,21 @@ export class CommentService {
         if (work.platformVideoId) {
           // 存储原始 platformVideoId
           videoIdToWork.set(work.platformVideoId, { id: work.id, title: work.title });
+          // 存储带平台前缀的版本
+          videoIdToWork.set(`${work.platform}_${work.platformVideoId}`, { id: work.id, title: work.title });
           
-          // 如果是 "douyin_xxx" 格式,也存储纯 ID
-          if (work.platformVideoId.startsWith('douyin_')) {
-            const pureId = work.platformVideoId.replace('douyin_', '');
-            videoIdToWork.set(pureId, { id: work.id, title: work.title });
+          // 如果是 "platform_xxx" 格式,也存储纯 ID
+          if (work.platformVideoId.includes('_')) {
+            const parts = work.platformVideoId.split('_');
+            if (parts.length >= 2) {
+              const pureId = parts.slice(1).join('_');
+              videoIdToWork.set(pureId, { id: work.id, title: work.title });
+            }
           }
           
           // 如果是纯数字 ID,也存储带前缀的版本
           if (/^\d+$/.test(work.platformVideoId)) {
-            videoIdToWork.set(`douyin_${work.platformVideoId}`, { id: work.id, title: work.title });
+            videoIdToWork.set(`${work.platform}_${work.platformVideoId}`, { id: work.id, title: work.title });
           }
         }
       }
@@ -544,7 +553,7 @@ export class CommentService {
           }
           // 尝试带前缀匹配
           if (!matchedWorkId) {
-            const prefixedId = `douyin_${comment.videoId}`;
+            const prefixedId = `${comment.platform}_${comment.videoId}`;
             if (videoIdToWork.has(prefixedId)) {
               matchedWorkId = videoIdToWork.get(prefixedId)!.id;
             }