|
@@ -9,7 +9,11 @@ import asyncio
|
|
|
import os
|
|
import os
|
|
|
import sys
|
|
import sys
|
|
|
from pathlib import Path
|
|
from pathlib import Path
|
|
|
-from .base import BasePublisher, PublishParams, PublishResult
|
|
|
|
|
|
|
+from typing import List
|
|
|
|
|
+from .base import (
|
|
|
|
|
+ BasePublisher, PublishParams, PublishResult,
|
|
|
|
|
+ WorkItem, WorksResult, CommentItem, CommentsResult
|
|
|
|
|
+)
|
|
|
|
|
|
|
|
# 添加 matrix 项目路径,用于导入签名脚本
|
|
# 添加 matrix 项目路径,用于导入签名脚本
|
|
|
MATRIX_PATH = Path(__file__).parent.parent.parent.parent / "matrix"
|
|
MATRIX_PATH = Path(__file__).parent.parent.parent.parent / "matrix"
|
|
@@ -463,3 +467,461 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
platform=self.platform_name,
|
|
platform=self.platform_name,
|
|
|
message="发布完成"
|
|
message="发布完成"
|
|
|
)
|
|
)
|
|
|
|
|
+
|
|
|
|
|
async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
    """Fetch the account's Xiaohongshu notes by listening to page network responses.

    The note-manager page of the creator site is opened with the supplied
    cookies, and the JSON body of the note-list request that the page fires
    on its own is captured and converted into ``WorkItem`` objects.

    Args:
        cookies: Raw cookie string; parsed with ``self.parse_cookies``.
        page: Only logged here; it is not forwarded to any request.
        page_size: Only logged here; it is not forwarded to any request.

    Returns:
        ``WorksResult`` with ``success=True`` and the parsed works (the list
        is empty when no API response was captured), or ``success=False``
        with ``error`` set when anything inside the scraping flow raised,
        e.g. when the browser ended up on a login page.
    """
    banner = '=' * 60
    print(f"\n{banner}", flush=True)
    print(f"[{self.platform_name}] 获取作品列表", flush=True)
    print(f"[{self.platform_name}] page={page}, page_size={page_size}", flush=True)
    print(f"{banner}", flush=True)

    works: List[WorkItem] = []
    total = 0
    has_more = False
    captured_data = {}

    # tab_status values that do not mean "published"; anything else
    # (including a missing field) is reported as published.
    status_by_tab = {0: 'draft', 2: 'reviewing', 3: 'rejected'}

    try:
        await self.init_browser()
        cookie_list = self.parse_cookies(cookies)

        # Log the cookie count for debugging.
        print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies", flush=True)

        await self.set_cookies(cookie_list)

        if not self.page:
            raise Exception("Page not initialized")

        async def handle_response(response):
            """Keep the latest successful note-list API payload."""
            nonlocal captured_data
            url = response.url
            if 'creator/note/user/posted' not in url and 'creator/note_list' not in url:
                return
            try:
                json_data = await response.json()
                print(f"[{self.platform_name}] 捕获到 API 响应: {url[:80]}...", flush=True)
                if json_data.get('success') or json_data.get('code') == 0:
                    captured_data = json_data
                    # "data" may be present but null, hence `or {}`.
                    data_keys = list((json_data.get('data') or {}).keys())
                    print(f"[{self.platform_name}] API 响应成功,data keys: {data_keys}", flush=True)
            except Exception as e:
                print(f"[{self.platform_name}] 解析响应失败: {e}", flush=True)

        # Register the listener before navigating so the page's own
        # request is not missed.
        self.page.on('response', handle_response)
        print(f"[{self.platform_name}] 已注册 API 响应监听器", flush=True)

        print(f"[{self.platform_name}] 访问笔记管理页面...", flush=True)

        try:
            await self.page.goto("https://creator.xiaohongshu.com/new/note-manager", wait_until="domcontentloaded", timeout=30000)
        except Exception as nav_error:
            # Best effort: the API response may still arrive after a
            # navigation error, so keep going.
            print(f"[{self.platform_name}] 导航超时,但继续尝试: {nav_error}", flush=True)

        # Give the page time to issue its API request.
        await asyncio.sleep(5)

        # A redirect to a login URL means the cookies are no longer valid.
        current_url = self.page.url
        print(f"[{self.platform_name}] 当前页面: {current_url}", flush=True)
        if "login" in current_url:
            raise Exception("Cookie 已过期,请重新登录")

        # Nothing captured yet: wait once more.
        if not captured_data:
            print(f"[{self.platform_name}] 等待 API 响应...", flush=True)
            await asyncio.sleep(5)

        self.page.remove_listener('response', handle_response)

        if captured_data:
            print(f"[{self.platform_name}] 成功捕获到 API 数据", flush=True)
            # Every container below may be an explicit null in the JSON,
            # in which case dict.get's default would not apply.
            data = captured_data.get('data') or {}
            notes = data.get('notes') or []
            print(f"[{self.platform_name}] notes 数量: {len(notes)}", flush=True)

            # The total count is carried by the "special.note_time_desc" tag.
            total = next(
                (
                    tag.get('notes_count', 0)
                    for tag in (data.get('tags') or [])
                    if tag.get('id') == 'special.note_time_desc'
                ),
                0,
            )

            has_more = data.get('page', -1) != -1

            for note in notes:
                note_id = note.get('id', '')
                if not note_id:
                    continue

                # Cover: first image, upgraded to https when served over http.
                images_list = note.get('images_list') or []
                cover_url = (images_list[0].get('url') or '') if images_list else ''
                if cover_url.startswith('http://'):
                    cover_url = cover_url.replace('http://', 'https://', 1)

                duration = (note.get('video_info') or {}).get('duration', 0)
                status = status_by_tab.get(note.get('tab_status', 1), 'published')

                works.append(WorkItem(
                    work_id=note_id,
                    title=note.get('display_title', '') or '无标题',
                    cover_url=cover_url,
                    duration=duration,
                    status=status,
                    publish_time=note.get('time', ''),
                    play_count=note.get('view_count', 0),
                    like_count=note.get('likes', 0),
                    comment_count=note.get('comments_count', 0),
                    share_count=note.get('shared_count', 0),
                    collect_count=note.get('collected_count', 0),
                ))

            print(f"[{self.platform_name}] 解析到 {len(works)} 个作品,总计: {total}", flush=True)
        else:
            print(f"[{self.platform_name}] 未能捕获到 API 数据", flush=True)

    except Exception as e:
        import traceback
        print(f"[{self.platform_name}] 发生异常: {e}", flush=True)
        traceback.print_exc()
        return WorksResult(
            success=False,
            platform=self.platform_name,
            error=str(e)
        )
    finally:
        # Always release the browser, on success and on failure.
        await self.close_browser()

    return WorksResult(
        success=True,
        platform=self.platform_name,
        works=works,
        total=total or len(works),
        has_more=has_more
    )
|
|
|
|
|
+
|
|
|
|
|
async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
    """Fetch comments through the creator-site comment management page.

    The comment page is opened with the supplied cookies and the first
    successful comment-list API response that contains comments is captured
    and converted into ``CommentItem`` objects.

    Args:
        cookies: Raw cookie string; parsed with ``self.parse_cookies``.
        work_id: Stamped onto every returned comment and onto the result.
            It is not used to select which note the page shows.
        cursor: Only logged here; it is not forwarded to any request.

    Returns:
        ``CommentsResult`` with ``success=True`` and the parsed comments
        (empty when nothing was captured), or ``success=False`` with
        ``error`` set when anything inside the scraping flow raised.
        On success the next cursor is attached to the result as ``cursor``.
    """
    print(f"\n{'='*60}")
    print(f"[{self.platform_name}] 获取作品评论")
    print(f"[{self.platform_name}] work_id={work_id}, cursor={cursor}")
    print(f"{'='*60}")

    comments: List[CommentItem] = []
    total = 0
    has_more = False
    next_cursor = ""
    captured_data = {}

    try:
        await self.init_browser()
        cookie_list = self.parse_cookies(cookies)
        await self.set_cookies(cookie_list)

        if not self.page:
            raise Exception("Page not initialized")

        async def handle_response(response):
            """Keep the latest successful comment payload that has comments."""
            nonlocal captured_data
            url = response.url
            # Comment APIs of both the creator backend and the regular site.
            if '/comment/' not in url or not ('page' in url or 'list' in url):
                return
            try:
                json_data = await response.json()
                print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
                if json_data.get('success') or json_data.get('code') == 0:
                    # "data" may be present but null, hence `or {}`.
                    payload = json_data.get('data') or {}
                    comment_list = payload.get('comments') or payload.get('list') or []
                    if comment_list:
                        captured_data = json_data
                        print(f"[{self.platform_name}] 评论 API 响应成功,comments={len(comment_list)}", flush=True)
                    else:
                        print(f"[{self.platform_name}] 评论 API 响应成功但无评论", flush=True)
            except Exception as e:
                print(f"[{self.platform_name}] 解析评论响应失败: {e}", flush=True)

        self.page.on('response', handle_response)
        print(f"[{self.platform_name}] 已注册评论 API 响应监听器", flush=True)

        comment_url = "https://creator.xiaohongshu.com/creator/comment"
        print(f"[{self.platform_name}] 访问评论管理页面: {comment_url}", flush=True)
        await self.page.goto(comment_url, wait_until="domcontentloaded", timeout=30000)
        await asyncio.sleep(5)

        # A redirect to a login URL means the cookies are no longer valid.
        current_url = self.page.url
        print(f"[{self.platform_name}] 当前页面 URL: {current_url}", flush=True)
        if "login" in current_url:
            raise Exception("Cookie 已过期,请重新登录")

        if not captured_data:
            print(f"[{self.platform_name}] 等待评论 API 响应...", flush=True)
            # Scroll to nudge the page into loading comments.
            await self.page.evaluate('window.scrollBy(0, 500)')
            await asyncio.sleep(3)

        if not captured_data:
            # The comment API may simply be slow; wait once more.
            print(f"[{self.platform_name}] 继续等待评论加载...", flush=True)
            await asyncio.sleep(5)

        self.page.remove_listener('response', handle_response)

        if captured_data:
            data = captured_data.get('data') or {}
            comment_list = data.get('comments') or data.get('list') or []
            has_more = data.get('has_more', False)
            next_cursor = data.get('cursor', '')

            print(f"[{self.platform_name}] 解析评论: has_more={has_more}, comments={len(comment_list)}", flush=True)

            for comment in comment_list:
                cid = comment.get('id', '')
                if not cid:
                    continue

                # user_info can be an explicit null; fall back to {}.
                user_info = comment.get('user_info') or {}

                # Sub-comments become nested CommentItem replies.
                replies = []
                for sub in comment.get('sub_comments') or []:
                    sub_user = sub.get('user_info') or {}
                    replies.append(CommentItem(
                        comment_id=sub.get('id', ''),
                        work_id=work_id,
                        content=sub.get('content', ''),
                        author_id=sub_user.get('user_id', ''),
                        author_name=sub_user.get('nickname', ''),
                        author_avatar=sub_user.get('image', ''),
                        like_count=sub.get('like_count', 0),
                        create_time=sub.get('create_time', ''),
                    ))

                comments.append(CommentItem(
                    comment_id=cid,
                    work_id=work_id,
                    content=comment.get('content', ''),
                    author_id=user_info.get('user_id', ''),
                    author_name=user_info.get('nickname', ''),
                    author_avatar=user_info.get('image', ''),
                    like_count=comment.get('like_count', 0),
                    reply_count=comment.get('sub_comment_count', 0),
                    create_time=comment.get('create_time', ''),
                    replies=replies,
                ))

            total = len(comments)
            print(f"[{self.platform_name}] 解析到 {total} 条评论", flush=True)
        else:
            print(f"[{self.platform_name}] 未捕获到评论 API 响应", flush=True)

    except Exception as e:
        import traceback
        # Same message format as get_works, so failures are visible
        # without reading the traceback.
        print(f"[{self.platform_name}] 发生异常: {e}", flush=True)
        traceback.print_exc()
        return CommentsResult(
            success=False,
            platform=self.platform_name,
            work_id=work_id,
            error=str(e)
        )
    finally:
        # Always release the browser, on success and on failure.
        await self.close_browser()

    result = CommentsResult(
        success=True,
        platform=self.platform_name,
        work_id=work_id,
        comments=comments,
        total=total,
        has_more=has_more
    )
    # NOTE(review): the cursor is written straight into __dict__, presumably
    # because CommentsResult declares no `cursor` field - confirm in .base.
    result.__dict__['cursor'] = next_cursor
    return result
|
|
|
|
|
+
|
|
|
|
|
async def get_all_comments(self, cookies: str) -> dict:
    """Collect comments for all works via the comment management page.

    The comment page is opened with the supplied cookies and scrolled a few
    times; every comment-list and note-list API response seen in the
    meantime is captured. Captured comments are then grouped by note id.

    Args:
        cookies: Raw cookie string; parsed with ``self.parse_cookies``.

    Returns:
        A dict with ``success``, ``platform``, ``work_comments`` (one entry
        per note with ``work_id``, ``title``, ``cover_url`` and
        ``comments``) and ``total`` (the number of entries in
        ``work_comments``). On failure ``success`` is False, ``error``
        holds the message, ``work_comments`` is empty and ``total`` is 0.
    """
    import re

    print(f"\n{'='*60}")
    print(f"[{self.platform_name}] 获取所有作品评论")
    print(f"{'='*60}")

    all_work_comments = []
    captured_comments = []
    captured_notes = {}  # note_id -> {'title': ..., 'cover': ...}

    # Compiled once instead of importing/compiling per captured response.
    note_id_pattern = re.compile(r'note_id=([^&]+)')

    def extract_cover_url(cover):
        """Return the cover URL from a cover dict or string, '' otherwise."""
        if isinstance(cover, dict):
            return cover.get('url', '') or cover.get('url_default', '')
        if isinstance(cover, str):
            return cover
        return ''

    try:
        await self.init_browser()
        cookie_list = self.parse_cookies(cookies)
        await self.set_cookies(cookie_list)

        if not self.page:
            raise Exception("Page not initialized")

        async def handle_response(response):
            """Accumulate comments and note metadata from API responses."""
            url = response.url
            try:
                # Comment-list API (several URL shapes).
                if '/comment/' in url and ('page' in url or 'list' in url):
                    json_data = await response.json()
                    print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)

                    if json_data.get('success') or json_data.get('code') == 0:
                        # "data" may be present but null, hence `or {}`.
                        data = json_data.get('data') or {}
                        comments = data.get('comments') or data.get('list') or []

                        # The note id, when present, is in the query string.
                        note_id_match = note_id_pattern.search(url)
                        note_id = note_id_match.group(1) if note_id_match else ''

                        if comments:
                            for comment in comments:
                                # Tag the comment with its note id so it can
                                # be grouped later.
                                if note_id and 'note_id' not in comment:
                                    comment['note_id'] = note_id
                                captured_comments.append(comment)

                            print(f"[{self.platform_name}] 捕获到 {len(comments)} 条评论 (note_id={note_id}),总计: {len(captured_comments)}", flush=True)

                # Note-list API: remember title/cover per note id.
                if '/note/' in url and ('list' in url or 'posted' in url or 'manager' in url):
                    json_data = await response.json()
                    if json_data.get('success') or json_data.get('code') == 0:
                        data = json_data.get('data') or {}
                        notes = data.get('notes') or data.get('list') or []
                        print(f"[{self.platform_name}] 捕获到笔记列表 API: {len(notes)} 个笔记", flush=True)
                        for note in notes:
                            note_id = note.get('note_id', '') or note.get('id', '')
                            if note_id:
                                captured_notes[note_id] = {
                                    'title': note.get('title', '') or note.get('display_title', ''),
                                    'cover': extract_cover_url(note.get('cover', {})),
                                }
            except Exception as e:
                print(f"[{self.platform_name}] 解析响应失败: {e}", flush=True)

        self.page.on('response', handle_response)
        print(f"[{self.platform_name}] 已注册 API 响应监听器", flush=True)

        print(f"[{self.platform_name}] 访问评论管理页面...", flush=True)
        await self.page.goto("https://creator.xiaohongshu.com/creator/comment", wait_until="domcontentloaded", timeout=30000)
        await asyncio.sleep(5)

        # A redirect to a login URL means the cookies are no longer valid.
        current_url = self.page.url
        if "login" in current_url:
            raise Exception("Cookie 已过期,请重新登录")

        print(f"[{self.platform_name}] 页面加载完成,当前捕获: {len(captured_comments)} 条评论, {len(captured_notes)} 个笔记", flush=True)

        # Scroll to load more comments.
        for _ in range(5):
            await self.page.evaluate('window.scrollBy(0, 500)')
            await asyncio.sleep(1)

        await asyncio.sleep(3)

        self.page.remove_listener('response', handle_response)

        print(f"[{self.platform_name}] 最终捕获: {len(captured_comments)} 条评论, {len(captured_notes)} 个笔记", flush=True)

        # Group comments by note.
        work_comments_map = {}  # note_id -> work entry
        for comment in captured_comments:
            # Nested objects may be explicit nulls; fall back to {}.
            note_info = comment.get('note_info') or comment.get('note') or {}
            note_id = comment.get('note_id', '') or note_info.get('note_id', '') or note_info.get('id', '')

            if not note_id:
                continue

            if note_id not in work_comments_map:
                saved_note = captured_notes.get(note_id, {})
                # Prefer the cover embedded in the comment, then the one
                # remembered from the note-list API.
                cover_url = extract_cover_url(note_info.get('cover', {}))
                if not cover_url:
                    cover_url = saved_note.get('cover', '')

                work_comments_map[note_id] = {
                    'work_id': note_id,
                    'title': note_info.get('title', '') or note_info.get('display_title', '') or saved_note.get('title', ''),
                    'cover_url': cover_url,
                    'comments': []
                }

            cid = comment.get('id', '') or comment.get('comment_id', '')
            if not cid:
                continue

            user_info = comment.get('user_info') or comment.get('user') or {}

            work_comments_map[note_id]['comments'].append({
                'comment_id': cid,
                'author_id': user_info.get('user_id', '') or user_info.get('id', ''),
                'author_name': user_info.get('nickname', '') or user_info.get('name', ''),
                'author_avatar': user_info.get('image', '') or user_info.get('avatar', ''),
                'content': comment.get('content', ''),
                'like_count': comment.get('like_count', 0),
                'create_time': comment.get('create_time', ''),
            })

        all_work_comments = list(work_comments_map.values())
        total_comments = sum(len(w['comments']) for w in all_work_comments)
        print(f"[{self.platform_name}] 获取到 {len(all_work_comments)} 个作品的 {total_comments} 条评论", flush=True)

    except Exception as e:
        import traceback
        traceback.print_exc()
        return {
            'success': False,
            'platform': self.platform_name,
            'error': str(e),
            'work_comments': [],
            # Same key set as the success result so callers can read
            # 'total' unconditionally.
            'total': 0
        }
    finally:
        # Always release the browser, on success and on failure.
        await self.close_browser()

    return {
        'success': True,
        'platform': self.platform_name,
        'work_comments': all_work_comments,
        'total': len(all_work_comments)
    }
|