|
|
@@ -9,12 +9,16 @@ import asyncio
|
|
|
import os
|
|
|
import sys
|
|
|
import time
|
|
|
+import concurrent.futures
|
|
|
from pathlib import Path
|
|
|
from typing import List
|
|
|
from .base import (
|
|
|
BasePublisher, PublishParams, PublishResult,
|
|
|
WorkItem, WorksResult, CommentItem, CommentsResult
|
|
|
)
|
|
|
+from playwright.async_api import async_playwright
|
|
|
+
|
|
|
+
|
|
|
|
|
|
# 添加 matrix 项目路径,用于导入签名脚本
|
|
|
MATRIX_PATH = Path(__file__).parent.parent.parent.parent / "matrix"
|
|
|
@@ -32,6 +36,8 @@ except ImportError:
|
|
|
# 签名脚本路径
|
|
|
STEALTH_JS_PATH = MATRIX_PATH / "xhs-api" / "js" / "stealth.min.js"
|
|
|
|
|
|
+_xhs_sign_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
|
|
+
|
|
|
|
|
|
class XiaohongshuPublisher(BasePublisher):
|
|
|
"""
|
|
|
@@ -90,55 +96,19 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
"""
|
|
|
同步签名函数,供 XhsClient 使用。
|
|
|
|
|
|
- 注意:发布流程运行在 asyncio 事件循环中(通过 asyncio.run 启动),
|
|
|
- 这里如果再调用 asyncio.run 会触发 “asyncio.run() cannot be called from a running event loop”。
|
|
|
- 因此改为使用 sync_playwright 的同步实现(参考 matrix/xhs_uploader)。
|
|
|
+ 注意:发布流程运行在 asyncio 事件循环中(通过 asyncio.run 启动)。
|
|
|
+ XhsClient 以同步方式调用 sign 回调,但我们需要使用 Playwright Async API 进行签名。
|
|
|
+ 因此当处于事件循环中时,将签名逻辑放到独立线程里执行 asyncio.run。
|
|
|
"""
|
|
|
+ def run_async_sign():
|
|
|
+ return asyncio.run(self.get_sign(uri, data=data, a1=a1, web_session=web_session))
|
|
|
+
|
|
|
try:
|
|
|
- from playwright.sync_api import sync_playwright
|
|
|
- except Exception as e:
|
|
|
- raise Exception(f"缺少 playwright 同步接口支持: {e}")
|
|
|
-
|
|
|
- last_exc: Exception | None = None
|
|
|
- for attempt in range(1, 6):
|
|
|
- try:
|
|
|
- with sync_playwright() as playwright:
|
|
|
- browser = playwright.chromium.launch(headless=True)
|
|
|
- context = browser.new_context()
|
|
|
-
|
|
|
- if STEALTH_JS_PATH.exists():
|
|
|
- context.add_init_script(path=str(STEALTH_JS_PATH))
|
|
|
-
|
|
|
- page = context.new_page()
|
|
|
- page.goto("https://www.xiaohongshu.com", wait_until="domcontentloaded", timeout=60000)
|
|
|
-
|
|
|
- if a1:
|
|
|
- context.add_cookies([
|
|
|
- {'name': 'a1', 'value': a1, 'domain': ".xiaohongshu.com", 'path': "/"}
|
|
|
- ])
|
|
|
- page.reload(wait_until="domcontentloaded")
|
|
|
-
|
|
|
- # 参考 matrix:设置完 cookie 后需要稍等,否则可能出现 window._webmsxyw 不存在
|
|
|
- time.sleep(1.5)
|
|
|
-
|
|
|
- encrypt_params = page.evaluate(
|
|
|
- "([url, data]) => window._webmsxyw(url, data)",
|
|
|
- [uri, data]
|
|
|
- )
|
|
|
-
|
|
|
- context.close()
|
|
|
- browser.close()
|
|
|
-
|
|
|
- return {
|
|
|
- "x-s": encrypt_params["X-s"],
|
|
|
- "x-t": str(encrypt_params["X-t"])
|
|
|
- }
|
|
|
- except Exception as e:
|
|
|
- last_exc = e
|
|
|
- # 轻微退避重试
|
|
|
- time.sleep(0.4 * attempt)
|
|
|
-
|
|
|
- raise Exception(f"签名失败: {last_exc}")
|
|
|
+ asyncio.get_running_loop()
|
|
|
+ future = _xhs_sign_executor.submit(run_async_sign)
|
|
|
+ return future.result(timeout=120)
|
|
|
+ except RuntimeError:
|
|
|
+ return run_async_sign()
|
|
|
|
|
|
async def publish_via_api(self, cookies: str, params: PublishParams) -> PublishResult:
|
|
|
"""通过 API 发布视频"""
|
|
|
@@ -157,14 +127,45 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
|
|
|
self.report_progress(20, "正在上传视频...")
|
|
|
|
|
|
- # 创建客户端
|
|
|
- xhs_client = XhsClient(cookie_string, sign=self.sign_sync)
|
|
|
-
|
|
|
- print(f"[{self.platform_name}] 开始调用 create_video_note...")
|
|
|
-
|
|
|
- # 发布视频
|
|
|
- try:
|
|
|
- result = xhs_client.create_video_note(
|
|
|
+ async def ensure_valid_cookie_for_sdk() -> str | None:
|
|
|
+ await self.init_browser()
|
|
|
+ cookie_list_for_browser = self.parse_cookies(cookie_string)
|
|
|
+ await self.set_cookies(cookie_list_for_browser)
|
|
|
+
|
|
|
+ if not self.page or not self.context:
|
|
|
+ return None
|
|
|
+
|
|
|
+ await self.page.goto("https://creator.xiaohongshu.com/new/home", wait_until="domcontentloaded", timeout=60000)
|
|
|
+ await asyncio.sleep(2)
|
|
|
+
|
|
|
+ current_url = (self.page.url or '').lower()
|
|
|
+ if 'login' in current_url or 'passport' in current_url:
|
|
|
+ if self.headless:
|
|
|
+ return None
|
|
|
+
|
|
|
+ waited = 0
|
|
|
+ while waited < 180:
|
|
|
+ current_url = (self.page.url or '').lower()
|
|
|
+ if 'login' not in current_url and 'passport' not in current_url and 'creator.xiaohongshu.com' in current_url:
|
|
|
+ break
|
|
|
+ await asyncio.sleep(2)
|
|
|
+ waited += 2
|
|
|
+
|
|
|
+ current_url = (self.page.url or '').lower()
|
|
|
+ if 'login' in current_url or 'passport' in current_url:
|
|
|
+ return None
|
|
|
+
|
|
|
+ cookies_after = await self.context.cookies()
|
|
|
+ try:
|
|
|
+ await self.sync_cookies_to_node(cookies_after)
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+ refreshed_cookie_str = self.cookies_to_string(cookies_after)
|
|
|
+ return refreshed_cookie_str or None
|
|
|
+
|
|
|
+ def call_create_video_note(sdk_cookie_str: str):
|
|
|
+ xhs_client = XhsClient(sdk_cookie_str, sign=self.sign_sync)
|
|
|
+ return xhs_client.create_video_note(
|
|
|
title=params.title,
|
|
|
desc=params.description or params.title,
|
|
|
topics=params.tags or [],
|
|
|
@@ -172,12 +173,42 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
video_path=params.video_path,
|
|
|
cover_path=params.cover_path if params.cover_path and os.path.exists(params.cover_path) else None
|
|
|
)
|
|
|
+
|
|
|
+ print(f"[{self.platform_name}] 开始调用 create_video_note...")
|
|
|
+
|
|
|
+ try:
|
|
|
+ result = call_create_video_note(cookie_string)
|
|
|
print(f"[{self.platform_name}] SDK 返回结果: {result}")
|
|
|
except Exception as e:
|
|
|
- import traceback
|
|
|
- traceback.print_exc()
|
|
|
- print(f"[{self.platform_name}] SDK 调用失败: {e}")
|
|
|
- raise Exception(f"XHS SDK 发布失败: {e}")
|
|
|
+ err_text = str(e)
|
|
|
+ if '无登录信息' in err_text or '"code": -100' in err_text or "'code': -100" in err_text:
|
|
|
+ self.report_progress(15, "登录信息失效,尝试刷新登录信息...")
|
|
|
+ refreshed = await ensure_valid_cookie_for_sdk()
|
|
|
+ if not refreshed:
|
|
|
+ screenshot_base64 = await self.capture_screenshot()
|
|
|
+ page_url = await self.get_page_url() if hasattr(self, 'get_page_url') else (self.page.url if self.page else "")
|
|
|
+ return PublishResult(
|
|
|
+ success=False,
|
|
|
+ platform=self.platform_name,
|
|
|
+ error="登录已过期,请使用有头浏览器重新登录",
|
|
|
+ screenshot_base64=screenshot_base64,
|
|
|
+ page_url=page_url,
|
|
|
+ status='need_captcha',
|
|
|
+ need_captcha=True,
|
|
|
+ captcha_type='login'
|
|
|
+ )
|
|
|
+ try:
|
|
|
+ result = call_create_video_note(refreshed)
|
|
|
+ print(f"[{self.platform_name}] SDK 重试返回结果: {result}")
|
|
|
+ except Exception as e2:
|
|
|
+ import traceback
|
|
|
+ traceback.print_exc()
|
|
|
+ raise Exception(f"XHS SDK 发布失败: {e2}")
|
|
|
+ else:
|
|
|
+ import traceback
|
|
|
+ traceback.print_exc()
|
|
|
+ print(f"[{self.platform_name}] SDK 调用失败: {e}")
|
|
|
+ raise Exception(f"XHS SDK 发布失败: {e}")
|
|
|
|
|
|
# 验证返回结果
|
|
|
if not result:
|
|
|
@@ -243,9 +274,13 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
# 其他情况尝试 Playwright 方式
|
|
|
print(f"[{self.platform_name}] API 方式未成功,尝试 Playwright...")
|
|
|
except Exception as e:
|
|
|
- import traceback
|
|
|
- traceback.print_exc()
|
|
|
- print(f"[{self.platform_name}] API 发布失败: {e}")
|
|
|
+ err_text = str(e)
|
|
|
+ if '登录已过期' in err_text or '无登录信息' in err_text:
|
|
|
+ print(f"[{self.platform_name}] API 登录失效,切换到 Playwright 方式...", flush=True)
|
|
|
+ else:
|
|
|
+ import traceback
|
|
|
+ traceback.print_exc()
|
|
|
+ print(f"[{self.platform_name}] API 发布失败: {e}")
|
|
|
print(f"[{self.platform_name}] 尝试使用 Playwright 方式...")
|
|
|
|
|
|
# 使用 Playwright 方式发布
|
|
|
@@ -613,6 +648,7 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
|
|
|
# 等待发布完成(检测 URL 变化或成功提示)
|
|
|
publish_success = False
|
|
|
+ refresh_retry = 0
|
|
|
for i in range(20): # 最多等待 20 秒
|
|
|
await asyncio.sleep(1)
|
|
|
|
|
|
@@ -638,9 +674,47 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
try:
|
|
|
error_elements = self.page.locator('[class*="error"], .toast-error, [class*="fail"]')
|
|
|
if await error_elements.count() > 0:
|
|
|
- error_text = await error_elements.first.text_content()
|
|
|
- if error_text and len(error_text.strip()) > 0:
|
|
|
- raise Exception(f"发布失败: {error_text.strip()}")
|
|
|
+ first_error = error_elements.first
|
|
|
+ if await first_error.is_visible():
|
|
|
+ error_text = (await first_error.text_content()) or ''
|
|
|
+ error_text = error_text.strip()
|
|
|
+ if error_text:
|
|
|
+ if '请刷新' in error_text and refresh_retry < 3:
|
|
|
+ refresh_retry += 1
|
|
|
+ print(f"[{self.platform_name}] 检测到临时错误: {error_text},尝试刷新并重试发布({refresh_retry}/3)", flush=True)
|
|
|
+ try:
|
|
|
+ await self.page.reload(wait_until="domcontentloaded")
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+ await asyncio.sleep(2)
|
|
|
+ await self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
|
|
+ await asyncio.sleep(1)
|
|
|
+ republish_clicked = False
|
|
|
+ for selector in publish_selectors:
|
|
|
+ try:
|
|
|
+ btn = self.page.locator(selector).first
|
|
|
+ if await btn.count() > 0 and await btn.is_visible() and await btn.is_enabled():
|
|
|
+ try:
|
|
|
+ await btn.click()
|
|
|
+ except:
|
|
|
+ box = await btn.bounding_box()
|
|
|
+ if box:
|
|
|
+ await self.page.mouse.click(box['x'] + box['width']/2, box['y'] + box['height']/2)
|
|
|
+ republish_clicked = True
|
|
|
+ break
|
|
|
+ except:
|
|
|
+ continue
|
|
|
+ continue
|
|
|
+ screenshot_base64 = await self.capture_screenshot()
|
|
|
+ page_url = await self.get_page_url()
|
|
|
+ return PublishResult(
|
|
|
+ success=False,
|
|
|
+ platform=self.platform_name,
|
|
|
+ error=f"发布失败: {error_text}",
|
|
|
+ screenshot_base64=screenshot_base64,
|
|
|
+ page_url=page_url,
|
|
|
+ status='failed'
|
|
|
+ )
|
|
|
except Exception as e:
|
|
|
if "发布失败" in str(e):
|
|
|
raise
|
|
|
@@ -1554,150 +1628,194 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
debug_info=debug_info
|
|
|
)
|
|
|
|
|
|
+
|
|
|
async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
    """
    Collect the comments of every note shown on the account's profile page.

    Flow: launch a dedicated Chromium instance carrying the given cookies, open
    the xiaohongshu.com home page, wait for the login dialog (if one is shown)
    to disappear, open the user's profile page, scroll until the page height
    stops growing, then click each note cover and read the comment-page API
    response that the click triggers.

    Args:
        cookies: Raw cookie data; parsed with ``self.parse_cookies``. Must
            contain the ``x-user-id-creator.xiaohongshu.com`` cookie.
        work_id: Stamped onto every returned ``CommentItem`` and onto the
            result object.
        cursor: Accepted for interface compatibility; not used.

    Returns:
        ``CommentsResult`` with a flat comment list in which each reply follows
        its parent and carries ``parent_comment_id``. On failure the result has
        ``success=False`` and ``error`` set to the exception text.
    """
    # NOTE(review): comments from *all* notes are tagged with the single
    # ``work_id`` passed in, exactly as before — confirm this is what callers expect.
    comment_api_marker = "edith.xiaohongshu.com/api/sns/web/v2/comment/page"
    # Safety bound for the "scroll to bottom" loop so that a page whose height
    # keeps changing cannot hang the task forever (100 rounds x 2 s).
    max_scroll_rounds = 100

    def build_item(raw, parent_id, reply_count):
        """Map one raw API comment dict to a CommentItem."""
        author = raw.get("user_info") or {}
        return CommentItem(
            comment_id=raw["id"],
            parent_comment_id=parent_id,
            work_id=work_id,
            content=raw["content"],
            author_id=author.get("user_id", ""),
            author_name=author.get("nickname", ""),
            author_avatar=author.get("image", ""),
            like_count=int(raw.get("like_count") or 0),
            reply_count=reply_count,
            create_time=self._timestamp_to_readable(raw.get("create_time", 0)),
        )

    all_comments: List[CommentItem] = []
    playwright = None
    browser = None

    try:
        # --- Step 1: parse cookies and resolve the user id ---
        cookie_list = self.parse_cookies(cookies)

        # Resolve the user id first: without it there is no profile URL, so
        # there is no point in launching a browser at all.
        user_id = next(
            (
                cookie.get('value')
                for cookie in cookie_list
                if cookie.get('name') == 'x-user-id-creator.xiaohongshu.com'
            ),
            None,
        )
        if not user_id:
            raise ValueError("无法从 Cookie 中提取 user_id")

        playwright = await async_playwright().start()
        # NOTE(review): the browser is always headed; presumably so a user can
        # complete the login dialog below — confirm for server deployments.
        browser = await playwright.chromium.launch(headless=False)
        context = await browser.new_context(
            viewport={"width": 1400, "height": 900},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        )
        await context.add_cookies(cookie_list)
        page = await context.new_page()

        # --- Step 2: open the Xiaohongshu home page ---
        await page.goto("https://www.xiaohongshu.com", wait_until="domcontentloaded")
        await asyncio.sleep(1.5)

        # --- Step 3: if a login dialog is showing, wait for it to go away ---
        try:
            if await page.is_visible(".login-container", timeout=3000):
                await page.wait_for_selector(".login-container", state="hidden", timeout=120000)
        except Exception as exc:
            # Best effort: a timeout here is not fatal, the profile page check
            # below decides whether the session is usable.
            print(f"[{self.platform_name}] login dialog wait skipped: {exc}", flush=True)

        # --- Step 4: open the user's profile page ---
        profile_url = f"https://www.xiaohongshu.com/user/profile/{user_id}"
        await page.goto(profile_url, wait_until="domcontentloaded")
        await asyncio.sleep(2)

        try:
            await page.wait_for_selector("#userPostedFeeds .note-item", timeout=20000)
        except Exception as exc:
            raise Exception("笔记区域未加载,请检查账号是否公开或 Cookie 是否有效") from exc

        # --- Step 5: scroll until the page height stops growing ---
        last_height = None
        for _ in range(max_scroll_rounds):
            await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            await asyncio.sleep(2)
            new_height = await page.evaluate("document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        note_imgs = await page.query_selector_all("#userPostedFeeds .note-item .cover img")
        print(f"共找到 {len(note_imgs)} 张封面图")

        # --- Steps 6 & 7: click each cover, capture and structure its comments ---
        for img in note_imgs:
            try:
                await img.scroll_into_view_if_needed()
                await asyncio.sleep(0.5)

                captured = []

                def handle_response(response, _sink=captured):
                    # Keep every matching response; the last one is used below.
                    if comment_api_marker in response.url:
                        _sink.append(response)

                page.on("response", handle_response)
                try:
                    await img.click()
                    await asyncio.sleep(1.5)
                finally:
                    # Always detach, otherwise a failed click leaves a stale
                    # listener attached for all following notes.
                    page.remove_listener("response", handle_response)

                if not captured:
                    await page.keyboard.press("Escape")
                    continue

                json_data = await captured[-1].json()
                if not (json_data.get("success") or json_data.get("code") == 0):
                    await page.keyboard.press("Escape")
                    continue

                data = json_data.get("data") or {}
                for main_cmt in data.get("comments") or []:
                    # Top-level comment first, then its replies.
                    all_comments.append(
                        build_item(main_cmt, None, main_cmt.get("sub_comment_count", 0))
                    )
                    for sub_cmt in main_cmt.get("sub_comments") or []:
                        all_comments.append(build_item(sub_cmt, main_cmt["id"], 0))

                # Close the note overlay before moving to the next cover.
                await page.keyboard.press("Escape")
                await asyncio.sleep(1)

            except Exception as exc:
                print(f"[{self.platform_name}] failed to read comments of one note: {exc}", flush=True)
                # Still try to close the overlay so the next cover is clickable.
                try:
                    await page.keyboard.press("Escape")
                    await asyncio.sleep(0.5)
                except Exception:
                    pass
                continue

        return CommentsResult(
            success=True,
            platform=self.platform_name,
            work_id=work_id,
            comments=all_comments,
            total=len(all_comments),
            has_more=False
        )

    except Exception as e:
        import traceback
        traceback.print_exc()
        # Report the failure instead of claiming success with zero comments.
        return CommentsResult(
            success=False,
            platform=self.platform_name,
            work_id=work_id,
            error=str(e)
        )
    finally:
        try:
            if browser:
                await browser.close()
        finally:
            # Stop the Playwright driver as well; closing the browser alone
            # leaves the driver process running.
            if playwright:
                await playwright.stop()
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ def _timestamp_to_readable(self, ts_ms: int) -> str:
|
|
|
+ """将毫秒时间戳转换为可读格式"""
|
|
|
+ from datetime import datetime
|
|
|
+ if not ts_ms:
|
|
|
+ return ""
|
|
|
+ try:
|
|
|
+ return datetime.fromtimestamp(ts_ms / 1000).strftime("%Y-%m-%d %H:%M:%S")
|
|
|
+ except Exception:
|
|
|
+ return ""
|
|
|
+
|
|
|
+
|
|
|
async def get_all_comments(self, cookies: str) -> dict:
|
|
|
"""获取所有作品的评论 - 通过评论管理页面"""
|
|
|
print(f"\n{'='*60}")
|