douyin.py 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. # -*- coding: utf-8 -*-
  2. """
  3. 抖音视频发布器
  4. 参考: matrix/douyin_uploader/main.py
  5. """
  6. import asyncio
  7. import os
  8. import json
  9. from datetime import datetime
  10. from typing import List
  11. from .base import (
  12. BasePublisher, PublishParams, PublishResult,
  13. WorkItem, WorksResult, CommentItem, CommentsResult
  14. )
  15. class DouyinPublisher(BasePublisher):
  16. """
  17. 抖音视频发布器
  18. 使用 Playwright 自动化操作抖音创作者中心
  19. """
  20. platform_name = "douyin"
  21. login_url = "https://creator.douyin.com/"
  22. publish_url = "https://creator.douyin.com/creator-micro/content/upload"
  23. cookie_domain = ".douyin.com"
  async def set_schedule_time(self, publish_date: datetime):
      """Switch the publish form to scheduled mode and type in the target time.

      Returns immediately when no page is attached to this publisher.

      Args:
          publish_date: Moment the post should go live; it is typed into the
              form formatted as ``YYYY-MM-DD HH:MM``.
      """
      page = self.page
      if not page:
          return

      # Pick the "scheduled publish" radio option, then pause for one second.
      await page.locator("label.radio-d4zkru:has-text('定时发布')").click()
      await asyncio.sleep(1)

      # Focus the date-time field, select its content and type over it.
      formatted = publish_date.strftime("%Y-%m-%d %H:%M")
      await page.locator('.semi-input[placeholder="日期和时间"]').click()
      keyboard = page.keyboard
      await keyboard.press("Control+KeyA")
      await keyboard.type(formatted)
      await keyboard.press("Enter")
      await asyncio.sleep(1)
  async def handle_upload_error(self, video_path: str):
      """Retry a failed upload by handing the video file to the upload input again.

      Does nothing when no page is attached to this publisher.

      Args:
          video_path: Path of the video file to submit again.
      """
      if self.page:
          print(f"[{self.platform_name}] 视频出错了,重新上传中...")
          upload_input = self.page.locator('div.progress-div [class^="upload-btn-input"]')
          await upload_input.set_input_files(video_path)
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Upload a video to the Douyin creator center and publish it.

      The method starts the browser, installs the cookies, opens the upload
      page, hands over the video file, fills in title and topics, waits for
      the upload to finish, applies the optional settings (location, sync
      switch, schedule) and then clicks the publish button until the browser
      lands on the content-management page.

      Args:
          cookies: Serialized cookies, decoded with ``self.parse_cookies``.
          params: Publish parameters. This method reads ``video_path``,
              ``title``, ``tags``, ``location`` and ``publish_date``.

      Returns:
          A successful ``PublishResult`` once the content-management URL is
          reached.

      Raises:
          Exception: If the page is not initialized, the video file does not
              exist, the site shows an error toast after a failed publish
              attempt, or publishing is not confirmed within 30 attempts
              (a full-page screenshot is saved first).
      """
      prefix = f"[{self.platform_name}]"
      manage_url = "https://creator.douyin.com/creator-micro/content/manage"

      def finish(log_message: str) -> PublishResult:
          """Report completion, log it and build the success result."""
          self.report_progress(100, "发布成功")
          print(f"{prefix} {log_message}")
          return PublishResult(
              success=True,
              platform=self.platform_name,
              message="发布成功",
          )

      print(f"\n{'='*60}")
      print(f"{prefix} 开始发布视频")
      print(f"{prefix} 视频路径: {params.video_path}")
      print(f"{prefix} 标题: {params.title}")
      print(f"{prefix} Headless: {self.headless}")
      print(f"{'='*60}")

      self.report_progress(5, "正在初始化浏览器...")
      await self.init_browser()
      print(f"{prefix} 浏览器初始化完成")

      cookie_list = self.parse_cookies(cookies)
      print(f"{prefix} 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)

      page = self.page
      if not page:
          raise Exception("Page not initialized")

      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(f"{prefix} 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")

      self.report_progress(10, "正在打开上传页面...")
      await page.goto(self.publish_url)
      await page.wait_for_url(self.publish_url, timeout=30000)

      self.report_progress(15, "正在选择视频文件...")
      # Clicking the drop zone opens a file chooser, which receives the video.
      upload_div = page.locator("div[class*='container-drag']").first
      async with page.expect_file_chooser() as fc_info:
          await upload_div.click()
      file_chooser = await fc_info.value
      await file_chooser.set_files(params.video_path)

      self.report_progress(20, "等待进入发布页面...")
      # Poll for the post-editing URL. ``except Exception`` (not a bare
      # ``except``) keeps task cancellation and Ctrl-C working while waiting.
      for _ in range(60):
          try:
              await page.wait_for_url(
                  "https://creator.douyin.com/creator-micro/content/post/video*",
                  timeout=2000,
              )
              break
          except Exception:
              await asyncio.sleep(1)
      await asyncio.sleep(2)

      self.report_progress(30, "正在填充标题和话题...")
      title_input = page.get_by_text('作品标题').locator("..").locator(
          "xpath=following-sibling::div[1]").locator("input")
      if await title_input.count():
          # The dedicated title input receives at most 30 characters.
          await title_input.fill(params.title[:30])
      else:
          # Fallback: type the title into the rich-text editor instead.
          await page.locator(".notranslate").click()
          await page.keyboard.press("Control+KeyA")
          await page.keyboard.press("Delete")
          await page.keyboard.type(params.title)
          await page.keyboard.press("Enter")

      if params.tags:
          css_selector = ".zone-container"
          for tag in params.tags:
              print(f"{prefix} 添加话题: #{tag}")
              await page.type(css_selector, "#" + tag)
              await page.press(css_selector, "Space")

      self.report_progress(40, "等待视频上传完成...")
      for _ in range(120):
          try:
              # The "re-upload" control is taken as the sign that the upload is done.
              if await page.locator("div").filter(has_text="重新上传").count() > 0:
                  print(f"{prefix} 视频上传完毕")
                  break
              if await page.locator('div.progress-div > div:has-text("上传失败")').count():
                  await self.handle_upload_error(params.video_path)
              await asyncio.sleep(3)
          except Exception:
              await asyncio.sleep(3)

      self.report_progress(60, "处理视频设置...")
      # Dismiss the "got it" dialog when one is shown.
      known_btn = page.get_by_role("button", name="我知道了")
      if await known_btn.count() > 0:
          await known_btn.first.click()
          await asyncio.sleep(2)

      # Location is optional: without a value the selector is left untouched
      # instead of being opened and then failing or picking an arbitrary entry.
      location = getattr(params, "location", None)
      if location:
          try:
              await page.locator('div.semi-select span:has-text("输入地理位置")').click()
              await asyncio.sleep(1)
              await page.keyboard.press("Control+KeyA")
              await page.keyboard.press("Delete")
              await page.keyboard.type(location)
              await asyncio.sleep(1)
              await page.locator('div[role="listbox"] [role="option"]').first.click()
          except Exception as e:
              print(f"{prefix} 设置位置失败: {e}")

      # Best effort: turn the sync switch on when it exists and is off.
      try:
          third_part_element = '[class^="info"] > [class^="first-part"] div div.semi-switch'
          if await page.locator(third_part_element).count():
              class_name = await page.eval_on_selector(
                  third_part_element, 'div => div.className')
              if 'semi-switch-checked' not in class_name:
                  await page.locator(third_part_element).locator(
                      'input.semi-switch-native-control').click()
      except Exception as e:
          print(f"{prefix} 开启同步开关失败: {e}")

      if params.publish_date:
          self.report_progress(70, "设置定时发布...")
          await self.set_schedule_time(params.publish_date)

      self.report_progress(80, "正在发布...")
      print(f"{prefix} 查找发布按钮...")

      for attempt in range(30):
          try:
              publish_btn = page.get_by_role('button', name="发布", exact=True)
              btn_count = await publish_btn.count()
              print(f"{prefix} 发布按钮数量: {btn_count}")
              if btn_count > 0:
                  print(f"{prefix} 点击发布按钮...")
                  await publish_btn.click()
                  await page.wait_for_url(manage_url, timeout=5000)
                  return finish("发布成功! 已跳转到内容管理页面")
              if "content/manage" in page.url:
                  # An earlier click redirected after its wait timed out, so the
                  # button is gone; this is still a successful publish.
                  return finish("发布成功! 已在内容管理页面")
          except Exception:
              current_url = page.url
              print(f"{prefix} 尝试 {attempt+1}/30, 当前URL: {current_url}")
              if "content/manage" in current_url:
                  return finish("发布成功! 已在内容管理页面")

              # Look for an error toast. Only the lookup is guarded, so that the
              # failure raised below actually leaves the retry loop.
              error_text = ""
              try:
                  error_toast = page.locator(
                      '[class*="toast"][class*="error"], [class*="error-tip"]')
                  if await error_toast.count() > 0:
                      error_text = (await error_toast.first.text_content()) or ""
              except Exception:
                  error_text = ""
              if error_text:
                  print(f"{prefix} 检测到错误提示: {error_text}")
                  raise Exception(f"发布失败: {error_text}")
          await asyncio.sleep(1)

      # Not confirmed in time: keep a screenshot for debugging, then fail.
      screenshot_path = f"debug_publish_timeout_{self.platform_name}.png"
      await page.screenshot(path=screenshot_path, full_page=True)
      print(f"{prefix} 发布超时,截图保存到: {screenshot_path}")
      raise Exception(f"发布超时(截图: {screenshot_path})")
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Fetch one page of the account's published works.

      The creator-center home page is opened first so the cookies are applied
      and an expired login can be detected; the work-list API is then called
      from inside the page with ``fetch``.

      Args:
          cookies: Serialized cookies, decoded with ``self.parse_cookies``.
          page: Zero-based page index; sent as ``max_cursor = page * page_size``.
          page_size: Number of works requested (the ``count`` query parameter).

      Returns:
          ``WorksResult`` with ``success=True`` and the parsed works, or with
          ``success=False`` and ``error`` set when anything fails. ``total``
          is the number of works parsed from this response.
      """
      prefix = f"[{self.platform_name}]"
      print(f"\n{'='*60}")
      print(f"{prefix} 获取作品列表")
      print(f"{prefix} page={page}, page_size={page_size}")
      print(f"{'='*60}")

      works: List[WorkItem] = []
      total = 0
      has_more = False

      def first_url(container) -> str:
          """Return the first entry of ``container['url_list']``, or ''."""
          urls = (container or {}).get('url_list') or []
          return urls[0] if urls else ''

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Visit the creator-center home page to trigger login validation.
          await self.page.goto("https://creator.douyin.com/creator-micro/home")
          await asyncio.sleep(3)

          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          cursor = page * page_size
          api_url = f"https://creator.douyin.com/janus/douyin/creator/pc/work_list?scene=star_atlas&device_platform=android&count={page_size}&max_cursor={cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai&aid=1128"
          # The URL is passed as an argument rather than spliced into the
          # JavaScript source, so it can never break out of the string literal.
          response = await self.page.evaluate(
              '''
              async (url) => {
                  const resp = await fetch(url, {
                      credentials: 'include',
                      headers: { 'Accept': 'application/json' }
                  });
                  return await resp.json();
              }
              ''',
              api_url,
          )
          response = response or {}

          # ``or`` fallbacks are used because a JSON null makes dict.get()
          # return None even when a default is supplied.
          aweme_list = response.get('aweme_list') or []
          has_more = bool(response.get('has_more'))
          print(f"{prefix} API 响应: has_more={response.get('has_more')}, aweme_list={len(aweme_list)}")

          for aweme in aweme_list:
              aweme_id = str(aweme.get('aweme_id') or '')
              if not aweme_id:
                  continue

              statistics = aweme.get('statistics') or {}
              video = aweme.get('video') or {}

              # Prefer the top-level cover, fall back to the video's cover.
              cover_url = first_url(aweme.get('Cover')) or first_url(video.get('cover'))

              # Title: explicit title, else first line of the description.
              desc = aweme.get('desc') or ''
              title = aweme.get('item_title') or desc.split('\n')[0][:50] or '无标题'

              # Duration is integer-divided by 1000 (milliseconds to seconds).
              duration = (video.get('duration') or 0) // 1000

              create_time = aweme.get('create_time') or 0
              publish_time = (
                  datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S')
                  if create_time else ''
              )

              works.append(WorkItem(
                  work_id=aweme_id,
                  title=title,
                  cover_url=cover_url,
                  duration=duration,
                  status='published',
                  publish_time=publish_time,
                  play_count=int(statistics.get('play_count') or 0),
                  like_count=int(statistics.get('digg_count') or 0),
                  comment_count=int(statistics.get('comment_count') or 0),
                  share_count=int(statistics.get('share_count') or 0),
              ))

          total = len(works)
          print(f"{prefix} 获取到 {total} 个作品")
      except Exception as e:
          import traceback
          traceback.print_exc()
          return WorksResult(
              success=False,
              platform=self.platform_name,
              error=str(e)
          )
      finally:
          # Release the browser on every path, as the comment fetchers do.
          await self.close_browser()

      return WorksResult(
          success=True,
          platform=self.platform_name,
          works=works,
          total=total,
          has_more=has_more
      )
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Fetch the comments of one work by intercepting the page's comment API.

      The public video page is opened with the given cookies and a response
      listener keeps the most recent successful comment-list payload; the
      page is scrolled once if nothing was captured after the initial wait.

      Args:
          cookies: Serialized cookies, decoded with ``self.parse_cookies``.
          work_id: ID of the work whose video page is opened.
          cursor: Only logged here; the request itself is issued by the page.

      Returns:
          ``CommentsResult`` with ``success=True`` and the parsed comments
          (empty when no API response was captured), or ``success=False``
          with ``error`` set on failure. The cursor reported by the API is
          attached to the successful result as a ``cursor`` attribute.
      """
      prefix = f"[{self.platform_name}]"
      print(f"\n{'='*60}")
      print(f"{prefix} 获取作品评论")
      print(f"{prefix} work_id={work_id}, cursor={cursor}")
      print(f"{'='*60}")

      comments: List[CommentItem] = []
      total = 0
      has_more = False
      next_cursor = ""
      captured_data = {}

      def avatar_of(user: dict) -> str:
          """Return the first avatar URL, or '' when missing, null or empty."""
          urls = (user.get('avatar_thumb') or {}).get('url_list') or []
          return urls[0] if urls else ''

      def format_time(timestamp) -> str:
          """Format a Unix timestamp; falsy values become ''."""
          if not timestamp:
              return ''
          return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

      def to_item(raw: dict, **extra) -> CommentItem:
          """Build a CommentItem from one raw comment or reply payload."""
          user = raw.get('user') or {}
          return CommentItem(
              comment_id=str(raw.get('cid', '')),
              work_id=work_id,
              content=raw.get('text', ''),
              author_id=str(user.get('uid', '')),
              author_name=user.get('nickname', ''),
              author_avatar=avatar_of(user),
              like_count=int(raw.get('digg_count') or 0),
              create_time=format_time(raw.get('create_time')),
              is_author=raw.get('is_author', False),
              **extra,
          )

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          async def handle_response(response):
              """Keep the latest successful comment-list payload."""
              nonlocal captured_data
              url = response.url
              # Matches e.g. /aweme/v1/web/comment/list/ or /comment/list/.
              if '/comment/list' in url and ('aweme_id' in url or work_id in url):
                  try:
                      json_data = await response.json()
                      print(f"{prefix} 捕获到评论 API: {url[:100]}...", flush=True)
                      if json_data.get('status_code') == 0 or json_data.get('comments'):
                          captured_data = json_data
                          comment_count = len(json_data.get('comments') or [])
                          print(f"{prefix} 评论 API 响应成功: comments={comment_count}, has_more={json_data.get('has_more')}", flush=True)
                  except Exception as e:
                      print(f"{prefix} 解析评论响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"{prefix} 已注册评论 API 响应监听器", flush=True)

          # Loading the video page makes it request its comments by itself.
          video_url = f"https://www.douyin.com/video/{work_id}"
          print(f"{prefix} 访问视频详情页: {video_url}", flush=True)
          await self.page.goto(video_url, wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          if not captured_data:
              print(f"{prefix} 等待评论 API 响应...", flush=True)
              # Scroll a little to try to trigger comment loading.
              await self.page.evaluate('window.scrollBy(0, 300)')
              await asyncio.sleep(3)
          if not captured_data:
              await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)

          if captured_data:
              comment_list = captured_data.get('comments') or []
              # Normalized to a real bool; the payload value may be 1/0.
              has_more = bool(captured_data.get('has_more'))
              next_cursor = str(captured_data.get('cursor', ''))
              total = captured_data.get('total', 0) or len(comment_list)
              print(f"{prefix} 解析评论: total={total}, has_more={has_more}, comments={len(comment_list)}", flush=True)

              for comment in comment_list:
                  if not str(comment.get('cid', '')):
                      continue
                  replies = [to_item(reply) for reply in (comment.get('reply_comment') or [])]
                  comments.append(to_item(
                      comment,
                      reply_count=int(comment.get('reply_comment_total') or 0),
                      replies=replies,
                  ))
              print(f"{prefix} 解析到 {len(comments)} 条评论", flush=True)
          else:
              print(f"{prefix} 未捕获到评论 API 响应", flush=True)
      except Exception as e:
          import traceback
          traceback.print_exc()
          return CommentsResult(
              success=False,
              platform=self.platform_name,
              work_id=work_id,
              error=str(e)
          )
      finally:
          await self.close_browser()

      result = CommentsResult(
          success=True,
          platform=self.platform_name,
          work_id=work_id,
          comments=comments,
          total=total,
          has_more=has_more
      )
      # The cursor is attached through __dict__ rather than the constructor.
      # NOTE(review): presumably CommentsResult declares no cursor field - confirm.
      result.__dict__['cursor'] = next_cursor
      return result
  async def get_all_comments(self, cookies: str) -> dict:
      """Collect comments for the account's works via the comment-management page.

      A response listener records every comment-list and work-list payload
      the page loads while this method opens the comment-management page,
      clicks through up to 10 works in the "select work" dialog and scrolls.
      The captured comments are then grouped by work.

      Args:
          cookies: Serialized cookies, decoded with ``self.parse_cookies``.

      Returns:
          On success ``{'success': True, 'platform', 'work_comments', 'total'}``
          where ``work_comments`` is a list of
          ``{'work_id', 'title', 'cover_url', 'comments'}`` dicts and ``total``
          is the number of works. On failure
          ``{'success': False, 'platform', 'error', 'work_comments': []}``.
      """
      import re  # local import: only this method needs it

      prefix = f"[{self.platform_name}]"
      print(f"\n{'='*60}")
      print(f"{prefix} 获取所有作品评论")
      print(f"{'='*60}")

      all_work_comments = []
      captured_comments = []
      captured_works = {}  # work_id -> work_info

      def first_url(container) -> str:
          """Return the first entry of ``container['url_list']``, or ''."""
          urls = (container or {}).get('url_list') or []
          return urls[0] if urls else ''

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          async def handle_response(response):
              """Record comment and work payloads from matching API responses."""
              url = response.url
              try:
                  # Comment-list APIs, e.g. /comment/list/select/, /comment/read
                  # or /creator/comment/list.
                  if '/comment/list' in url or '/comment/read' in url or 'comment_list' in url:
                      json_data = await response.json()
                      print(f"{prefix} 捕获到评论 API: {url[:100]}...", flush=True)
                      # Two payload shapes: 'comments' or 'comment_info_list'.
                      comments = json_data.get('comments') or json_data.get('comment_info_list') or []
                      if comments:
                          # The work ID is taken from the request URL if present.
                          aweme_id_match = re.search(r'aweme_id=(\d+)', url)
                          aweme_id = aweme_id_match.group(1) if aweme_id_match else ''
                          for comment in comments:
                              if aweme_id and 'aweme_id' not in comment:
                                  comment['aweme_id'] = aweme_id
                              captured_comments.append(comment)
                          print(f"{prefix} 捕获到 {len(comments)} 条评论 (aweme_id={aweme_id}),总计: {len(captured_comments)}", flush=True)

                  # Work-list APIs.
                  if '/work_list' in url or '/item/list' in url or '/creator/item' in url:
                      json_data = await response.json()
                      aweme_list = (
                          json_data.get('aweme_list')
                          or json_data.get('item_info_list')
                          or json_data.get('item_list')
                          or []
                      )
                      print(f"{prefix} 捕获到作品列表 API: {len(aweme_list)} 个作品", flush=True)
                      for aweme in aweme_list:
                          aweme_id = str(aweme.get('aweme_id', '') or aweme.get('item_id', '') or aweme.get('item_id_plain', ''))
                          if not aweme_id:
                              continue
                          video = aweme.get('video') or {}
                          cover_url = (
                              first_url(aweme.get('Cover'))
                              or first_url(video.get('cover'))
                              or aweme.get('cover_image_url')
                              or ''
                          )
                          captured_works[aweme_id] = {
                              'title': aweme.get('item_title', '') or aweme.get('title', '') or aweme.get('desc', ''),
                              'cover': cover_url,
                              'comment_count': (aweme.get('statistics') or {}).get('comment_count', 0) or aweme.get('comment_count', 0),
                          }
              except Exception as e:
                  print(f"{prefix} 解析响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"{prefix} 已注册 API 响应监听器", flush=True)

          print(f"{prefix} 访问评论管理页面...", flush=True)
          await self.page.goto("https://creator.douyin.com/creator-micro/interactive/comment", wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          print(f"{prefix} 页面加载完成,当前捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          # Open the "select work" dialog and click through the works so the
          # page requests each work's comments.
          try:
              select_btn = await self.page.query_selector('text="选择作品"')
              if select_btn:
                  print(f"{prefix} 点击选择作品按钮...", flush=True)
                  await select_btn.click()
                  await asyncio.sleep(3)

                  work_items = await self.page.query_selector_all('[class*="work-item"], [class*="video-item"], [class*="aweme-item"]')
                  print(f"{prefix} 找到 {len(work_items)} 个作品元素", flush=True)

                  # At most 10 works are processed.
                  for i, item in enumerate(work_items[:10]):
                      try:
                          await item.click()
                          await asyncio.sleep(2)
                          print(f"{prefix} 已点击作品 {i+1}/{min(len(work_items), 10)}", flush=True)
                      except Exception:
                          # A work that cannot be clicked is skipped; unlike a
                          # bare ``except`` this still lets cancellation through.
                          pass

                  close_btn = await self.page.query_selector('[class*="close"], [class*="cancel"]')
                  if close_btn:
                      await close_btn.click()
                      await asyncio.sleep(1)
          except Exception as e:
              print(f"{prefix} 选择作品操作失败: {e}", flush=True)

          # Scroll to load more comments.
          for _ in range(5):
              await self.page.evaluate('window.scrollBy(0, 500)')
              await asyncio.sleep(1)
          await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)
          print(f"{prefix} 最终捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          # Group comments by work. The same API response can be captured more
          # than once (initial load, clicks, scrolling), so each
          # (work, comment) pair is emitted only once.
          work_comments_map = {}  # work_id -> work_comments
          seen = set()
          for comment in captured_comments:
              aweme = comment.get('aweme') or comment.get('item') or {}
              aweme_id = str(comment.get('aweme_id', '') or aweme.get('aweme_id', '') or aweme.get('item_id', ''))
              if not aweme_id:
                  continue

              if aweme_id not in work_comments_map:
                  work_info = captured_works.get(aweme_id, {})
                  work_comments_map[aweme_id] = {
                      'work_id': aweme_id,
                      'title': aweme.get('title', '') or aweme.get('desc', '') or work_info.get('title', ''),
                      'cover_url': first_url(aweme.get('cover')) if aweme.get('cover') else work_info.get('cover', ''),
                      'comments': []
                  }

              cid = str(comment.get('cid', ''))
              if not cid or (aweme_id, cid) in seen:
                  continue
              seen.add((aweme_id, cid))

              user = comment.get('user') or {}
              create_time = comment.get('create_time')
              work_comments_map[aweme_id]['comments'].append({
                  'comment_id': cid,
                  'author_id': str(user.get('uid', '')),
                  'author_name': user.get('nickname', ''),
                  'author_avatar': first_url(user.get('avatar_thumb')),
                  'content': comment.get('text', ''),
                  'like_count': int(comment.get('digg_count') or 0),
                  'create_time': datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S') if create_time else '',
                  'is_author': comment.get('is_author', False),
              })

          all_work_comments = list(work_comments_map.values())
          total_comments = sum(len(w['comments']) for w in all_work_comments)
          print(f"{prefix} 获取到 {len(all_work_comments)} 个作品的 {total_comments} 条评论", flush=True)
      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e),
              'work_comments': []
          }
      finally:
          await self.close_browser()

      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': all_work_comments,
          'total': len(all_work_comments)
      }