douyin.py 49 KB


  1. # -*- coding: utf-8 -*-
  2. """
  3. 抖音视频发布器
  4. 参考: matrix/douyin_uploader/main.py
  5. """
  6. import asyncio
  7. import os
  8. import json
  9. import re
  10. from datetime import datetime
  11. from typing import List
  12. from .base import (
  13. BasePublisher, PublishParams, PublishResult,
  14. WorkItem, WorksResult, CommentItem, CommentsResult
  15. )
class DouyinPublisher(BasePublisher):
    """
    Douyin video publisher.

    Uses Playwright to automate the Douyin Creator Center.
    """
    # Platform identifier; also used as the "[douyin]" prefix of log lines.
    platform_name = "douyin"
    # Root URL of the Creator Center (presumably where login starts - TODO confirm
    # against BasePublisher).
    login_url = "https://creator.douyin.com/"
    # Creator Center video upload page.
    publish_url = "https://creator.douyin.com/creator-micro/content/upload"
    # Presumably the domain the base class applies to injected cookies - TODO confirm.
    cookie_domain = ".douyin.com"
  25. async def set_schedule_time(self, publish_date: datetime):
  26. """设置定时发布"""
  27. if not self.page:
  28. return
  29. # 选择定时发布
  30. label_element = self.page.locator("label.radio-d4zkru:has-text('定时发布')")
  31. await label_element.click()
  32. await asyncio.sleep(1)
  33. # 输入时间
  34. publish_date_str = publish_date.strftime("%Y-%m-%d %H:%M")
  35. await self.page.locator('.semi-input[placeholder="日期和时间"]').click()
  36. await self.page.keyboard.press("Control+KeyA")
  37. await self.page.keyboard.type(str(publish_date_str))
  38. await self.page.keyboard.press("Enter")
  39. await asyncio.sleep(1)
  40. async def handle_upload_error(self, video_path: str):
  41. """处理上传错误,重新上传"""
  42. if not self.page:
  43. return
  44. print(f"[{self.platform_name}] 视频出错了,重新上传中...")
  45. await self.page.locator('div.progress-div [class^="upload-btn-input"]').set_input_files(video_path)
  46. async def check_captcha(self) -> dict:
  47. """
  48. 检查页面是否需要验证码
  49. 返回: {'need_captcha': bool, 'captcha_type': str}
  50. """
  51. if not self.page:
  52. return {'need_captcha': False, 'captcha_type': ''}
  53. try:
  54. # 检查手机验证码弹窗
  55. phone_captcha_selectors = [
  56. 'text="请输入验证码"',
  57. 'text="输入手机验证码"',
  58. 'text="获取验证码"',
  59. 'text="手机号验证"',
  60. '[class*="captcha"][class*="phone"]',
  61. '[class*="verify"][class*="phone"]',
  62. '[class*="sms-code"]',
  63. 'input[placeholder*="验证码"]',
  64. ]
  65. for selector in phone_captcha_selectors:
  66. try:
  67. if await self.page.locator(selector).count() > 0:
  68. print(f"[{self.platform_name}] 检测到手机验证码: {selector}", flush=True)
  69. return {'need_captcha': True, 'captcha_type': 'phone'}
  70. except:
  71. pass
  72. # 检查滑块验证码
  73. slider_captcha_selectors = [
  74. '[class*="captcha"][class*="slider"]',
  75. '[class*="slide-verify"]',
  76. '[class*="drag-verify"]',
  77. 'text="按住滑块"',
  78. 'text="向右滑动"',
  79. 'text="拖动滑块"',
  80. ]
  81. for selector in slider_captcha_selectors:
  82. try:
  83. if await self.page.locator(selector).count() > 0:
  84. print(f"[{self.platform_name}] 检测到滑块验证码: {selector}", flush=True)
  85. return {'need_captcha': True, 'captcha_type': 'slider'}
  86. except:
  87. pass
  88. # 检查图片验证码
  89. image_captcha_selectors = [
  90. '[class*="captcha"][class*="image"]',
  91. '[class*="verify-image"]',
  92. 'text="点击图片"',
  93. 'text="选择正确的"',
  94. ]
  95. for selector in image_captcha_selectors:
  96. try:
  97. if await self.page.locator(selector).count() > 0:
  98. print(f"[{self.platform_name}] 检测到图片验证码: {selector}", flush=True)
  99. return {'need_captcha': True, 'captcha_type': 'image'}
  100. except:
  101. pass
  102. # 检查登录弹窗(Cookie 过期)
  103. login_selectors = [
  104. 'text="请先登录"',
  105. 'text="登录后继续"',
  106. '[class*="login-modal"]',
  107. '[class*="login-dialog"]',
  108. ]
  109. for selector in login_selectors:
  110. try:
  111. if await self.page.locator(selector).count() > 0:
  112. print(f"[{self.platform_name}] 检测到需要登录: {selector}", flush=True)
  113. return {'need_captcha': True, 'captcha_type': 'login'}
  114. except:
  115. pass
  116. except Exception as e:
  117. print(f"[{self.platform_name}] 验证码检测异常: {e}", flush=True)
  118. return {'need_captcha': False, 'captcha_type': ''}
  119. async def handle_phone_captcha(self) -> bool:
  120. if not self.page:
  121. return False
  122. try:
  123. body_text = ""
  124. try:
  125. body_text = await self.page.inner_text("body")
  126. except:
  127. body_text = ""
  128. phone_match = re.search(r"(1\d{2}\*{4}\d{4})", body_text or "")
  129. masked_phone = phone_match.group(1) if phone_match else ""
  130. async def _get_send_button():
  131. candidates = [
  132. self.page.get_by_role("button", name="获取验证码"),
  133. self.page.get_by_role("button", name="发送验证码"),
  134. self.page.locator('button:has-text("获取验证码")'),
  135. self.page.locator('button:has-text("发送验证码")'),
  136. self.page.locator('[role="button"]:has-text("获取验证码")'),
  137. self.page.locator('[role="button"]:has-text("发送验证码")'),
  138. ]
  139. for c in candidates:
  140. try:
  141. if await c.count() > 0 and await c.first.is_visible():
  142. return c.first
  143. except:
  144. continue
  145. return None
  146. async def _confirm_sent() -> bool:
  147. try:
  148. txt = ""
  149. try:
  150. txt = await self.page.inner_text("body")
  151. except:
  152. txt = ""
  153. if re.search(r"(\d+\s*秒)|(\d+\s*s)|后可重试|重新发送|已发送", txt or ""):
  154. return True
  155. except:
  156. pass
  157. try:
  158. btn = await _get_send_button()
  159. if btn:
  160. disabled = await btn.is_disabled()
  161. if disabled:
  162. return True
  163. label = (await btn.inner_text()) if btn else ""
  164. if re.search(r"(\d+\s*秒)|(\d+\s*s)|后可重试|重新发送|已发送", label or ""):
  165. return True
  166. except:
  167. pass
  168. return False
  169. did_click_send = False
  170. btn = await _get_send_button()
  171. if btn:
  172. try:
  173. if await btn.is_enabled():
  174. await btn.click(timeout=5000)
  175. did_click_send = True
  176. print(f"[{self.platform_name}] 已点击发送短信验证码", flush=True)
  177. except Exception as e:
  178. print(f"[{self.platform_name}] 点击发送验证码按钮失败: {e}", flush=True)
  179. if did_click_send:
  180. try:
  181. await self.page.wait_for_timeout(800)
  182. except:
  183. pass
  184. sent_confirmed = await _confirm_sent() if did_click_send else False
  185. ai_state = await self.ai_analyze_sms_send_state()
  186. try:
  187. if ai_state.get("sent_likely"):
  188. sent_confirmed = True
  189. except:
  190. pass
  191. if (not did_click_send or not sent_confirmed) and ai_state.get("suggested_action") == "click_send":
  192. btn2 = await _get_send_button()
  193. if btn2:
  194. try:
  195. if await btn2.is_enabled():
  196. await btn2.click(timeout=5000)
  197. did_click_send = True
  198. await self.page.wait_for_timeout(800)
  199. sent_confirmed = await _confirm_sent()
  200. ai_state = await self.ai_analyze_sms_send_state()
  201. if ai_state.get("sent_likely"):
  202. sent_confirmed = True
  203. except:
  204. pass
  205. code_hint = "请输入短信验证码。"
  206. if ai_state.get("block_reason") == "slider":
  207. code_hint = "检测到滑块/人机验证阻塞,请先在浏览器窗口完成验证后再发送短信验证码。"
  208. elif ai_state.get("block_reason") in ["rate_limit", "risk"]:
  209. code_hint = f"页面提示可能被限制/风控({ai_state.get('notes','') or '请稍后重试'})。可稍等后重新发送验证码。"
  210. elif not did_click_send:
  211. code_hint = "未找到或无法点击“发送验证码”按钮,请在弹出的浏览器页面手动点击发送后再输入验证码。"
  212. elif sent_confirmed:
  213. code_hint = f"已检测到短信验证码已发送({ai_state.get('notes','') or '请查收短信'})。"
  214. else:
  215. code_hint = f"已尝试点击发送验证码,但未确认发送成功({ai_state.get('notes','') or '请查看是否出现倒计时/重新发送'})。"
  216. code = await self.request_sms_code_from_frontend(masked_phone, message=code_hint)
  217. input_selectors = [
  218. 'input[placeholder*="验证码"]',
  219. 'input[placeholder*="短信"]',
  220. 'input[type="tel"]',
  221. 'input[type="text"]',
  222. ]
  223. filled = False
  224. for selector in input_selectors:
  225. try:
  226. el = self.page.locator(selector).first
  227. if await el.count() > 0:
  228. await el.fill(code)
  229. filled = True
  230. break
  231. except:
  232. continue
  233. if not filled:
  234. raise Exception("未找到验证码输入框")
  235. submit_selectors = [
  236. 'button:has-text("确定")',
  237. 'button:has-text("确认")',
  238. 'button:has-text("提交")',
  239. 'button:has-text("完成")',
  240. ]
  241. for selector in submit_selectors:
  242. try:
  243. btn = self.page.locator(selector).first
  244. if await btn.count() > 0:
  245. await btn.click()
  246. break
  247. except:
  248. continue
  249. try:
  250. await self.page.wait_for_timeout(1000)
  251. await self.page.wait_for_selector('text="请输入验证码"', state="hidden", timeout=15000)
  252. except:
  253. pass
  254. print(f"[{self.platform_name}] 短信验证码已提交,继续执行发布流程", flush=True)
  255. return True
  256. except Exception as e:
  257. print(f"[{self.platform_name}] 处理短信验证码失败: {e}", flush=True)
  258. return False
  259. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  260. """发布视频到抖音 - 参考 matrix/douyin_uploader/main.py"""
  261. print(f"\n{'='*60}")
  262. print(f"[{self.platform_name}] 开始发布视频")
  263. print(f"[{self.platform_name}] 视频路径: {params.video_path}")
  264. print(f"[{self.platform_name}] 标题: {params.title}")
  265. print(f"[{self.platform_name}] Headless: {self.headless}")
  266. print(f"{'='*60}")
  267. self.report_progress(5, "正在初始化浏览器...")
  268. # 初始化浏览器
  269. await self.init_browser()
  270. print(f"[{self.platform_name}] 浏览器初始化完成")
  271. # 解析并设置 cookies
  272. cookie_list = self.parse_cookies(cookies)
  273. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
  274. await self.set_cookies(cookie_list)
  275. if not self.page:
  276. raise Exception("Page not initialized")
  277. # 检查视频文件
  278. if not os.path.exists(params.video_path):
  279. raise Exception(f"视频文件不存在: {params.video_path}")
  280. print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
  281. self.report_progress(10, "正在打开上传页面...")
  282. # 访问上传页面 - 参考 matrix
  283. await self.page.goto("https://creator.douyin.com/creator-micro/content/upload")
  284. print(f"[{self.platform_name}] 等待页面加载...")
  285. try:
  286. await self.page.wait_for_url("https://creator.douyin.com/creator-micro/content/upload", timeout=30000)
  287. except:
  288. pass
  289. await asyncio.sleep(3)
  290. # 检查当前 URL 和页面状态
  291. current_url = self.page.url
  292. print(f"[{self.platform_name}] 当前 URL: {current_url}")
  293. # 检查是否在登录页面或需要登录
  294. if "login" in current_url or "passport" in current_url:
  295. screenshot_base64 = await self.capture_screenshot()
  296. return PublishResult(
  297. success=False,
  298. platform=self.platform_name,
  299. error="Cookie 已过期,需要重新登录",
  300. need_captcha=True,
  301. captcha_type='login',
  302. screenshot_base64=screenshot_base64,
  303. page_url=current_url,
  304. status='need_captcha'
  305. )
  306. # 使用 AI 检测验证码
  307. ai_captcha_result = await self.ai_check_captcha()
  308. if ai_captcha_result['has_captcha']:
  309. print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha_result['captcha_type']}", flush=True)
  310. screenshot_base64 = await self.capture_screenshot()
  311. return PublishResult(
  312. success=False,
  313. platform=self.platform_name,
  314. error=f"检测到{ai_captcha_result['captcha_type']}验证码,需要使用有头浏览器完成验证",
  315. need_captcha=True,
  316. captcha_type=ai_captcha_result['captcha_type'],
  317. screenshot_base64=screenshot_base64,
  318. page_url=current_url,
  319. status='need_captcha'
  320. )
  321. # 传统方式检测验证码
  322. captcha_result = await self.check_captcha()
  323. if captcha_result['need_captcha']:
  324. print(f"[{self.platform_name}] 传统方式检测到验证码: {captcha_result['captcha_type']}", flush=True)
  325. if captcha_result['captcha_type'] == 'phone':
  326. handled = await self.handle_phone_captcha()
  327. if handled:
  328. self.report_progress(12, "短信验证码已处理,继续发布...")
  329. else:
  330. screenshot_base64 = await self.capture_screenshot()
  331. return PublishResult(
  332. success=False,
  333. platform=self.platform_name,
  334. error="检测到手机验证码,但自动处理失败",
  335. need_captcha=True,
  336. captcha_type='phone',
  337. screenshot_base64=screenshot_base64,
  338. page_url=current_url,
  339. status='need_captcha'
  340. )
  341. else:
  342. screenshot_base64 = await self.capture_screenshot()
  343. return PublishResult(
  344. success=False,
  345. platform=self.platform_name,
  346. error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
  347. need_captcha=True,
  348. captcha_type=captcha_result['captcha_type'],
  349. screenshot_base64=screenshot_base64,
  350. page_url=current_url,
  351. status='need_captcha'
  352. )
  353. self.report_progress(15, "正在选择视频文件...")
  354. # 点击上传区域 - 参考 matrix: div.container-drag-info-Tl0RGH 或带 container-drag 的 div
  355. upload_selectors = [
  356. "div[class*='container-drag-info']",
  357. "div[class*='container-drag']",
  358. "div.upload-btn",
  359. "div[class*='upload']",
  360. ]
  361. upload_success = False
  362. for selector in upload_selectors:
  363. try:
  364. upload_div = self.page.locator(selector).first
  365. if await upload_div.count() > 0:
  366. print(f"[{self.platform_name}] 找到上传区域: {selector}")
  367. async with self.page.expect_file_chooser(timeout=10000) as fc_info:
  368. await upload_div.click()
  369. file_chooser = await fc_info.value
  370. await file_chooser.set_files(params.video_path)
  371. upload_success = True
  372. print(f"[{self.platform_name}] 视频文件已选择")
  373. break
  374. except Exception as e:
  375. print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")
  376. if not upload_success:
  377. screenshot_base64 = await self.capture_screenshot()
  378. return PublishResult(
  379. success=False,
  380. platform=self.platform_name,
  381. error="未找到上传入口",
  382. screenshot_base64=screenshot_base64,
  383. page_url=await self.get_page_url(),
  384. status='failed'
  385. )
  386. # 等待跳转到发布页面 - 参考 matrix
  387. self.report_progress(20, "等待进入发布页面...")
  388. for i in range(60):
  389. try:
  390. # matrix 等待的 URL: https://creator.douyin.com/creator-micro/content/post/video?enter_from=publish_page
  391. await self.page.wait_for_url(
  392. "https://creator.douyin.com/creator-micro/content/post/video*",
  393. timeout=2000
  394. )
  395. print(f"[{self.platform_name}] 已进入发布页面")
  396. break
  397. except:
  398. print(f"[{self.platform_name}] 等待进入发布页面... {i+1}/60")
  399. await asyncio.sleep(1)
  400. await asyncio.sleep(2)
  401. self.report_progress(30, "正在填充标题和话题...")
  402. # 填写标题 - 参考 matrix
  403. title_input = self.page.get_by_text('作品标题').locator("..").locator(
  404. "xpath=following-sibling::div[1]").locator("input")
  405. if await title_input.count():
  406. await title_input.fill(params.title[:30])
  407. print(f"[{self.platform_name}] 标题已填写")
  408. else:
  409. # 备用方式 - 参考 matrix
  410. title_container = self.page.locator(".notranslate")
  411. await title_container.click()
  412. await self.page.keyboard.press("Backspace")
  413. await self.page.keyboard.press("Control+KeyA")
  414. await self.page.keyboard.press("Delete")
  415. await self.page.keyboard.type(params.title)
  416. await self.page.keyboard.press("Enter")
  417. print(f"[{self.platform_name}] 标题已填写(备用方式)")
  418. # 添加话题标签 - 参考 matrix
  419. if params.tags:
  420. css_selector = ".zone-container"
  421. for index, tag in enumerate(params.tags, start=1):
  422. print(f"[{self.platform_name}] 正在添加第{index}个话题: #{tag}")
  423. await self.page.type(css_selector, "#" + tag)
  424. await self.page.press(css_selector, "Space")
  425. self.report_progress(40, "等待视频上传完成...")
  426. # 等待视频上传完成 - 参考 matrix: 检测"重新上传"按钮
  427. for i in range(120):
  428. try:
  429. count = await self.page.locator("div").filter(has_text="重新上传").count()
  430. if count > 0:
  431. print(f"[{self.platform_name}] 视频上传完毕")
  432. break
  433. else:
  434. print(f"[{self.platform_name}] 正在上传视频中... {i+1}/120")
  435. # 检查上传错误
  436. if await self.page.locator('div.progress-div > div:has-text("上传失败")').count():
  437. print(f"[{self.platform_name}] 发现上传出错了,重新上传...")
  438. await self.handle_upload_error(params.video_path)
  439. await asyncio.sleep(3)
  440. except:
  441. print(f"[{self.platform_name}] 正在上传视频中...")
  442. await asyncio.sleep(3)
  443. self.report_progress(60, "处理视频设置...")
  444. # 点击"我知道了"弹窗 - 参考 matrix
  445. known_count = await self.page.get_by_role("button", name="我知道了").count()
  446. if known_count > 0:
  447. await self.page.get_by_role("button", name="我知道了").nth(0).click()
  448. print(f"[{self.platform_name}] 关闭弹窗")
  449. await asyncio.sleep(5)
  450. # 设置位置 - 参考 matrix
  451. try:
  452. await self.page.locator('div.semi-select span:has-text("输入地理位置")').click()
  453. await asyncio.sleep(1)
  454. await self.page.keyboard.press("Backspace")
  455. await self.page.keyboard.press("Control+KeyA")
  456. await self.page.keyboard.press("Delete")
  457. await self.page.keyboard.type(params.location)
  458. await asyncio.sleep(1)
  459. await self.page.locator('div[role="listbox"] [role="option"]').first.click()
  460. print(f"[{self.platform_name}] 位置设置成功: {params.location}")
  461. except Exception as e:
  462. print(f"[{self.platform_name}] 设置位置失败: {e}")
  463. # 开启头条/西瓜同步 - 参考 matrix
  464. try:
  465. third_part_element = '[class^="info"] > [class^="first-part"] div div.semi-switch'
  466. if await self.page.locator(third_part_element).count():
  467. class_name = await self.page.eval_on_selector(
  468. third_part_element, 'div => div.className')
  469. if 'semi-switch-checked' not in class_name:
  470. await self.page.locator(third_part_element).locator(
  471. 'input.semi-switch-native-control').click()
  472. print(f"[{self.platform_name}] 已开启头条/西瓜同步")
  473. except:
  474. pass
  475. # 定时发布
  476. if params.publish_date:
  477. self.report_progress(70, "设置定时发布...")
  478. await self.set_schedule_time(params.publish_date)
  479. self.report_progress(80, "正在发布...")
  480. print(f"[{self.platform_name}] 查找发布按钮...")
  481. # 点击发布 - 参考 matrix
  482. for i in range(30):
  483. try:
  484. # 检查验证码(不要在每次循环都调 AI,太慢)
  485. if i % 5 == 0:
  486. ai_captcha = await self.ai_check_captcha()
  487. if ai_captcha['has_captcha']:
  488. print(f"[{self.platform_name}] AI检测到发布过程中需要验证码: {ai_captcha['captcha_type']}", flush=True)
  489. if ai_captcha['captcha_type'] == 'phone':
  490. handled = await self.handle_phone_captcha()
  491. if handled:
  492. continue
  493. screenshot_base64 = await self.capture_screenshot()
  494. page_url = await self.get_page_url()
  495. return PublishResult(
  496. success=False,
  497. platform=self.platform_name,
  498. error=f"发布过程中需要{ai_captcha['captcha_type']}验证码,请使用有头浏览器完成验证",
  499. need_captcha=True,
  500. captcha_type=ai_captcha['captcha_type'],
  501. screenshot_base64=screenshot_base64,
  502. page_url=page_url,
  503. status='need_captcha'
  504. )
  505. publish_btn = self.page.get_by_role('button', name="发布", exact=True)
  506. btn_count = await publish_btn.count()
  507. if btn_count > 0:
  508. print(f"[{self.platform_name}] 点击发布按钮...")
  509. await publish_btn.click()
  510. # 等待跳转到内容管理页面 - 参考 matrix
  511. await self.page.wait_for_url(
  512. "https://creator.douyin.com/creator-micro/content/manage",
  513. timeout=5000
  514. )
  515. self.report_progress(100, "发布成功")
  516. print(f"[{self.platform_name}] 视频发布成功!")
  517. screenshot_base64 = await self.capture_screenshot()
  518. page_url = await self.get_page_url()
  519. return PublishResult(
  520. success=True,
  521. platform=self.platform_name,
  522. message="发布成功",
  523. screenshot_base64=screenshot_base64,
  524. page_url=page_url,
  525. status='success'
  526. )
  527. except Exception as e:
  528. current_url = self.page.url
  529. # 检查是否已经在管理页面
  530. if "https://creator.douyin.com/creator-micro/content/manage" in current_url:
  531. self.report_progress(100, "发布成功")
  532. print(f"[{self.platform_name}] 视频发布成功!")
  533. screenshot_base64 = await self.capture_screenshot()
  534. return PublishResult(
  535. success=True,
  536. platform=self.platform_name,
  537. message="发布成功",
  538. screenshot_base64=screenshot_base64,
  539. page_url=current_url,
  540. status='success'
  541. )
  542. else:
  543. print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30, URL: {current_url}")
  544. await asyncio.sleep(1)
  545. # 发布超时
  546. print(f"[{self.platform_name}] 发布超时,获取截图...")
  547. screenshot_base64 = await self.capture_screenshot()
  548. page_url = await self.get_page_url()
  549. return PublishResult(
  550. success=False,
  551. platform=self.platform_name,
  552. error="发布超时,请检查发布状态",
  553. screenshot_base64=screenshot_base64,
  554. page_url=page_url,
  555. status='need_action'
  556. )
  557. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  558. """获取抖音作品列表"""
  559. print(f"\n{'='*60}")
  560. print(f"[{self.platform_name}] 获取作品列表")
  561. print(f"[{self.platform_name}] page={page}, page_size={page_size}")
  562. print(f"{'='*60}")
  563. works: List[WorkItem] = []
  564. total = 0
  565. has_more = False
  566. try:
  567. await self.init_browser()
  568. cookie_list = self.parse_cookies(cookies)
  569. await self.set_cookies(cookie_list)
  570. if not self.page:
  571. raise Exception("Page not initialized")
  572. # 访问创作者中心首页以触发登录验证
  573. await self.page.goto("https://creator.douyin.com/creator-micro/home")
  574. await asyncio.sleep(3)
  575. # 检查登录状态
  576. current_url = self.page.url
  577. if "login" in current_url or "passport" in current_url:
  578. raise Exception("Cookie 已过期,请重新登录")
  579. # 调用作品列表 API
  580. cursor = page * page_size
  581. # 移除 scene=star_atlas 和 aid=1128,使用更通用的参数
  582. api_url = f"https://creator.douyin.com/janus/douyin/creator/pc/work_list?status=0&device_platform=android&count={page_size}&max_cursor={cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai"
  583. response = await self.page.evaluate(f'''
  584. async () => {{
  585. try {{
  586. const resp = await fetch("{api_url}", {{
  587. credentials: 'include',
  588. headers: {{ 'Accept': 'application/json' }}
  589. }});
  590. return await resp.json();
  591. }} catch (e) {{
  592. return {{ error: e.toString() }};
  593. }}
  594. }}
  595. ''')
  596. if response.get('error'):
  597. print(f"[{self.platform_name}] API 请求失败: {response.get('error')}", flush=True)
  598. print(f"[{self.platform_name}] API 响应: has_more={response.get('has_more')}, aweme_list={len(response.get('aweme_list', []))}")
  599. aweme_list = response.get('aweme_list', [])
  600. has_more = response.get('has_more', False)
  601. for aweme in aweme_list:
  602. aweme_id = str(aweme.get('aweme_id', ''))
  603. if not aweme_id:
  604. continue
  605. statistics = aweme.get('statistics', {})
  606. # 打印调试信息,确认字段存在
  607. # print(f"[{self.platform_name}] 作品 {aweme_id} 统计: {statistics}", flush=True)
  608. # 获取封面
  609. cover_url = ''
  610. if aweme.get('Cover', {}).get('url_list'):
  611. cover_url = aweme['Cover']['url_list'][0]
  612. elif aweme.get('video', {}).get('cover', {}).get('url_list'):
  613. cover_url = aweme['video']['cover']['url_list'][0]
  614. # 获取标题
  615. title = aweme.get('item_title', '') or aweme.get('desc', '').split('\n')[0][:50] or '无标题'
  616. # 获取时长(毫秒转秒)
  617. duration = aweme.get('video', {}).get('duration', 0) // 1000
  618. # 获取发布时间
  619. create_time = aweme.get('create_time', 0)
  620. publish_time = datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S') if create_time else ''
  621. works.append(WorkItem(
  622. work_id=aweme_id,
  623. title=title,
  624. cover_url=cover_url,
  625. duration=duration,
  626. status='published',
  627. publish_time=publish_time,
  628. play_count=int(statistics.get('play_count', 0)),
  629. like_count=int(statistics.get('digg_count', 0)),
  630. comment_count=int(statistics.get('comment_count', 0)),
  631. share_count=int(statistics.get('share_count', 0)),
  632. ))
  633. total = len(works)
  634. print(f"[{self.platform_name}] 获取到 {total} 个作品")
  635. except Exception as e:
  636. import traceback
  637. traceback.print_exc()
  638. return WorksResult(
  639. success=False,
  640. platform=self.platform_name,
  641. error=str(e)
  642. )
  643. return WorksResult(
  644. success=True,
  645. platform=self.platform_name,
  646. works=works,
  647. total=total,
  648. has_more=has_more
  649. )
  650. async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
  651. """获取抖音作品评论 - 通过访问视频详情页拦截评论 API"""
  652. print(f"\n{'='*60}")
  653. print(f"[{self.platform_name}] 获取作品评论")
  654. print(f"[{self.platform_name}] work_id={work_id}, cursor={cursor}")
  655. print(f"{'='*60}")
  656. comments: List[CommentItem] = []
  657. total = 0
  658. has_more = False
  659. next_cursor = ""
  660. captured_data = {}
  661. try:
  662. await self.init_browser()
  663. cookie_list = self.parse_cookies(cookies)
  664. await self.set_cookies(cookie_list)
  665. if not self.page:
  666. raise Exception("Page not initialized")
  667. # 设置 API 响应监听器
  668. async def handle_response(response):
  669. nonlocal captured_data
  670. url = response.url
  671. # 监听评论列表 API - 抖音视频页面使用的 API
  672. # /aweme/v1/web/comment/list/ 或 /comment/list/
  673. if '/comment/list' in url and ('aweme_id' in url or work_id in url):
  674. try:
  675. json_data = await response.json()
  676. print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
  677. # 检查响应是否成功
  678. if json_data.get('status_code') == 0 or json_data.get('comments'):
  679. captured_data = json_data
  680. comment_count = len(json_data.get('comments', []))
  681. print(f"[{self.platform_name}] 评论 API 响应成功: comments={comment_count}, has_more={json_data.get('has_more')}", flush=True)
  682. except Exception as e:
  683. print(f"[{self.platform_name}] 解析评论响应失败: {e}", flush=True)
  684. self.page.on('response', handle_response)
  685. print(f"[{self.platform_name}] 已注册评论 API 响应监听器", flush=True)
  686. # 访问视频详情页 - 这会自动触发评论 API 请求
  687. video_url = f"https://www.douyin.com/video/{work_id}"
  688. print(f"[{self.platform_name}] 访问视频详情页: {video_url}", flush=True)
  689. await self.page.goto(video_url, wait_until="domcontentloaded", timeout=30000)
  690. await asyncio.sleep(5)
  691. # 检查登录状态
  692. current_url = self.page.url
  693. if "login" in current_url or "passport" in current_url:
  694. raise Exception("Cookie 已过期,请重新登录")
  695. # 等待评论加载
  696. if not captured_data:
  697. print(f"[{self.platform_name}] 等待评论 API 响应...", flush=True)
  698. # 尝试滚动页面触发评论加载
  699. await self.page.evaluate('window.scrollBy(0, 300)')
  700. await asyncio.sleep(3)
  701. if not captured_data:
  702. # 再等待一会
  703. await asyncio.sleep(3)
  704. # 移除监听器
  705. self.page.remove_listener('response', handle_response)
  706. # 解析评论数据
  707. if captured_data:
  708. comment_list = captured_data.get('comments') or []
  709. has_more = captured_data.get('has_more', False) or captured_data.get('has_more', 0) == 1
  710. next_cursor = str(captured_data.get('cursor', ''))
  711. total = captured_data.get('total', 0) or len(comment_list)
  712. print(f"[{self.platform_name}] 解析评论: total={total}, has_more={has_more}, comments={len(comment_list)}", flush=True)
  713. for comment in comment_list:
  714. cid = str(comment.get('cid', ''))
  715. if not cid:
  716. continue
  717. user = comment.get('user', {})
  718. # 解析回复列表
  719. replies = []
  720. reply_list = comment.get('reply_comment', []) or []
  721. for reply in reply_list:
  722. reply_user = reply.get('user', {})
  723. replies.append(CommentItem(
  724. comment_id=str(reply.get('cid', '')),
  725. work_id=work_id,
  726. content=reply.get('text', ''),
  727. author_id=str(reply_user.get('uid', '')),
  728. author_name=reply_user.get('nickname', ''),
  729. author_avatar=reply_user.get('avatar_thumb', {}).get('url_list', [''])[0] if reply_user.get('avatar_thumb') else '',
  730. like_count=int(reply.get('digg_count', 0)),
  731. create_time=datetime.fromtimestamp(reply.get('create_time', 0)).strftime('%Y-%m-%d %H:%M:%S') if reply.get('create_time') else '',
  732. is_author=reply.get('is_author', False),
  733. ))
  734. comments.append(CommentItem(
  735. comment_id=cid,
  736. work_id=work_id,
  737. content=comment.get('text', ''),
  738. author_id=str(user.get('uid', '')),
  739. author_name=user.get('nickname', ''),
  740. author_avatar=user.get('avatar_thumb', {}).get('url_list', [''])[0] if user.get('avatar_thumb') else '',
  741. like_count=int(comment.get('digg_count', 0)),
  742. reply_count=int(comment.get('reply_comment_total', 0)),
  743. create_time=datetime.fromtimestamp(comment.get('create_time', 0)).strftime('%Y-%m-%d %H:%M:%S') if comment.get('create_time') else '',
  744. is_author=comment.get('is_author', False),
  745. replies=replies,
  746. ))
  747. print(f"[{self.platform_name}] 解析到 {len(comments)} 条评论", flush=True)
  748. else:
  749. print(f"[{self.platform_name}] 未捕获到评论 API 响应", flush=True)
  750. except Exception as e:
  751. import traceback
  752. traceback.print_exc()
  753. return CommentsResult(
  754. success=False,
  755. platform=self.platform_name,
  756. work_id=work_id,
  757. error=str(e)
  758. )
  759. finally:
  760. await self.close_browser()
  761. result = CommentsResult(
  762. success=True,
  763. platform=self.platform_name,
  764. work_id=work_id,
  765. comments=comments,
  766. total=total,
  767. has_more=has_more
  768. )
  769. result.__dict__['cursor'] = next_cursor
  770. return result
  771. async def get_all_comments(self, cookies: str) -> dict:
  772. """获取所有作品的评论 - 通过评论管理页面"""
  773. print(f"\n{'='*60}")
  774. print(f"[{self.platform_name}] 获取所有作品评论")
  775. print(f"{'='*60}")
  776. all_work_comments = []
  777. captured_comments = []
  778. captured_works = {} # work_id -> work_info
  779. try:
  780. await self.init_browser()
  781. cookie_list = self.parse_cookies(cookies)
  782. await self.set_cookies(cookie_list)
  783. if not self.page:
  784. raise Exception("Page not initialized")
  785. # 设置 API 响应监听器
  786. async def handle_response(response):
  787. nonlocal captured_comments, captured_works
  788. url = response.url
  789. try:
  790. # 监听评论列表 API - 多种格式
  791. # /comment/list/select/ 或 /comment/read 或 /creator/comment/list
  792. if '/comment/list' in url or '/comment/read' in url or 'comment_list' in url:
  793. json_data = await response.json()
  794. print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
  795. # 格式1: comments 字段
  796. comments = json_data.get('comments', [])
  797. # 格式2: comment_info_list 字段
  798. if not comments:
  799. comments = json_data.get('comment_info_list', [])
  800. if comments:
  801. # 从 URL 中提取 aweme_id
  802. import re
  803. aweme_id_match = re.search(r'aweme_id=(\d+)', url)
  804. aweme_id = aweme_id_match.group(1) if aweme_id_match else ''
  805. for comment in comments:
  806. # 添加 aweme_id 到评论中
  807. if aweme_id and 'aweme_id' not in comment:
  808. comment['aweme_id'] = aweme_id
  809. captured_comments.append(comment)
  810. print(f"[{self.platform_name}] 捕获到 {len(comments)} 条评论 (aweme_id={aweme_id}),总计: {len(captured_comments)}", flush=True)
  811. # 监听作品列表 API
  812. if '/work_list' in url or '/item/list' in url or '/creator/item' in url:
  813. json_data = await response.json()
  814. aweme_list = json_data.get('aweme_list', []) or json_data.get('item_info_list', []) or json_data.get('item_list', [])
  815. print(f"[{self.platform_name}] 捕获到作品列表 API: {len(aweme_list)} 个作品", flush=True)
  816. for aweme in aweme_list:
  817. aweme_id = str(aweme.get('aweme_id', '') or aweme.get('item_id', '') or aweme.get('item_id_plain', ''))
  818. if aweme_id:
  819. cover_url = ''
  820. if aweme.get('Cover', {}).get('url_list'):
  821. cover_url = aweme['Cover']['url_list'][0]
  822. elif aweme.get('video', {}).get('cover', {}).get('url_list'):
  823. cover_url = aweme['video']['cover']['url_list'][0]
  824. elif aweme.get('cover_image_url'):
  825. cover_url = aweme['cover_image_url']
  826. captured_works[aweme_id] = {
  827. 'title': aweme.get('item_title', '') or aweme.get('title', '') or aweme.get('desc', ''),
  828. 'cover': cover_url,
  829. 'comment_count': aweme.get('statistics', {}).get('comment_count', 0) or aweme.get('comment_count', 0),
  830. }
  831. except Exception as e:
  832. print(f"[{self.platform_name}] 解析响应失败: {e}", flush=True)
  833. self.page.on('response', handle_response)
  834. print(f"[{self.platform_name}] 已注册 API 响应监听器", flush=True)
  835. # 访问评论管理页面
  836. print(f"[{self.platform_name}] 访问评论管理页面...", flush=True)
  837. await self.page.goto("https://creator.douyin.com/creator-micro/interactive/comment", wait_until="domcontentloaded", timeout=30000)
  838. await asyncio.sleep(5)
  839. # 检查登录状态
  840. current_url = self.page.url
  841. if "login" in current_url or "passport" in current_url:
  842. raise Exception("Cookie 已过期,请重新登录")
  843. print(f"[{self.platform_name}] 页面加载完成,当前捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)
  844. # 尝试点击"选择作品"来加载作品列表
  845. try:
  846. select_btn = await self.page.query_selector('text="选择作品"')
  847. if select_btn:
  848. print(f"[{self.platform_name}] 点击选择作品按钮...", flush=True)
  849. await select_btn.click()
  850. await asyncio.sleep(3)
  851. # 获取作品列表
  852. work_items = await self.page.query_selector_all('[class*="work-item"], [class*="video-item"], [class*="aweme-item"]')
  853. print(f"[{self.platform_name}] 找到 {len(work_items)} 个作品元素", flush=True)
  854. # 点击每个作品加载其评论
  855. for i, item in enumerate(work_items[:10]): # 最多处理10个作品
  856. try:
  857. await item.click()
  858. await asyncio.sleep(2)
  859. print(f"[{self.platform_name}] 已点击作品 {i+1}/{min(len(work_items), 10)}", flush=True)
  860. except:
  861. pass
  862. # 关闭选择作品弹窗
  863. close_btn = await self.page.query_selector('[class*="close"], [class*="cancel"]')
  864. if close_btn:
  865. await close_btn.click()
  866. await asyncio.sleep(1)
  867. except Exception as e:
  868. print(f"[{self.platform_name}] 选择作品操作失败: {e}", flush=True)
  869. # 滚动加载更多评论
  870. for i in range(5):
  871. await self.page.evaluate('window.scrollBy(0, 500)')
  872. await asyncio.sleep(1)
  873. await asyncio.sleep(3)
  874. # 移除监听器
  875. self.page.remove_listener('response', handle_response)
  876. print(f"[{self.platform_name}] 最终捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)
  877. # 按作品分组评论
  878. work_comments_map = {} # work_id -> work_comments
  879. for comment in captured_comments:
  880. # 从评论中获取作品信息
  881. aweme = comment.get('aweme', {}) or comment.get('item', {})
  882. aweme_id = str(comment.get('aweme_id', '') or aweme.get('aweme_id', '') or aweme.get('item_id', ''))
  883. if not aweme_id:
  884. continue
  885. if aweme_id not in work_comments_map:
  886. work_info = captured_works.get(aweme_id, {})
  887. work_comments_map[aweme_id] = {
  888. 'work_id': aweme_id,
  889. 'title': aweme.get('title', '') or aweme.get('desc', '') or work_info.get('title', ''),
  890. 'cover_url': aweme.get('cover', {}).get('url_list', [''])[0] if aweme.get('cover') else work_info.get('cover', ''),
  891. 'comments': []
  892. }
  893. cid = str(comment.get('cid', ''))
  894. if not cid:
  895. continue
  896. user = comment.get('user', {})
  897. work_comments_map[aweme_id]['comments'].append({
  898. 'comment_id': cid,
  899. 'author_id': str(user.get('uid', '')),
  900. 'author_name': user.get('nickname', ''),
  901. 'author_avatar': user.get('avatar_thumb', {}).get('url_list', [''])[0] if user.get('avatar_thumb') else '',
  902. 'content': comment.get('text', ''),
  903. 'like_count': int(comment.get('digg_count', 0)),
  904. 'create_time': datetime.fromtimestamp(comment.get('create_time', 0)).strftime('%Y-%m-%d %H:%M:%S') if comment.get('create_time') else '',
  905. 'is_author': comment.get('is_author', False),
  906. })
  907. all_work_comments = list(work_comments_map.values())
  908. total_comments = sum(len(w['comments']) for w in all_work_comments)
  909. print(f"[{self.platform_name}] 获取到 {len(all_work_comments)} 个作品的 {total_comments} 条评论", flush=True)
  910. except Exception as e:
  911. import traceback
  912. traceback.print_exc()
  913. return {
  914. 'success': False,
  915. 'platform': self.platform_name,
  916. 'error': str(e),
  917. 'work_comments': []
  918. }
  919. finally:
  920. await self.close_browser()
  921. return {
  922. 'success': True,
  923. 'platform': self.platform_name,
  924. 'work_comments': all_work_comments,
  925. 'total': len(all_work_comments)
  926. }