douyin.py 67 KB


  1. # -*- coding: utf-8 -*-
  2. """
  3. 抖音视频发布器
  4. 参考: matrix/douyin_uploader/main.py
  5. """
  6. import asyncio
  7. import os
  8. import json
  9. import re
  10. from datetime import datetime
  11. from typing import List
  12. from .base import (
  13. BasePublisher, PublishParams, PublishResult,
  14. WorkItem, WorksResult, CommentItem, CommentsResult
  15. )
  16. class DouyinPublisher(BasePublisher):
  17. """
  18. 抖音视频发布器
  19. 使用 Playwright 自动化操作抖音创作者中心
  20. """
  21. platform_name = "douyin"
  22. login_url = "https://creator.douyin.com/"
  23. publish_url = "https://creator.douyin.com/creator-micro/content/upload"
  24. cookie_domain = ".douyin.com"
  async def set_schedule_time(self, publish_date: datetime):
      """Switch the publish form to scheduled mode and enter the target time.

      Does nothing when no page is open.

      Args:
          publish_date: Moment the video should go live; it is typed into the
              date-time picker formatted as ``YYYY-MM-DD HH:MM``.
      """
      page = self.page
      if not page:
          return

      # Tick the "定时发布" (scheduled publish) radio, then pause one second
      # before touching the picker.
      schedule_radio = page.locator("label.radio-d4zkru:has-text('定时发布')")
      await schedule_radio.click()
      await asyncio.sleep(1)

      # Focus the date-time input, select its current content and type over it.
      stamp = publish_date.strftime("%Y-%m-%d %H:%M")
      await page.locator('.semi-input[placeholder="日期和时间"]').click()
      keyboard = page.keyboard
      await keyboard.press("Control+KeyA")
      await keyboard.type(stamp)
      await keyboard.press("Enter")
      await asyncio.sleep(1)
  async def handle_upload_error(self, video_path: str):
      """Retry a failed upload by handing the file to the re-upload input.

      Does nothing when no page is open.

      Args:
          video_path: Path of the video file to submit again.
      """
      page = self.page
      if not page:
          return

      print(f"[{self.platform_name}] 视频出错了,重新上传中...")
      # The file input that lives inside the upload progress area.
      retry_input = page.locator('div.progress-div [class^="upload-btn-input"]')
      await retry_input.set_input_files(video_path)
  async def check_captcha(self) -> dict:
      """Probe the current page for verification or login prompts via selectors.

      Selector groups are tried in a fixed order (phone SMS code, slider,
      image, login dialog). The first selector that matches at least one
      element decides the result.

      Returns:
          dict: ``{'need_captcha': bool, 'captcha_type': str}`` where
          ``captcha_type`` is ``'phone'``, ``'slider'``, ``'image'``,
          ``'login'``, or ``''`` when nothing matched or no page is open.
      """
      if not self.page:
          return {'need_captcha': False, 'captcha_type': ''}

      # (captcha_type, label used in the log line, selectors to probe)
      probes = (
          ('phone', '手机验证码', (
              'text="请输入验证码"',
              'text="输入手机验证码"',
              'text="获取验证码"',
              'text="手机号验证"',
              '[class*="captcha"][class*="phone"]',
              '[class*="verify"][class*="phone"]',
              '[class*="sms-code"]',
              'input[placeholder*="验证码"]',
          )),
          ('slider', '滑块验证码', (
              '[class*="captcha"][class*="slider"]',
              '[class*="slide-verify"]',
              '[class*="drag-verify"]',
              'text="按住滑块"',
              'text="向右滑动"',
              'text="拖动滑块"',
          )),
          ('image', '图片验证码', (
              '[class*="captcha"][class*="image"]',
              '[class*="verify-image"]',
              'text="点击图片"',
              'text="选择正确的"',
          )),
          # A login dialog means the cookies have expired; it is reported
          # through the same need_captcha channel with type 'login'.
          ('login', '需要登录', (
              'text="请先登录"',
              'text="登录后继续"',
              '[class*="login-modal"]',
              '[class*="login-dialog"]',
          )),
      )

      for captcha_type, label, selectors in probes:
          for selector in selectors:
              try:
                  if await self.page.locator(selector).count() > 0:
                      print(f"[{self.platform_name}] 检测到{label}: {selector}", flush=True)
                      return {'need_captcha': True, 'captcha_type': captcha_type}
              except Exception:
                  # Best effort: a selector that cannot be evaluated is
                  # skipped. `Exception` (not a bare except) so that
                  # asyncio.CancelledError still propagates to the caller.
                  continue

      return {'need_captcha': False, 'captcha_type': ''}
  async def handle_phone_captcha(self) -> bool:
      """Drive the SMS verification dialog: send the code, ask for it, submit it.

      Steps:
        1. Read a masked phone number (pattern ``1xx****xxxx``) from the page
           text, if one is shown.
        2. Click the "获取验证码" / "发送验证码" button when it is visible and
           enabled, then look for evidence that the SMS went out (countdown or
           "resend" wording, or a disabled send button).
        3. Consult ``self.ai_analyze_sms_send_state()``; retry the click when
           it suggests ``click_send`` and sending is still unconfirmed.
        4. Ask for the code through ``self.request_sms_code_from_frontend``
           with a hint describing the detected state.
        5. Fill the first matching input, click the first matching confirm
           button and wait for the prompt text to disappear.

      Returns:
          bool: True once the code has been filled in and the flow continued;
          False when no page is open or any step raised.
      """
      if not self.page:
          return False

      # Wording that indicates the SMS has been sent (countdown, "retry
      # later", "resend", "sent"). Used on both page text and button label.
      sent_marker = re.compile(r"(\d+\s*秒)|(\d+\s*s)|后可重试|重新发送|已发送")

      try:
          try:
              body_text = await self.page.inner_text("body")
          except Exception:
              body_text = ""
          phone_match = re.search(r"(1\d{2}\*{4}\d{4})", body_text or "")
          masked_phone = phone_match.group(1) if phone_match else ""

          async def _get_send_button():
              """Return the first visible "send code" button, or None."""
              candidates = [
                  self.page.get_by_role("button", name="获取验证码"),
                  self.page.get_by_role("button", name="发送验证码"),
                  self.page.locator('button:has-text("获取验证码")'),
                  self.page.locator('button:has-text("发送验证码")'),
                  self.page.locator('[role="button"]:has-text("获取验证码")'),
                  self.page.locator('[role="button"]:has-text("发送验证码")'),
              ]
              for candidate in candidates:
                  try:
                      if await candidate.count() > 0 and await candidate.first.is_visible():
                          return candidate.first
                  except Exception:
                      continue
              return None

          async def _confirm_sent() -> bool:
              """Return True when the page shows signs that the SMS was sent."""
              try:
                  page_text = await self.page.inner_text("body")
              except Exception:
                  page_text = ""
              if sent_marker.search(page_text or ""):
                  return True
              try:
                  send_btn = await _get_send_button()
                  if send_btn:
                      if await send_btn.is_disabled():
                          return True
                      label = await send_btn.inner_text()
                      if sent_marker.search(label or ""):
                          return True
              except Exception:
                  pass
              return False

          async def _analyze_send_state() -> dict:
              """Return the AI analysis, normalised to a dict.

              Every later lookup uses ``.get``; normalising here keeps a
              None / non-dict answer from aborting the whole handler.
              """
              state = await self.ai_analyze_sms_send_state()
              return state if isinstance(state, dict) else {}

          did_click_send = False
          send_button = await _get_send_button()
          if send_button:
              try:
                  if await send_button.is_enabled():
                      await send_button.click(timeout=5000)
                      did_click_send = True
                      print(f"[{self.platform_name}] 已点击发送短信验证码", flush=True)
              except Exception as e:
                  print(f"[{self.platform_name}] 点击发送验证码按钮失败: {e}", flush=True)

          if did_click_send:
              try:
                  await self.page.wait_for_timeout(800)
              except Exception:
                  pass

          sent_confirmed = await _confirm_sent() if did_click_send else False
          ai_state = await _analyze_send_state()
          if ai_state.get("sent_likely"):
              sent_confirmed = True

          # Second attempt, only when the analysis explicitly asks for a click.
          if (not did_click_send or not sent_confirmed) and ai_state.get("suggested_action") == "click_send":
              retry_button = await _get_send_button()
              if retry_button:
                  try:
                      if await retry_button.is_enabled():
                          await retry_button.click(timeout=5000)
                          did_click_send = True
                          await self.page.wait_for_timeout(800)
                          sent_confirmed = await _confirm_sent()
                          ai_state = await _analyze_send_state()
                          if ai_state.get("sent_likely"):
                              sent_confirmed = True
                  except Exception:
                      pass

          # Build the hint shown alongside the code request.
          code_hint = "请输入短信验证码。"
          if ai_state.get("block_reason") == "slider":
              code_hint = "检测到滑块/人机验证阻塞,请先在浏览器窗口完成验证后再发送短信验证码。"
          elif ai_state.get("block_reason") in ["rate_limit", "risk"]:
              code_hint = f"页面提示可能被限制/风控({ai_state.get('notes','') or '请稍后重试'})。可稍等后重新发送验证码。"
          elif not did_click_send:
              code_hint = "未找到或无法点击“发送验证码”按钮,请在弹出的浏览器页面手动点击发送后再输入验证码。"
          elif sent_confirmed:
              code_hint = f"已检测到短信验证码已发送({ai_state.get('notes','') or '请查收短信'})。"
          else:
              code_hint = f"已尝试点击发送验证码,但未确认发送成功({ai_state.get('notes','') or '请查看是否出现倒计时/重新发送'})。"

          code = await self.request_sms_code_from_frontend(masked_phone, message=code_hint)

          # Fill the first input that exists, most specific selector first.
          input_selectors = [
              'input[placeholder*="验证码"]',
              'input[placeholder*="短信"]',
              'input[type="tel"]',
              'input[type="text"]',
          ]
          filled = False
          for selector in input_selectors:
              try:
                  code_input = self.page.locator(selector).first
                  if await code_input.count() > 0:
                      await code_input.fill(code)
                      filled = True
                      break
              except Exception:
                  continue
          if not filled:
              raise Exception("未找到验证码输入框")

          # Click the first confirm-style button that exists (best effort).
          submit_selectors = [
              'button:has-text("确定")',
              'button:has-text("确认")',
              'button:has-text("提交")',
              'button:has-text("完成")',
          ]
          for selector in submit_selectors:
              try:
                  submit_button = self.page.locator(selector).first
                  if await submit_button.count() > 0:
                      await submit_button.click()
                      break
              except Exception:
                  continue

          # Wait for the prompt to go away; a timeout here is not an error.
          try:
              await self.page.wait_for_timeout(1000)
              await self.page.wait_for_selector('text="请输入验证码"', state="hidden", timeout=15000)
          except Exception:
              pass

          print(f"[{self.platform_name}] 短信验证码已提交,继续执行发布流程", flush=True)
          return True
      except Exception as e:
          print(f"[{self.platform_name}] 处理短信验证码失败: {e}", flush=True)
          return False
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video to Douyin through the creator-centre upload page.

      Modelled on matrix/douyin_uploader/main.py. The flow is: start the
      browser and install cookies, open the upload page, deal with login and
      captcha prompts, choose the video file, fill in title / tags / location,
      optionally schedule the post, then click "发布" and wait for the
      content-management page.

      Args:
          cookies: Cookie payload, handed to ``self.parse_cookies``.
          params: Publish parameters. This method reads ``video_path``,
              ``title``, ``tags``, ``location`` and ``publish_date``.

      Returns:
          PublishResult: ``status`` is ``'success'``, ``'need_captcha'``
          (login or captcha required), ``'failed'`` (no upload entry found)
          or ``'need_action'`` (publish did not complete within the polling
          window).

      Raises:
          Exception: When the page was not initialised or the video file
              does not exist.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"{'='*60}")

      self.report_progress(5, "正在初始化浏览器...")

      # Start the browser.
      await self.init_browser()
      print(f"[{self.platform_name}] 浏览器初始化完成")

      # Parse and install the cookies.
      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)

      if not self.page:
          raise Exception("Page not initialized")

      # Make sure the video file exists before touching the site.
      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")

      self.report_progress(10, "正在打开上传页面...")

      # Open the upload page (the class-level publish_url).
      await self.page.goto(self.publish_url)
      print(f"[{self.platform_name}] 等待页面加载...")
      try:
          await self.page.wait_for_url(self.publish_url, timeout=30000)
      except Exception:
          # A redirect (e.g. to login) is handled below via the URL check.
          pass
      await asyncio.sleep(3)

      # Inspect where we actually ended up.
      current_url = self.page.url
      print(f"[{self.platform_name}] 当前 URL: {current_url}")

      async def wait_for_manual_login(timeout_seconds: int = 300) -> bool:
          """Poll the page URL until it is a creator.douyin.com non-login page.

          Returns True on success, False when the timeout elapses or no page
          is open.
          """
          if not self.page:
              return False
          self.report_progress(8, "检测到需要登录,请在浏览器窗口完成登录...")
          try:
              await self.page.bring_to_front()
          except Exception:
              pass
          waited = 0
          while waited < timeout_seconds:
              try:
                  url = self.page.url
                  if "login" not in url and "passport" not in url:
                      if "creator.douyin.com" in url:
                          return True
                  await asyncio.sleep(2)
                  waited += 2
              except Exception:
                  # `Exception`, not a bare except: cancelling the task
                  # during the sleep above must end this loop.
                  await asyncio.sleep(2)
                  waited += 2
          return False

      # Landed on a login page: cookies are missing or expired.
      if "login" in current_url or "passport" in current_url:
          if not self.headless:
              logged_in = await wait_for_manual_login()
              if logged_in:
                  # Best effort: hand the fresh cookies back via
                  # sync_cookies_to_node.
                  try:
                      if self.context:
                          cookies_after = await self.context.cookies()
                          await self.sync_cookies_to_node(cookies_after)
                  except Exception:
                      pass
                  await self.page.goto(self.publish_url)
                  await asyncio.sleep(3)
                  current_url = self.page.url
              else:
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=False,
                      platform=self.platform_name,
                      error="需要登录:请在浏览器窗口完成登录后重试",
                      need_captcha=True,
                      captcha_type='login',
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='need_captcha'
                  )
          else:
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error="Cookie 已过期,需要重新登录",
                  need_captcha=True,
                  captcha_type='login',
                  screenshot_base64=screenshot_base64,
                  page_url=current_url,
                  status='need_captcha'
              )

      # Captcha detection, AI-based first.
      ai_captcha_result = await self.ai_check_captcha()
      if ai_captcha_result['has_captcha']:
          print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha_result['captcha_type']}", flush=True)
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"检测到{ai_captcha_result['captcha_type']}验证码,需要使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=ai_captcha_result['captcha_type'],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status='need_captcha'
          )

      # Captcha detection, selector-based. Only the phone (SMS) kind is
      # handled automatically.
      captcha_result = await self.check_captcha()
      if captcha_result['need_captcha']:
          print(f"[{self.platform_name}] 传统方式检测到验证码: {captcha_result['captcha_type']}", flush=True)
          if captcha_result['captcha_type'] == 'phone':
              handled = await self.handle_phone_captcha()
              if handled:
                  self.report_progress(12, "短信验证码已处理,继续发布...")
              else:
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=False,
                      platform=self.platform_name,
                      error="检测到手机验证码,但自动处理失败",
                      need_captcha=True,
                      captcha_type='phone',
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='need_captcha'
                  )
          else:
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
                  need_captcha=True,
                  captcha_type=captcha_result['captcha_type'],
                  screenshot_base64=screenshot_base64,
                  page_url=current_url,
                  status='need_captcha'
              )

      self.report_progress(15, "正在选择视频文件...")

      # Click the upload area and answer the file chooser. Selectors go from
      # most to least specific; the first one that works wins.
      upload_selectors = [
          "div[class*='container-drag-info']",
          "div[class*='container-drag']",
          "div.upload-btn",
          "div[class*='upload']",
      ]
      upload_success = False
      for selector in upload_selectors:
          try:
              upload_div = self.page.locator(selector).first
              if await upload_div.count() > 0:
                  print(f"[{self.platform_name}] 找到上传区域: {selector}")
                  async with self.page.expect_file_chooser(timeout=10000) as fc_info:
                      await upload_div.click()
                  file_chooser = await fc_info.value
                  await file_chooser.set_files(params.video_path)
                  upload_success = True
                  print(f"[{self.platform_name}] 视频文件已选择")
                  break
          except Exception as e:
              print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")

      if not upload_success:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到上传入口",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status='failed'
          )

      # Wait for the redirect to the post-editing page.
      # NOTE(review): when all 60 attempts time out the flow still continues
      # with the steps below - confirm that this is intended.
      self.report_progress(20, "等待进入发布页面...")
      for i in range(60):
          try:
              # URL awaited by matrix:
              # https://creator.douyin.com/creator-micro/content/post/video?enter_from=publish_page
              await self.page.wait_for_url(
                  "https://creator.douyin.com/creator-micro/content/post/video*",
                  timeout=2000
              )
              print(f"[{self.platform_name}] 已进入发布页面")
              break
          except Exception:
              print(f"[{self.platform_name}] 等待进入发布页面... {i+1}/60")
              await asyncio.sleep(1)
      await asyncio.sleep(2)

      self.report_progress(30, "正在填充标题和话题...")

      # Title: prefer the input next to the "作品标题" label (first 30
      # characters of the title); otherwise type into the rich-text editor.
      title_input = self.page.get_by_text('作品标题').locator("..").locator(
          "xpath=following-sibling::div[1]").locator("input")
      if await title_input.count():
          await title_input.fill(params.title[:30])
          print(f"[{self.platform_name}] 标题已填写")
      else:
          # Fallback path: clear the editor and type the full title.
          title_container = self.page.locator(".notranslate")
          await title_container.click()
          await self.page.keyboard.press("Backspace")
          await self.page.keyboard.press("Control+KeyA")
          await self.page.keyboard.press("Delete")
          await self.page.keyboard.type(params.title)
          await self.page.keyboard.press("Enter")
          print(f"[{self.platform_name}] 标题已填写(备用方式)")

      # Hashtags: type "#tag" followed by a space for each tag.
      if params.tags:
          css_selector = ".zone-container"
          for index, tag in enumerate(params.tags, start=1):
              print(f"[{self.platform_name}] 正在添加第{index}个话题: #{tag}")
              await self.page.type(css_selector, "#" + tag)
              await self.page.press(css_selector, "Space")

      self.report_progress(40, "等待视频上传完成...")

      # The upload counts as finished once a "重新上传" (re-upload) element
      # appears; polled every 3 s, up to 120 times.
      # NOTE(review): if the loop runs out, publishing is attempted anyway.
      for i in range(120):
          try:
              count = await self.page.locator("div").filter(has_text="重新上传").count()
              if count > 0:
                  print(f"[{self.platform_name}] 视频上传完毕")
                  break
              else:
                  print(f"[{self.platform_name}] 正在上传视频中... {i+1}/120")
                  # Resubmit the file when the page reports a failed upload.
                  if await self.page.locator('div.progress-div > div:has-text("上传失败")').count():
                      print(f"[{self.platform_name}] 发现上传出错了,重新上传...")
                      await self.handle_upload_error(params.video_path)
                  await asyncio.sleep(3)
          except Exception:
              # `Exception`, not a bare except, so that cancelling the task
              # during the sleep above is not swallowed by this loop.
              print(f"[{self.platform_name}] 正在上传视频中...")
              await asyncio.sleep(3)

      self.report_progress(60, "处理视频设置...")

      # Dismiss the "我知道了" (got it) popup when present.
      known_count = await self.page.get_by_role("button", name="我知道了").count()
      if known_count > 0:
          await self.page.get_by_role("button", name="我知道了").nth(0).click()
          print(f"[{self.platform_name}] 关闭弹窗")
      await asyncio.sleep(5)

      # Location: open the selector, clear it, type the location and pick
      # the first suggestion. Any failure is logged and ignored.
      try:
          await self.page.locator('div.semi-select span:has-text("输入地理位置")').click()
          await asyncio.sleep(1)
          await self.page.keyboard.press("Backspace")
          await self.page.keyboard.press("Control+KeyA")
          await self.page.keyboard.press("Delete")
          await self.page.keyboard.type(params.location)
          await asyncio.sleep(1)
          await self.page.locator('div[role="listbox"] [role="option"]').first.click()
          print(f"[{self.platform_name}] 位置设置成功: {params.location}")
      except Exception as e:
          print(f"[{self.platform_name}] 设置位置失败: {e}")

      # Turn on the Toutiao / Xigua sync switch when it exists and is off.
      try:
          third_part_element = '[class^="info"] > [class^="first-part"] div div.semi-switch'
          if await self.page.locator(third_part_element).count():
              class_name = await self.page.eval_on_selector(
                  third_part_element, 'div => div.className')
              if 'semi-switch-checked' not in class_name:
                  await self.page.locator(third_part_element).locator(
                      'input.semi-switch-native-control').click()
                  print(f"[{self.platform_name}] 已开启头条/西瓜同步")
      except Exception:
          pass

      # Scheduled publishing.
      if params.publish_date:
          self.report_progress(70, "设置定时发布...")
          await self.set_schedule_time(params.publish_date)

      self.report_progress(80, "正在发布...")
      print(f"[{self.platform_name}] 查找发布按钮...")

      # Click "发布" and wait for the content-management page, up to 30 tries.
      for i in range(30):
          try:
              # The AI captcha check is slow, so it runs every 5th iteration.
              if i % 5 == 0:
                  ai_captcha = await self.ai_check_captcha()
                  if ai_captcha['has_captcha']:
                      print(f"[{self.platform_name}] AI检测到发布过程中需要验证码: {ai_captcha['captcha_type']}", flush=True)
                      if ai_captcha['captcha_type'] == 'phone':
                          handled = await self.handle_phone_captcha()
                          if handled:
                              continue
                      screenshot_base64 = await self.capture_screenshot()
                      page_url = await self.get_page_url()
                      return PublishResult(
                          success=False,
                          platform=self.platform_name,
                          error=f"发布过程中需要{ai_captcha['captcha_type']}验证码,请使用有头浏览器完成验证",
                          need_captcha=True,
                          captcha_type=ai_captcha['captcha_type'],
                          screenshot_base64=screenshot_base64,
                          page_url=page_url,
                          status='need_captcha'
                      )

              publish_btn = self.page.get_by_role('button', name="发布", exact=True)
              btn_count = await publish_btn.count()
              if btn_count > 0:
                  print(f"[{self.platform_name}] 点击发布按钮...")
                  await publish_btn.click()

              # Success is defined as reaching the content-management page;
              # a timeout raises and is handled below.
              await self.page.wait_for_url(
                  "https://creator.douyin.com/creator-micro/content/manage",
                  timeout=5000
              )
              self.report_progress(100, "发布成功")
              print(f"[{self.platform_name}] 视频发布成功!")
              screenshot_base64 = await self.capture_screenshot()
              page_url = await self.get_page_url()
              return PublishResult(
                  success=True,
                  platform=self.platform_name,
                  message="发布成功",
                  screenshot_base64=screenshot_base64,
                  page_url=page_url,
                  status='success'
              )
          except Exception:
              current_url = self.page.url
              # The navigation may have completed even though a step raised.
              if "https://creator.douyin.com/creator-micro/content/manage" in current_url:
                  self.report_progress(100, "发布成功")
                  print(f"[{self.platform_name}] 视频发布成功!")
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=True,
                      platform=self.platform_name,
                      message="发布成功",
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='success'
                  )
              else:
                  print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30, URL: {current_url}")
                  await asyncio.sleep(1)

      # Ran out of attempts.
      print(f"[{self.platform_name}] 发布超时,获取截图...")
      screenshot_base64 = await self.capture_screenshot()
      page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="发布超时,请检查发布状态",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status='need_action'
      )
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Fetch one page of the account's Douyin works.

      Opens the creator-centre home page with the given cookies, then calls
      the ``work_list`` API from inside the page so the request carries the
      browser session.

      Args:
          cookies: Cookie string or JSON, handed to ``self.parse_cookies``.
          page: Pagination cursor. Pass 0 for the first request and the
              previous result's ``next_page`` (the API's ``max_cursor``)
              afterwards.
          page_size: Number of works to request.

      Returns:
          WorksResult: On success carries ``works``, ``total``, ``has_more``
          and ``next_page``. On any error (expired cookies, failed API
          request, unexpected response) ``success`` is False and ``error``
          holds the message.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品列表")
      print(f"[{self.platform_name}] cursor={page}, page_size={page_size}")
      print(f"{'='*60}")

      works: List[WorkItem] = []
      total = 0
      has_more = False
      next_cursor = 0

      # Runs inside the page. The URL is passed as an argument rather than
      # spliced into the script text.
      fetch_json_js = '''
      async (url) => {
          try {
              const resp = await fetch(url, {
                  credentials: 'include',
                  headers: { 'Accept': 'application/json' }
              });
              return await resp.json();
          } catch (e) {
              return { error: e.toString() };
          }
      }
      '''

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          # Visit the creator-centre home page first; an expired session
          # shows up as a redirect to a login/passport URL.
          await self.page.goto("https://creator.douyin.com/creator-micro/home")
          await asyncio.sleep(3)

          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # `page` doubles as the API's max_cursor.
          max_cursor = page
          api_url = f"https://creator.douyin.com/janus/douyin/creator/pc/work_list?status=0&device_platform=android&count={page_size}&max_cursor={max_cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai"
          response = await self.page.evaluate(fetch_json_js, api_url)

          if not isinstance(response, dict):
              raise Exception(f"API 请求失败: unexpected response {response!r}")
          api_error = response.get('error')
          if api_error:
              # A failed fetch must not be reported as "success, 0 works".
              print(f"[{self.platform_name}] API 请求失败: {api_error}", flush=True)
              raise Exception(f"API 请求失败: {api_error}")

          aweme_list = response.get('aweme_list', []) or []
          has_more = response.get('has_more', False)
          next_cursor = response.get('max_cursor', 0)

          # The overall work count is read from the first item's
          # author.aweme_count when it is present and positive.
          if aweme_list:
              first_author = aweme_list[0].get('author') or {}
              author_aweme_count = first_author.get('aweme_count') or 0
              if author_aweme_count > 0:
                  total = author_aweme_count
                  print(f"[{self.platform_name}] 从 author.aweme_count 获取总作品数: {total}")

          print(f"[{self.platform_name}] API 响应: has_more={has_more}, aweme_list={len(aweme_list)}, next_cursor={next_cursor}")

          for aweme in aweme_list:
              aweme_id = str(aweme.get('aweme_id', ''))
              if not aweme_id:
                  continue

              # `or {}` / `or 0` / `or ''`: treat JSON null like a missing key.
              statistics = aweme.get('statistics') or {}
              video = aweme.get('video') or {}

              # Cover: top-level "Cover" first, then video.cover.
              cover_url = ''
              top_cover_urls = (aweme.get('Cover') or {}).get('url_list')
              video_cover_urls = (video.get('cover') or {}).get('url_list')
              if top_cover_urls:
                  cover_url = top_cover_urls[0]
              elif video_cover_urls:
                  cover_url = video_cover_urls[0]

              # Title: item_title, else first line of desc (max 50 chars).
              first_desc_line = (aweme.get('desc') or '').split('\n')[0][:50]
              title = aweme.get('item_title', '') or first_desc_line or '无标题'

              # Duration is divided by 1000 (milliseconds to seconds).
              duration = (video.get('duration') or 0) // 1000

              create_time = aweme.get('create_time', 0)
              publish_time = datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S') if create_time else ''

              works.append(WorkItem(
                  work_id=aweme_id,
                  title=title,
                  cover_url=cover_url,
                  duration=duration,
                  status='published',
                  publish_time=publish_time,
                  play_count=int(statistics.get('play_count') or 0),
                  like_count=int(statistics.get('digg_count') or 0),
                  comment_count=int(statistics.get('comment_count') or 0),
                  share_count=int(statistics.get('share_count') or 0),
              ))

          # Without an author count, fall back to this page's size.
          if total == 0:
              total = len(works)
          print(f"[{self.platform_name}] 本页获取到 {len(works)} 个作品")
      except Exception as e:
          import traceback
          traceback.print_exc()
          return WorksResult(
              success=False,
              platform=self.platform_name,
              error=str(e)
          )

      return WorksResult(
          success=True,
          platform=self.platform_name,
          works=works,
          total=total,
          has_more=has_more,
          next_page=next_cursor
      )
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Fetch comments of one Douyin work by intercepting the page's own API call.

      Opens ``https://www.douyin.com/video/<work_id>`` with the given cookies
      and listens for responses whose URL contains ``/comment/list``. The
      last successful response is converted to ``CommentItem`` objects,
      including the replies embedded in ``reply_comment``.

      Args:
          cookies: Cookie payload, handed to ``self.parse_cookies``.
          work_id: Id of the work (aweme) whose comments are wanted.
          cursor: Only logged. NOTE(review): it is not sent to the API, so
              every call returns the first batch the page loads.

      Returns:
          CommentsResult: On success carries ``comments``, ``total`` and
          ``has_more``; the next cursor reported by the API is attached as an
          extra ``cursor`` attribute. On error ``success`` is False and
          ``error`` holds the message. The browser is closed in both cases.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品评论")
      print(f"[{self.platform_name}] work_id={work_id}, cursor={cursor}")
      print(f"{'='*60}")

      comments: List[CommentItem] = []
      total = 0
      has_more = False
      next_cursor = ""
      captured_data: dict = {}

      def _format_time(timestamp) -> str:
          """Format a Unix timestamp; empty string when missing or zero."""
          if not timestamp:
              return ''
          return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

      def _avatar_url(user: dict) -> str:
          """First avatar thumbnail URL, or '' when absent or empty."""
          url_list = (user.get('avatar_thumb') or {}).get('url_list') or ['']
          return url_list[0]

      def _shared_fields(raw: dict) -> dict:
          """CommentItem keyword arguments common to comments and replies."""
          author = raw.get('user') or {}
          return dict(
              work_id=work_id,
              content=raw.get('text', ''),
              author_id=str(author.get('uid', '')),
              author_name=author.get('nickname', ''),
              author_avatar=_avatar_url(author),
              like_count=int(raw.get('digg_count') or 0),
              create_time=_format_time(raw.get('create_time')),
              is_author=raw.get('is_author', False),
          )

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          async def handle_response(response):
              """Keep the JSON body of the latest successful comment-list response."""
              nonlocal captured_data
              url = response.url
              # Comment list API used by the video page:
              # /aweme/v1/web/comment/list/ or /comment/list/
              if '/comment/list' in url and ('aweme_id' in url or work_id in url):
                  try:
                      json_data = await response.json()
                      print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
                      # Accept the response when it reports success or
                      # actually carries comments.
                      if json_data.get('status_code') == 0 or json_data.get('comments'):
                          captured_data = json_data
                          # `or []`: "comments" may be null.
                          comment_count = len(json_data.get('comments') or [])
                          print(f"[{self.platform_name}] 评论 API 响应成功: comments={comment_count}, has_more={json_data.get('has_more')}", flush=True)
                  except Exception as e:
                      print(f"[{self.platform_name}] 解析评论响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"[{self.platform_name}] 已注册评论 API 响应监听器", flush=True)

          # Loading the video page triggers the comment request.
          video_url = f"https://www.douyin.com/video/{work_id}"
          print(f"[{self.platform_name}] 访问视频详情页: {video_url}", flush=True)
          await self.page.goto(video_url, wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          # A redirect to a login/passport URL means the cookies expired.
          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Nothing captured yet: scroll a little to trigger loading, then
          # wait up to two more 3-second periods.
          if not captured_data:
              print(f"[{self.platform_name}] 等待评论 API 响应...", flush=True)
              await self.page.evaluate('window.scrollBy(0, 300)')
              await asyncio.sleep(3)
              if not captured_data:
                  await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)

          if captured_data:
              comment_list = captured_data.get('comments') or []
              # has_more may be a bool or 0/1; normalise it to bool.
              has_more = bool(captured_data.get('has_more'))
              next_cursor = str(captured_data.get('cursor', ''))
              total = captured_data.get('total', 0) or len(comment_list)
              print(f"[{self.platform_name}] 解析评论: total={total}, has_more={has_more}, comments={len(comment_list)}", flush=True)

              for comment in comment_list:
                  cid = str(comment.get('cid', ''))
                  if not cid:
                      continue

                  # Replies embedded in the comment payload.
                  replies = [
                      CommentItem(
                          comment_id=str(reply.get('cid', '')),
                          **_shared_fields(reply),
                      )
                      for reply in (comment.get('reply_comment') or [])
                  ]

                  comments.append(CommentItem(
                      comment_id=cid,
                      reply_count=int(comment.get('reply_comment_total') or 0),
                      replies=replies,
                      **_shared_fields(comment),
                  ))

              print(f"[{self.platform_name}] 解析到 {len(comments)} 条评论", flush=True)
          else:
              print(f"[{self.platform_name}] 未捕获到评论 API 响应", flush=True)
      except Exception as e:
          import traceback
          traceback.print_exc()
          return CommentsResult(
              success=False,
              platform=self.platform_name,
              work_id=work_id,
              error=str(e)
          )
      finally:
          await self.close_browser()

      result = CommentsResult(
          success=True,
          platform=self.platform_name,
          work_id=work_id,
          comments=comments,
          total=total,
          has_more=has_more
      )
      # The next cursor is stored directly in the instance dict, not passed
      # to the constructor.
      result.__dict__['cursor'] = next_cursor
      return result
  async def get_all_comments(self, cookies: str) -> dict:
      """Collect comments for all works through the creator-center comment page.

      The method opens the comment management page with the supplied cookies
      and passively captures the JSON of comment-list and work-list API
      responses while it loads the page, clicks up to 10 entries of the
      "选择作品" (select work) dialog and scrolls. Captured comments are then
      grouped per work.

      Args:
          cookies: Cookie string, passed to ``self.parse_cookies``.

      Returns:
          On success ``{'success': True, 'platform', 'work_comments', 'total'}``
          where ``work_comments`` is a list of
          ``{'work_id', 'title', 'cover_url', 'comments'}`` dicts and ``total``
          is the number of works (not comments).
          On failure ``{'success': False, 'platform', 'error',
          'work_comments': []}``. Exceptions are not propagated.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取所有作品评论")
      print(f"{'='*60}")

      all_work_comments = []
      captured_comments = []  # raw comment dicts in capture order
      captured_works = {}  # work_id -> work_info

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          async def handle_response(response):
              """Record comment-list and work-list payloads from API responses."""
              url = response.url
              # Comment list endpoints come in several shapes:
              # /comment/list/select/, /comment/read, /creator/comment/list
              is_comment_api = '/comment/list' in url or '/comment/read' in url or 'comment_list' in url
              is_work_api = '/work_list' in url or '/item/list' in url or '/creator/item' in url
              if not (is_comment_api or is_work_api):
                  return
              try:
                  # Parse once even when the URL matches both patterns.
                  json_data = await response.json()

                  if is_comment_api:
                      print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
                      # Format 1: "comments"; format 2: "comment_info_list".
                      comments = json_data.get('comments') or json_data.get('comment_info_list') or []
                      if comments:
                          # The work id is only available from the request URL.
                          aweme_id_match = re.search(r'aweme_id=(\d+)', url)
                          aweme_id = aweme_id_match.group(1) if aweme_id_match else ''
                          for comment in comments:
                              if aweme_id and 'aweme_id' not in comment:
                                  comment['aweme_id'] = aweme_id
                              captured_comments.append(comment)
                          print(f"[{self.platform_name}] 捕获到 {len(comments)} 条评论 (aweme_id={aweme_id}),总计: {len(captured_comments)}", flush=True)

                  if is_work_api:
                      aweme_list = (
                          json_data.get('aweme_list')
                          or json_data.get('item_info_list')
                          or json_data.get('item_list')
                          or []
                      )
                      print(f"[{self.platform_name}] 捕获到作品列表 API: {len(aweme_list)} 个作品", flush=True)
                      for aweme in aweme_list:
                          # "or ''" keeps an explicit null from becoming the id "None".
                          aweme_id = str(
                              aweme.get('aweme_id')
                              or aweme.get('item_id')
                              or aweme.get('item_id_plain')
                              or ''
                          )
                          if not aweme_id:
                              continue
                          # "or {}" tolerates fields that are present but null.
                          cover_list = (aweme.get('Cover') or {}).get('url_list')
                          video_cover_list = ((aweme.get('video') or {}).get('cover') or {}).get('url_list')
                          cover_url = ''
                          if cover_list:
                              cover_url = cover_list[0]
                          elif video_cover_list:
                              cover_url = video_cover_list[0]
                          elif aweme.get('cover_image_url'):
                              cover_url = aweme['cover_image_url']
                          captured_works[aweme_id] = {
                              'title': aweme.get('item_title') or aweme.get('title') or aweme.get('desc') or '',
                              'cover': cover_url,
                              'comment_count': (
                                  (aweme.get('statistics') or {}).get('comment_count')
                                  or aweme.get('comment_count')
                                  or 0
                              ),
                          }
              except Exception as e:
                  print(f"[{self.platform_name}] 解析响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"[{self.platform_name}] 已注册 API 响应监听器", flush=True)

          # Open the comment management page.
          print(f"[{self.platform_name}] 访问评论管理页面...", flush=True)
          await self.page.goto("https://creator.douyin.com/creator-micro/interactive/comment", wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          # A redirect to a login/passport URL means the cookies are no longer valid.
          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          print(f"[{self.platform_name}] 页面加载完成,当前捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          # Best effort: open the "select work" dialog and click works so the
          # page requests their comments; failures here must not abort the run.
          try:
              select_btn = await self.page.query_selector('text="选择作品"')
              if select_btn:
                  print(f"[{self.platform_name}] 点击选择作品按钮...", flush=True)
                  await select_btn.click()
                  await asyncio.sleep(3)

                  work_items = await self.page.query_selector_all('[class*="work-item"], [class*="video-item"], [class*="aweme-item"]')
                  print(f"[{self.platform_name}] 找到 {len(work_items)} 个作品元素", flush=True)

                  # Process at most 10 works.
                  for i, item in enumerate(work_items[:10]):
                      try:
                          await item.click()
                          await asyncio.sleep(2)
                          print(f"[{self.platform_name}] 已点击作品 {i+1}/{min(len(work_items), 10)}", flush=True)
                      except Exception as e:
                          # Keep going, but report the failure instead of hiding it.
                          print(f"[{self.platform_name}] 点击作品 {i+1} 失败: {e}", flush=True)

                  # Close the "select work" dialog.
                  close_btn = await self.page.query_selector('[class*="close"], [class*="cancel"]')
                  if close_btn:
                      await close_btn.click()
                      await asyncio.sleep(1)
          except Exception as e:
              print(f"[{self.platform_name}] 选择作品操作失败: {e}", flush=True)

          # Scroll to trigger loading of further comments.
          for _ in range(5):
              await self.page.evaluate('window.scrollBy(0, 500)')
              await asyncio.sleep(1)

          await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)

          print(f"[{self.platform_name}] 最终捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          all_work_comments = self._group_captured_comments(captured_comments, captured_works)
          total_comments = sum(len(w['comments']) for w in all_work_comments)
          print(f"[{self.platform_name}] 获取到 {len(all_work_comments)} 个作品的 {total_comments} 条评论", flush=True)

      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e),
              'work_comments': []
          }
      finally:
          await self.close_browser()

      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': all_work_comments,
          'total': len(all_work_comments)
      }

  @staticmethod
  def _group_captured_comments(captured_comments: list, captured_works: dict) -> list:
      """Group raw captured comment dicts by work id.

      Args:
          captured_comments: Raw comment dicts as captured from API responses.
          captured_works: Mapping of work id to ``{'title', 'cover', ...}``
              used as a fallback for title and cover.

      Returns:
          List of ``{'work_id', 'title', 'cover_url', 'comments'}`` dicts in
          first-seen order. A comment seen more than once for the same work
          (same ``cid``) is kept only once.
      """
      work_comments_map = {}  # work_id -> grouped entry
      seen = set()  # (work_id, comment_id) pairs already emitted

      for comment in captured_comments:
          aweme = comment.get('aweme') or comment.get('item') or {}
          aweme_id = str(comment.get('aweme_id') or aweme.get('aweme_id') or aweme.get('item_id') or '')
          if not aweme_id:
              continue

          if aweme_id not in work_comments_map:
              work_info = captured_works.get(aweme_id, {})
              cover_list = (aweme.get('cover') or {}).get('url_list') or ['']
              work_comments_map[aweme_id] = {
                  'work_id': aweme_id,
                  'title': aweme.get('title') or aweme.get('desc') or work_info.get('title', ''),
                  'cover_url': cover_list[0] or work_info.get('cover', ''),
                  'comments': []
              }

          cid = str(comment.get('cid') or '')
          # The same page of comments is often fetched again while clicking
          # and scrolling, so drop repeats.
          if not cid or (aweme_id, cid) in seen:
              continue
          seen.add((aweme_id, cid))

          user = comment.get('user') or {}
          avatar_list = (user.get('avatar_thumb') or {}).get('url_list') or ['']
          create_time = comment.get('create_time')
          work_comments_map[aweme_id]['comments'].append({
              'comment_id': cid,
              'author_id': str(user.get('uid', '')),
              'author_name': user.get('nickname', ''),
              'author_avatar': avatar_list[0],
              'content': comment.get('text', ''),
              'like_count': int(comment.get('digg_count') or 0),
              'create_time': datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S') if create_time else '',
              'is_author': comment.get('is_author', False),
          })

      return list(work_comments_map.values())
  async def auto_reply_private_messages(self, cookies: str) -> dict:
      """Automatically reply to Douyin private messages (new page layout).

      Opens the creator-center chat page, walks the "陌生人私信" and
      "朋友私信" tabs, and for every conversation whose last message was not
      sent by the account itself generates a reply (AI first, rule-based as
      fallback) and sends it.

      Args:
          cookies: Cookie string, passed to ``self.parse_cookies``.

      Returns:
          ``{'success': True, 'platform', 'replied_count', 'message'}`` on
          success, ``{'success': False, 'platform', 'error'}`` on failure.
          The browser is closed in either case.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始自动回复抖音私信")
      print(f"{'='*60}")

      try:
          await self.init_browser()
          await self.set_cookies(self.parse_cookies(cookies))

          if not self.page:
              raise Exception("Page not initialized")

          # Open the private-message page.
          await self.page.goto("https://creator.douyin.com/creator-micro/data/following/chat", timeout=30000)
          await asyncio.sleep(3)

          # A login/passport URL means the cookies are no longer valid.
          current_url = self.page.url
          print(f"[{self.platform_name}] 当前 URL: {current_url}")
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          replied_count = 0

          # Two tabs are handled: stranger messages and friend messages.
          for tab_name in ["陌生人私信", "朋友私信"]:
              print(f"\n{'='*50}")
              print(f"[{self.platform_name}] 处理 {tab_name} ...")
              print(f"{'='*50}")

              tab_locator = self.page.locator(f'div.semi-tabs-tab:text-is("{tab_name}")')
              if not await tab_locator.count() > 0:
                  print(f"⚠️ 未找到 {tab_name} 标签,跳过")
                  continue
              await tab_locator.click()
              await asyncio.sleep(2)

              # Conversation list of the active tab.
              session_count = await self.page.locator('.semi-list-item').count()
              print(f"[{self.platform_name}] {tab_name} 共找到 {session_count} 条会话")
              if session_count == 0:
                  print(f"[{self.platform_name}] {tab_name} 无新私信")
                  continue

              for idx in range(session_count):
                  try:
                      # Re-query the list each round because the DOM may change.
                      current_sessions = self.page.locator('.semi-list-item')
                      if idx >= await current_sessions.count():
                          break

                      session = current_sessions.nth(idx)
                      user_name = await session.locator('.item-header-name-vL_79m').inner_text()
                      last_msg = await session.locator('.text-whxV9A').inner_text()
                      print(f"\n ➤ [{idx+1}/{session_count}] 处理用户: {user_name} | 最后消息: {last_msg[:30]}...")

                      # Skip previews that look like shared media rather than text.
                      if "分享" in last_msg and any(kind in last_msg for kind in ("视频", "图片", "链接")):
                          print(" ➤ 会话预览为非文字消息,跳过")
                          continue

                      # Enter the conversation.
                      await session.click()
                      await asyncio.sleep(2)

                      # A last bubble whose class contains "is-me-" was sent by
                      # this account, so no reply is needed.
                      chat_messages = self.page.locator('.box-item-dSA1TJ:not(.time-Za5gKL)')
                      msg_count = await chat_messages.count()
                      if msg_count > 0:
                          classes = await chat_messages.nth(msg_count - 1).get_attribute('class') or ''
                          if 'is-me-' in classes:
                              print(" ➤ 最后一条是我发的,跳过")
                              continue

                      chat_history = await self._extract_chat_history()
                      if not chat_history:
                          print(" ➤ 聊天历史为空,跳过")
                          continue

                      # AI reply first, rule-based reply if that yields nothing.
                      reply_text = await self._generate_reply_with_ai(chat_history)
                      if not reply_text:
                          reply_text = self._generate_reply(chat_history)
                      if not reply_text:
                          print(" ➤ 无需回复")
                          continue

                      print(f" 📝 回复内容: {reply_text}")

                      input_box = self.page.locator('div.chat-input-dccKiL[contenteditable="true"]')
                      send_btn = self.page.locator('button:has-text("发送")')
                      if not (await input_box.is_visible() and await send_btn.is_visible()):
                          print(" ❌ 输入框或发送按钮不可见")
                          continue

                      await input_box.fill(reply_text)
                      await asyncio.sleep(0.5)
                      await send_btn.click()
                      print(" ✅ 已发送")
                      replied_count += 1
                      await asyncio.sleep(2)

                  except Exception as e:
                      # One broken conversation must not stop the others.
                      print(f" ❌ 处理会话 {idx+1} 时出错: {e}")

          print(f"[{self.platform_name}] 自动回复完成,共回复 {replied_count} 条消息")

          return {
              'success': True,
              'platform': self.platform_name,
              'replied_count': replied_count,
              'message': f'成功回复 {replied_count} 条私信'
          }

      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e)
          }
      finally:
          await self.close_browser()
  1106. # 辅助方法保持兼容(可复用)
  1107. def _generate_reply(self, chat_history: list) -> str:
  1108. """规则回复"""
  1109. if not chat_history:
  1110. return "你好!感谢联系~"
  1111. last_msg = chat_history[-1]["content"]
  1112. if "谢谢" in last_msg or "感谢" in last_msg:
  1113. return "不客气!欢迎常来交流~"
  1114. elif "你好" in last_msg or "在吗" in last_msg:
  1115. return "你好!请问有什么可以帮您的?"
  1116. elif "视频" in last_msg or "怎么拍" in last_msg:
  1117. return "视频是用手机拍摄的,注意光线和稳定哦!"
  1118. else:
  1119. return "收到!我会认真阅读您的留言~"
  1120. async def _extract_chat_history(self) -> list:
  1121. """精准提取聊天记录,区分作者(自己)和用户"""
  1122. if not self.page:
  1123. return []
  1124. history = []
  1125. # 获取所有聊天消息(排除时间戳元素)
  1126. message_wrappers = self.page.locator('.box-item-dSA1TJ:not(.time-Za5gKL)')
  1127. count = await message_wrappers.count()
  1128. for i in range(count):
  1129. try:
  1130. wrapper = message_wrappers.nth(i)
  1131. # 检查是否为自己发送的消息
  1132. classes = await wrapper.get_attribute('class') or ''
  1133. is_author = 'is-me-' in classes # 包含 is-me- 表示是自己发的
  1134. # 获取消息文本内容
  1135. text_element = wrapper.locator('.text-X2d7fS')
  1136. if await text_element.count() > 0:
  1137. content = await text_element.inner_text()
  1138. content = content.strip()
  1139. if content: # 只添加非空消息
  1140. # 获取用户名(如果是对方消息)
  1141. author_name = ''
  1142. if not is_author:
  1143. # 尝试获取对方用户名
  1144. name_elements = wrapper.locator('.aweme-author-name-m8uoXU')
  1145. if await name_elements.count() > 0:
  1146. author_name = await name_elements.nth(0).inner_text()
  1147. else:
  1148. author_name = '用户'
  1149. else:
  1150. author_name = '我'
  1151. history.append({
  1152. "author": author_name,
  1153. "content": content,
  1154. "is_author": is_author,
  1155. })
  1156. except Exception as e:
  1157. print(f" ⚠️ 解析第 {i+1} 条消息失败: {e}")
  1158. continue
  1159. return history
  1160. async def _generate_reply_with_ai(self, chat_history: list) -> str:
  1161. """使用 AI 生成回复(保留原逻辑)"""
  1162. import os, requests, json
  1163. try:
  1164. ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
  1165. ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
  1166. ai_model = os.environ.get('AI_MODEL', 'qwen-plus')
  1167. if not ai_api_key:
  1168. return self._generate_reply(chat_history)
  1169. messages = [{"role": "system", "content": "你是一个友好的抖音创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。"}]
  1170. for msg in chat_history:
  1171. role = "assistant" if msg.get("is_author", False) else "user"
  1172. messages.append({"role": role, "content": msg["content"]})
  1173. headers = {'Authorization': f'Bearer {ai_api_key}', 'Content-Type': 'application/json'}
  1174. payload = {"model": ai_model, "messages": messages, "max_tokens": 150, "temperature": 0.8}
  1175. response = requests.post(f"{ai_base_url}/chat/completions", headers=headers, json=payload, timeout=30)
  1176. if response.status_code == 200:
  1177. ai_reply = response.json().get('choices', [{}])[0].get('message', {}).get('content', '').strip()
  1178. return ai_reply if ai_reply else self._generate_reply(chat_history)
  1179. else:
  1180. return self._generate_reply(chat_history)
  1181. except:
  1182. return self._generate_reply(chat_history)
  async def get_work_comments_mapping(self, cookies: str) -> dict:
      """Return every work together with its comments.

      Fetches the first 20 works via ``self.get_works`` and then the comments
      of each work via ``self.get_comments``, pausing 2 seconds between works.

      NOTE(review): this method opens its own browser and navigates before
      delegating to ``get_works`` / ``get_comments``, which receive the same
      cookies and presumably manage a browser themselves; the extra session
      here may be redundant. Confirm against those methods.

      Args:
          cookies: Cookies of the Douyin creator platform.

      Returns:
          On success ``{'success': True, 'platform', 'work_comments',
          'summary': {'total_works', 'total_comments'}}`` where each
          ``work_comments`` entry holds ``work_info``, ``comments`` and, when
          fetching that work's comments failed, ``error``.
          On failure ``{'success': False, 'platform', 'error',
          'work_comments': []}``.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品和评论对应关系")
      print(f"{'='*60}")

      def failure(error) -> dict:
          # Shared shape of every unsuccessful result.
          return {
              'success': False,
              'platform': self.platform_name,
              'error': error,
              'work_comments': []
          }

      work_comments_mapping = []

      try:
          await self.init_browser()
          await self.set_cookies(self.parse_cookies(cookies))

          if not self.page:
              raise Exception("Page not initialized")

          # Creator-center home page; a login/passport redirect means the
          # cookies are no longer valid.
          await self.page.goto("https://creator.douyin.com/creator-micro/home", timeout=30000)
          await asyncio.sleep(3)

          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Content management page.
          print(f"[{self.platform_name}] 访问内容管理页面...")
          await self.page.goto("https://creator.douyin.com/creator-micro/content/manage", timeout=30000)
          await asyncio.sleep(5)

          works_result = await self.get_works(cookies, page=0, page_size=20)
          if not works_result.success:
              print(f"[{self.platform_name}] 获取作品列表失败: {works_result.error}")
              return failure(works_result.error)

          print(f"[{self.platform_name}] 获取到 {len(works_result.works)} 个作品")

          for i, work in enumerate(works_result.works):
              print(f"[{self.platform_name}] 正在获取作品 {i+1}/{len(works_result.works)} 的评论: {work.title[:20]}...")

              comments_result = await self.get_comments(cookies, work.work_id)

              if not comments_result.success:
                  # Keep the work in the mapping, with the error attached.
                  print(f"[{self.platform_name}] 获取作品 '{work.title[:20]}...' 评论失败: {comments_result.error}")
                  work_comments_mapping.append({
                      'work_info': work.to_dict(),
                      'comments': [],
                      'error': comments_result.error
                  })
              else:
                  work_comments_mapping.append({
                      'work_info': work.to_dict(),
                      'comments': [comment.to_dict() for comment in comments_result.comments]
                  })
                  print(f"[{self.platform_name}] 作品 '{work.title[:20]}...' 获取到 {len(comments_result.comments)} 条评论")

              # Throttle to avoid hitting the platform too frequently.
              await asyncio.sleep(2)

          print(f"[{self.platform_name}] 所有作品评论获取完成")

      except Exception as e:
          import traceback
          traceback.print_exc()
          return failure(str(e))
      finally:
          await self.close_browser()

      comment_total = sum(len(item['comments']) for item in work_comments_mapping)
      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': work_comments_mapping,
          'summary': {
              'total_works': len(work_comments_mapping),
              'total_comments': comment_total,
          }
      }