douyin.py 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579
  1. # -*- coding: utf-8 -*-
  2. """
  3. 抖音视频发布器
  4. 参考: matrix/douyin_uploader/main.py
  5. """
  6. import asyncio
  7. import os
  8. import json
  9. import re
  10. from datetime import datetime
  11. from typing import List
  12. from .base import (
  13. BasePublisher, PublishParams, PublishResult,
  14. WorkItem, WorksResult, CommentItem, CommentsResult
  15. )
  16. class DouyinPublisher(BasePublisher):
  17. """
  18. 抖音视频发布器
  19. 使用 Playwright 自动化操作抖音创作者中心
  20. """
  21. platform_name = "douyin"
  22. login_url = "https://creator.douyin.com/"
  23. publish_url = "https://creator.douyin.com/creator-micro/content/upload"
  24. cookie_domain = ".douyin.com"
  async def set_schedule_time(self, publish_date: datetime):
      """Switch the publish form to scheduled mode and type in the target time.

      Does nothing when no page is attached to this publisher.

      Args:
          publish_date: Moment at which the video should go live; it is
              entered into the form as ``YYYY-MM-DD HH:MM``.
      """
      page = self.page
      if not page:
          return

      # Select the "scheduled publish" radio option, then pause briefly
      # before interacting with the date-time field.
      schedule_radio = page.locator("label.radio-d4zkru:has-text('定时发布')")
      await schedule_radio.click()
      await asyncio.sleep(1)

      # Focus the date-time field, select its current content and overwrite
      # it with the formatted timestamp, confirming with Enter.
      formatted_time = publish_date.strftime("%Y-%m-%d %H:%M")
      date_field = page.locator('.semi-input[placeholder="日期和时间"]')
      await date_field.click()
      for key in ("Control+KeyA",):
          await page.keyboard.press(key)
      await page.keyboard.type(str(formatted_time))
      await page.keyboard.press("Enter")
      await asyncio.sleep(1)
  async def handle_upload_error(self, video_path: str):
      """Retry a failed upload by handing the video file to the page again.

      Does nothing when no page is attached to this publisher.

      Args:
          video_path: Path of the video file to re-submit.
      """
      page = self.page
      if not page:
          return

      print(f"[{self.platform_name}] 视频出错了,重新上传中...")
      # The retry input sits inside the progress area of the upload widget.
      retry_input = page.locator('div.progress-div [class^="upload-btn-input"]')
      await retry_input.set_input_files(video_path)
  async def check_captcha(self) -> dict:
      """Probe the current page for verification prompts using selectors.

      Selector groups are tried in a fixed order (phone/SMS code, slider,
      image challenge, login prompt); the first selector that matches at
      least one element decides the result.

      Returns:
          ``{'need_captcha': bool, 'captcha_type': str}`` where
          ``captcha_type`` is one of ``'phone'``, ``'slider'``, ``'image'``,
          ``'login'`` or ``''`` when nothing was detected (or no page is
          attached).
      """
      if not self.page:
          return {'need_captcha': False, 'captcha_type': ''}

      # (captcha_type, log label, selectors) - order matters: the first hit wins.
      probes = (
          ('phone', '检测到手机验证码', (
              'text="请输入验证码"',
              'text="输入手机验证码"',
              'text="接收短信验证码"',
              'text="获取验证码"',
              'text="手机号验证"',
              'text="短信验证"',
              '[class*="captcha"][class*="phone"]',
              '[class*="verify"][class*="phone"]',
              '[class*="sms-code"]',
              'input[placeholder*="验证码"]',
          )),
          ('slider', '检测到滑块验证码', (
              '[class*="captcha"][class*="slider"]',
              '[class*="slide-verify"]',
              '[class*="drag-verify"]',
              'text="按住滑块"',
              'text="向右滑动"',
              'text="拖动滑块"',
          )),
          ('image', '检测到图片验证码', (
              '[class*="captcha"][class*="image"]',
              '[class*="verify-image"]',
              'text="点击图片"',
              'text="选择正确的"',
          )),
          # A login prompt is reported through the same channel as a captcha.
          ('login', '检测到需要登录', (
              'text="请先登录"',
              'text="登录后继续"',
              '[class*="login-modal"]',
              '[class*="login-dialog"]',
          )),
      )

      try:
          for captcha_type, log_label, selectors in probes:
              for selector in selectors:
                  try:
                      if await self.page.locator(selector).count() > 0:
                          print(f"[{self.platform_name}] {log_label}: {selector}", flush=True)
                          return {'need_captcha': True, 'captcha_type': captcha_type}
                  except Exception:
                      # A failing selector must not abort the scan; only
                      # ordinary errors are ignored so that task cancellation
                      # still propagates.
                      continue
      except Exception as e:
          print(f"[{self.platform_name}] 验证码检测异常: {e}", flush=True)
      return {'need_captcha': False, 'captcha_type': ''}
  async def handle_phone_captcha(self) -> bool:
      """Drive the SMS verification dialog: send the code, ask for it, submit it.

      Steps, in order:
        1. Read the masked phone number (``1xx****xxxx``) from the page text.
        2. Click a "send code" control, found by selectors or, failing that,
           by a selector suggested by ``self.ai_suggest_playwright_selector``.
        3. Judge whether the SMS was sent (countdown / "resend" text, a
           disabled button, or ``self.ai_analyze_sms_send_state``).
        4. Obtain the code via ``self.request_sms_code_from_frontend`` and
           fill it into the first matching input.
        5. Click the confirm control (selectors, then a scan of dialog
           buttons, then an AI-suggested selector) and wait for the dialog
           text to disappear.

      NOTE(review): the ``ai_*`` and ``request_sms_code_from_frontend``
      helpers are not defined in this block (presumably on the base class);
      they are only assumed to return objects supporting ``.get`` / a value
      accepted by ``Locator.fill`` - confirm against their definitions.

      Returns:
          True once the code has been filled and the wait for the dialog has
          finished (even if no confirm control could be clicked); False when
          no page is attached or any step raised an ordinary exception.
      """
      if not self.page:
          return False

      # Text that indicates a code was sent: a countdown or a "resend"/"sent" hint.
      sent_pattern = r"(\d+\s*秒)|(\d+\s*s)|后可重试|重新发送|已发送"

      try:
          try:
              body_text = await self.page.inner_text("body")
          except Exception:
              body_text = ""
          phone_match = re.search(r"(1\d{2}\*{4}\d{4})", body_text or "")
          masked_phone = phone_match.group(1) if phone_match else ""

          async def _get_send_button():
              """Return the first visible "send code" control, or None."""
              candidates = [
                  self.page.get_by_role("button", name="获取验证码"),
                  self.page.get_by_role("button", name="发送验证码"),
                  self.page.locator('button:has-text("获取验证码")'),
                  self.page.locator('button:has-text("发送验证码")'),
                  self.page.locator('[role="button"]:has-text("获取验证码")'),
                  self.page.locator('[role="button"]:has-text("发送验证码")'),
                  # Non-button elements (links, spans, divs, ...).
                  self.page.locator('a:has-text("获取验证码")'),
                  self.page.locator('span:has-text("获取验证码")'),
                  self.page.locator('div:has-text("获取验证码"):not(:has(div:has-text("获取验证码")))'),
                  self.page.locator('a:has-text("发送验证码")'),
                  self.page.locator('span:has-text("发送验证码")'),
                  self.page.locator('[class*="send"]:has-text("验证码")'),
                  self.page.locator('[class*="code"]:has-text("获取")'),
                  self.page.locator('[class*="code"]:has-text("发送")'),
              ]
              for c in candidates:
                  try:
                      if await c.count() > 0 and await c.first.is_visible():
                          return c.first
                  except Exception:
                      continue
              return None

          async def _confirm_sent() -> bool:
              """Return True when the page or the send button shows the SMS went out."""
              try:
                  try:
                      txt = await self.page.inner_text("body")
                  except Exception:
                      txt = ""
                  if re.search(sent_pattern, txt or ""):
                      return True
              except Exception:
                  pass
              try:
                  btn = await _get_send_button()
                  if btn:
                      # A disabled send button is treated as "already sent".
                      if await btn.is_disabled():
                          return True
                      label = await btn.inner_text()
                      if re.search(sent_pattern, label or ""):
                          return True
              except Exception:
                  pass
              return False

          # --- Step 2: trigger sending of the SMS code -------------------
          did_click_send = False
          btn = await _get_send_button()
          if btn:
              try:
                  await btn.click(timeout=5000)
                  did_click_send = True
                  print(f"[{self.platform_name}] 已点击发送短信验证码", flush=True)
              except Exception as e:
                  print(f"[{self.platform_name}] 点击发送验证码按钮失败: {e}", flush=True)

          # Fall back to an AI-suggested selector when the regular ones miss.
          if not did_click_send:
              print(f"[{self.platform_name}] 常规选择器未命中,使用 AI 查找发送验证码按钮...", flush=True)
              try:
                  suggest = await self.ai_suggest_playwright_selector(
                      "点击弹窗中的'获取验证码'或'发送验证码'链接/按钮,用于获取短信验证码"
                  )
                  if suggest.get("has_selector") and suggest.get("selector"):
                      sel = suggest["selector"]
                      print(f"[{self.platform_name}] AI 建议选择器: {sel} (置信度: {suggest.get('confidence')})", flush=True)
                      ai_btn = self.page.locator(sel).first
                      if await ai_btn.count() > 0 and await ai_btn.is_visible():
                          await ai_btn.click(timeout=5000)
                          did_click_send = True
                          print(f"[{self.platform_name}] AI 方式点击发送验证码成功", flush=True)
              except Exception as e:
                  print(f"[{self.platform_name}] AI 查找发送验证码失败: {e}", flush=True)

          if did_click_send:
              try:
                  await self.page.wait_for_timeout(800)
              except Exception:
                  pass

          # --- Step 3: decide whether the SMS was actually sent ----------
          sent_confirmed = await _confirm_sent() if did_click_send else False
          ai_state = await self.ai_analyze_sms_send_state()
          try:
              if ai_state.get("sent_likely"):
                  sent_confirmed = True
          except Exception:
              pass

          # One retry when the AI analysis recommends clicking "send".
          if (not did_click_send or not sent_confirmed) and ai_state.get("suggested_action") == "click_send":
              btn2 = await _get_send_button()
              if btn2:
                  try:
                      await btn2.click(timeout=5000)
                      did_click_send = True
                      await self.page.wait_for_timeout(800)
                      sent_confirmed = await _confirm_sent()
                      ai_state = await self.ai_analyze_sms_send_state()
                      if ai_state.get("sent_likely"):
                          sent_confirmed = True
                  except Exception:
                      pass

          # --- Step 4: build the hint shown with the code request --------
          code_hint = "请输入短信验证码。"
          if ai_state.get("block_reason") == "slider":
              code_hint = "检测到滑块/人机验证阻塞,请先在浏览器窗口完成验证后再发送短信验证码。"
          elif ai_state.get("block_reason") in ["rate_limit", "risk"]:
              code_hint = f"页面提示可能被限制/风控({ai_state.get('notes','') or '请稍后重试'})。可稍等后重新发送验证码。"
          elif not did_click_send:
              code_hint = "未找到或无法点击“发送验证码”按钮,请在弹出的浏览器页面手动点击发送后再输入验证码。"
          elif sent_confirmed:
              code_hint = f"已检测到短信验证码已发送({ai_state.get('notes','') or '请查收短信'})。"
          else:
              code_hint = f"已尝试点击发送验证码,但未确认发送成功({ai_state.get('notes','') or '请查看是否出现倒计时/重新发送'})。"

          code = await self.request_sms_code_from_frontend(masked_phone, message=code_hint)

          # Fill the code into the first input that accepts it, most specific selector first.
          input_selectors = [
              'input[placeholder*="验证码"]',
              'input[placeholder*="短信"]',
              'input[type="tel"]',
              'input[type="text"]',
          ]
          filled = False
          for selector in input_selectors:
              try:
                  el = self.page.locator(selector).first
                  if await el.count() > 0:
                      await el.fill(code)
                      filled = True
                      break
              except Exception:
                  continue
          if not filled:
              raise Exception("未找到验证码输入框")

          # --- Step 5: click the submit / verify control ----------------
          submit_clicked = False

          # Method 1: regular selectors (button and non-button elements).
          # The ":not(:has-text("验证码"))" parts exclude the "send code" control.
          submit_selectors = [
              'button:has-text("验证"):not(:has-text("验证码"))',
              'button:has-text("确定")',
              'button:has-text("确认")',
              'button:has-text("提交")',
              'button:has-text("完成")',
              # Non-button elements.
              '[role="button"]:has-text("验证"):not(:has-text("验证码"))',
              'div:has-text("验证"):not(:has(div:has-text("验证"))):not(:has-text("验证码")):not(:has-text("短信"))',
              'span:has-text("验证"):not(:has-text("验证码"))',
              'a:has-text("验证"):not(:has-text("验证码"))',
          ]
          for selector in submit_selectors:
              try:
                  btn = self.page.locator(selector).first
                  if await btn.count() > 0 and await btn.is_visible():
                      print(f"[{self.platform_name}] 找到验证按钮: {selector}", flush=True)
                      await btn.click(timeout=5000)
                      submit_clicked = True
                      break
              except Exception as e:
                  print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}", flush=True)
                  continue

          # Method 2: scan visible buttons inside dialog-like containers
          # (and finally every button) for one whose text is exactly "验证".
          if not submit_clicked:
              print(f"[{self.platform_name}] 常规选择器未命中,遍历弹窗按钮...", flush=True)
              try:
                  dialog_selectors = [
                      '[class*="modal"] button, [class*="modal"] [role="button"]',
                      '[class*="dialog"] button, [class*="dialog"] [role="button"]',
                      '[class*="popup"] button, [class*="popup"] [role="button"]',
                      'button',
                  ]
                  for dialog_sel in dialog_selectors:
                      btns = self.page.locator(dialog_sel)
                      count = await btns.count()
                      for idx in range(count):
                          b = btns.nth(idx)
                          try:
                              text = (await b.text_content() or "").strip()
                              if text == "验证" and await b.is_visible():
                                  print(f"[{self.platform_name}] 遍历找到验证按钮: '{text}'", flush=True)
                                  await b.click(timeout=5000)
                                  submit_clicked = True
                                  break
                          except Exception:
                              continue
                      if submit_clicked:
                          break
              except Exception as e:
                  print(f"[{self.platform_name}] 遍历按钮失败: {e}", flush=True)

          # Method 3: ask the AI helper for a selector and click it.
          if not submit_clicked:
              print(f"[{self.platform_name}] 使用 AI 查找验证按钮...", flush=True)
              try:
                  suggest = await self.ai_suggest_playwright_selector(
                      "点击弹窗中红色的'验证'确认按钮(不是'取消'按钮),用于提交短信验证码"
                  )
                  if suggest.get("has_selector") and suggest.get("selector"):
                      sel = suggest["selector"]
                      print(f"[{self.platform_name}] AI 建议验证按钮选择器: {sel} (置信度: {suggest.get('confidence')})", flush=True)
                      ai_btn = self.page.locator(sel).first
                      if await ai_btn.count() > 0 and await ai_btn.is_visible():
                          await ai_btn.click(timeout=5000)
                          submit_clicked = True
                          print(f"[{self.platform_name}] AI 方式点击验证按钮成功", flush=True)
              except Exception as e:
                  print(f"[{self.platform_name}] AI 查找验证按钮失败: {e}", flush=True)

          if not submit_clicked:
              print(f"[{self.platform_name}] ⚠ 未能点击验证按钮", flush=True)

          # Wait for the dialog text to disappear; a timeout here is not
          # treated as a failure (best effort).
          try:
              await self.page.wait_for_timeout(1500)
              await self.page.wait_for_selector('text="接收短信验证码"', state="hidden", timeout=15000)
          except Exception:
              try:
                  await self.page.wait_for_selector('text="请输入验证码"', state="hidden", timeout=5000)
              except Exception:
                  pass

          print(f"[{self.platform_name}] 短信验证码已提交,继续执行发布流程", flush=True)
          return True
      except Exception as e:
          print(f"[{self.platform_name}] 处理短信验证码失败: {e}", flush=True)
          return False
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video to Douyin through the creator-center upload page.

      Flow: start the browser and apply cookies, open the upload page, deal
      with login / captcha prompts, choose the video file, fill in title and
      tags, wait for the upload, apply optional settings (location, sync
      switch, schedule) and finally click the publish button until the page
      reaches the content-management URL.

      Args:
          cookies: Cookie payload handed to ``self.parse_cookies``.
          params: Publish settings; this method reads ``video_path``,
              ``title``, ``tags``, ``location`` and ``publish_date``.

      Returns:
          A ``PublishResult`` whose ``status`` is ``'success'``,
          ``'need_captcha'`` (login or verification required), ``'failed'``
          (no upload entry found) or ``'need_action'`` (publish timed out).

      Raises:
          Exception: if the page was not initialized or the video file does
              not exist.
      """
      manage_url = "https://creator.douyin.com/creator-micro/content/manage"

      async def _captcha_blocked(error: str, captcha_type: str, page_url=None) -> PublishResult:
          """Build the 'need_captcha' result; the screenshot is taken first."""
          screenshot = await self.capture_screenshot()
          if page_url is None:
              page_url = await self.get_page_url()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=error,
              need_captcha=True,
              captcha_type=captcha_type,
              screenshot_base64=screenshot,
              page_url=page_url,
              status='need_captcha'
          )

      async def _published(page_url=None) -> PublishResult:
          """Report completion and build the success result."""
          self.report_progress(100, "发布成功")
          print(f"[{self.platform_name}] 视频发布成功!")
          screenshot = await self.capture_screenshot()
          if page_url is None:
              page_url = await self.get_page_url()
          return PublishResult(
              success=True,
              platform=self.platform_name,
              message="发布成功",
              screenshot_base64=screenshot,
              page_url=page_url,
              status='success'
          )

      async def wait_for_manual_login(timeout_seconds: int = 300) -> bool:
          """Poll the page URL every 2 s until it is a logged-in creator URL."""
          if not self.page:
              return False
          self.report_progress(8, "检测到需要登录,请在浏览器窗口完成登录...")
          try:
              await self.page.bring_to_front()
          except Exception:
              pass
          waited = 0
          while waited < timeout_seconds:
              try:
                  url = self.page.url
                  if "login" not in url and "passport" not in url:
                      if "creator.douyin.com" in url:
                          return True
              except Exception:
                  pass
              await asyncio.sleep(2)
              waited += 2
          return False

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"{'='*60}")
      self.report_progress(5, "正在初始化浏览器...")

      # Start the browser and apply the parsed cookies.
      await self.init_browser()
      print(f"[{self.platform_name}] 浏览器初始化完成")
      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)
      if not self.page:
          raise Exception("Page not initialized")

      # Make sure the video file exists before touching the site.
      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")

      self.report_progress(10, "正在打开上传页面...")
      # self.publish_url is the creator-center upload page.
      await self.page.goto(self.publish_url)
      print(f"[{self.platform_name}] 等待页面加载...")
      try:
          await self.page.wait_for_url(self.publish_url, timeout=30000)
      except Exception:
          # A redirect (e.g. to login) is handled by the URL checks below.
          pass
      await asyncio.sleep(3)

      current_url = self.page.url
      print(f"[{self.platform_name}] 当前 URL: {current_url}")

      # Redirected to a login/passport page: cookies are not valid.
      if "login" in current_url or "passport" in current_url:
          if self.headless:
              return await _captcha_blocked("Cookie 已过期,需要重新登录", 'login', current_url)
          logged_in = await wait_for_manual_login()
          if not logged_in:
              return await _captcha_blocked("需要登录:请在浏览器窗口完成登录后重试", 'login', current_url)
          # Best effort: hand the fresh cookies to self.sync_cookies_to_node.
          try:
              if self.context:
                  cookies_after = await self.context.cookies()
                  await self.sync_cookies_to_node(cookies_after)
          except Exception:
              pass
          await self.page.goto(self.publish_url)
          await asyncio.sleep(3)
          current_url = self.page.url

      # AI-based captcha detection.
      ai_captcha_result = await self.ai_check_captcha()
      if ai_captcha_result['has_captcha']:
          print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha_result['captcha_type']}", flush=True)
          return await _captcha_blocked(
              f"检测到{ai_captcha_result['captcha_type']}验证码,需要使用有头浏览器完成验证",
              ai_captcha_result['captcha_type'],
              current_url,
          )

      # Selector-based captcha detection; only SMS codes are handled automatically.
      captcha_result = await self.check_captcha()
      if captcha_result['need_captcha']:
          print(f"[{self.platform_name}] 传统方式检测到验证码: {captcha_result['captcha_type']}", flush=True)
          if captcha_result['captcha_type'] == 'phone':
              handled = await self.handle_phone_captcha()
              if not handled:
                  return await _captcha_blocked("检测到手机验证码,但自动处理失败", 'phone', current_url)
              self.report_progress(12, "短信验证码已处理,继续发布...")
          else:
              return await _captcha_blocked(
                  f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
                  captcha_result['captcha_type'],
                  current_url,
              )

      self.report_progress(15, "正在选择视频文件...")
      # Candidate upload areas, most specific first; clicking one opens a file chooser.
      upload_selectors = [
          "div[class*='container-drag-info']",
          "div[class*='container-drag']",
          "div.upload-btn",
          "div[class*='upload']",
      ]
      upload_success = False
      for selector in upload_selectors:
          try:
              upload_div = self.page.locator(selector).first
              if await upload_div.count() > 0:
                  print(f"[{self.platform_name}] 找到上传区域: {selector}")
                  async with self.page.expect_file_chooser(timeout=10000) as fc_info:
                      await upload_div.click()
                  file_chooser = await fc_info.value
                  await file_chooser.set_files(params.video_path)
                  upload_success = True
                  print(f"[{self.platform_name}] 视频文件已选择")
                  break
          except Exception as e:
              print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")
      if not upload_success:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到上传入口",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status='failed'
          )

      # Wait (up to 60 attempts) for the redirect to the post-editing page.
      self.report_progress(20, "等待进入发布页面...")
      for i in range(60):
          try:
              await self.page.wait_for_url(
                  "https://creator.douyin.com/creator-micro/content/post/video*",
                  timeout=2000
              )
              print(f"[{self.platform_name}] 已进入发布页面")
              break
          except Exception:
              print(f"[{self.platform_name}] 等待进入发布页面... {i+1}/60")
              await asyncio.sleep(1)
      await asyncio.sleep(2)

      self.report_progress(30, "正在填充标题和话题...")
      # Preferred path: the input that follows the "作品标题" label.
      title_input = self.page.get_by_text('作品标题').locator("..").locator(
          "xpath=following-sibling::div[1]").locator("input")
      if await title_input.count():
          # Only the first 30 characters of the title are entered here.
          await title_input.fill(params.title[:30])
          print(f"[{self.platform_name}] 标题已填写")
      else:
          # Fallback: type into the ".notranslate" editor after clearing it.
          title_container = self.page.locator(".notranslate")
          await title_container.click()
          await self.page.keyboard.press("Backspace")
          await self.page.keyboard.press("Control+KeyA")
          await self.page.keyboard.press("Delete")
          await self.page.keyboard.type(params.title)
          await self.page.keyboard.press("Enter")
          print(f"[{self.platform_name}] 标题已填写(备用方式)")

      # Topic tags: typed as "#tag" followed by a space.
      if params.tags:
          css_selector = ".zone-container"
          for index, tag in enumerate(params.tags, start=1):
              print(f"[{self.platform_name}] 正在添加第{index}个话题: #{tag}")
              await self.page.type(css_selector, "#" + tag)
              await self.page.press(css_selector, "Space")

      self.report_progress(40, "等待视频上传完成...")
      # The upload counts as finished once a "重新上传" element is present.
      for i in range(120):
          try:
              count = await self.page.locator("div").filter(has_text="重新上传").count()
              if count > 0:
                  print(f"[{self.platform_name}] 视频上传完毕")
                  break
              print(f"[{self.platform_name}] 正在上传视频中... {i+1}/120")
              # Re-submit the file when the page reports a failed upload.
              if await self.page.locator('div.progress-div > div:has-text("上传失败")').count():
                  print(f"[{self.platform_name}] 发现上传出错了,重新上传...")
                  await self.handle_upload_error(params.video_path)
              await asyncio.sleep(3)
          except Exception:
              print(f"[{self.platform_name}] 正在上传视频中...")
              await asyncio.sleep(3)

      self.report_progress(60, "处理视频设置...")
      # Dismiss the "我知道了" notice when present.
      known_count = await self.page.get_by_role("button", name="我知道了").count()
      if known_count > 0:
          await self.page.get_by_role("button", name="我知道了").nth(0).click()
          print(f"[{self.platform_name}] 关闭弹窗")
      await asyncio.sleep(5)

      # Location (best effort). Skipped when no location was requested so
      # that an arbitrary first suggestion is never selected.
      if params.location:
          try:
              await self.page.locator('div.semi-select span:has-text("输入地理位置")').click()
              await asyncio.sleep(1)
              await self.page.keyboard.press("Backspace")
              await self.page.keyboard.press("Control+KeyA")
              await self.page.keyboard.press("Delete")
              await self.page.keyboard.type(params.location)
              await asyncio.sleep(1)
              await self.page.locator('div[role="listbox"] [role="option"]').first.click()
              print(f"[{self.platform_name}] 位置设置成功: {params.location}")
          except Exception as e:
              print(f"[{self.platform_name}] 设置位置失败: {e}")

      # Turn on the Toutiao/Xigua sync switch if it exists and is off (best effort).
      try:
          third_part_element = '[class^="info"] > [class^="first-part"] div div.semi-switch'
          if await self.page.locator(third_part_element).count():
              class_name = await self.page.eval_on_selector(
                  third_part_element, 'div => div.className')
              if 'semi-switch-checked' not in class_name:
                  await self.page.locator(third_part_element).locator(
                      'input.semi-switch-native-control').click()
                  print(f"[{self.platform_name}] 已开启头条/西瓜同步")
      except Exception:
          pass

      # Scheduled publishing.
      if params.publish_date:
          self.report_progress(70, "设置定时发布...")
          await self.set_schedule_time(params.publish_date)

      self.report_progress(80, "正在发布...")
      print(f"[{self.platform_name}] 查找发布按钮...")
      # Up to 30 attempts: check for captchas, click publish, wait for the manage page.
      for i in range(30):
          try:
              # Cheap selector check on every attempt; the AI check only
              # on every fifth attempt.
              captcha_detected = False
              captcha_type = ''
              fast_captcha = await self.check_captcha()
              if fast_captcha['need_captcha']:
                  captcha_detected = True
                  captcha_type = fast_captcha['captcha_type']
                  print(f"[{self.platform_name}] 快速检测到验证码: {captcha_type}", flush=True)
              elif i > 0 and i % 5 == 0:
                  ai_captcha = await self.ai_check_captcha()
                  if ai_captcha['has_captcha']:
                      captcha_detected = True
                      captcha_type = ai_captcha['captcha_type']
                      print(f"[{self.platform_name}] AI检测到发布过程中需要验证码: {captcha_type}", flush=True)

              if captcha_detected:
                  if captcha_type == 'phone':
                      handled = await self.handle_phone_captcha()
                      if handled:
                          continue
                  return await _captcha_blocked(
                      f"发布过程中需要{captcha_type}验证码,请使用有头浏览器完成验证",
                      captcha_type,
                  )

              publish_btn = self.page.get_by_role('button', name="发布", exact=True)
              if await publish_btn.count() > 0:
                  print(f"[{self.platform_name}] 点击发布按钮...")
                  await publish_btn.click()
              # Reaching the content-management page means the post was accepted.
              await self.page.wait_for_url(manage_url, timeout=5000)
              return await _published()
          except Exception:
              current_url = self.page.url
              # The redirect may have happened even though a step above raised.
              if manage_url in current_url:
                  return await _published(current_url)
              print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30, URL: {current_url}")
              await asyncio.sleep(1)

      # All attempts used up without reaching the manage page.
      print(f"[{self.platform_name}] 发布超时,获取截图...")
      screenshot_base64 = await self.capture_screenshot()
      page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="发布超时,请检查发布状态",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status='need_action'
      )
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Fetch one page of the account's works from the creator-center work_list API.

      The request is issued with ``fetch`` from inside the creator home page
      so the browser session's cookies are sent along with it.

      Args:
          cookies: Cookie string or JSON, parsed by ``self.parse_cookies``.
          page: Pagination cursor. Pass 0 for the first request, then the
              ``next_page`` of the previous result (the API's ``max_cursor``).
          page_size: Number of works requested per page.

      Returns:
          WorksResult: carries ``works``, ``total``, ``has_more`` and
          ``next_page`` on success. ``success`` is False and ``error`` is set
          when the login has expired, the API request fails, or parsing raises.

      Note:
          This method does not call ``close_browser`` itself.
          NOTE(review): presumably the caller owns browser teardown - confirm.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品列表")
      print(f"[{self.platform_name}] cursor={page}, page_size={page_size}")
      print(f"{'='*60}")

      def first_url(container) -> str:
          """Return the first entry of ``container['url_list']``, or '' if absent."""
          if not isinstance(container, dict):
              return ''
          url_list = container.get('url_list') or []
          return url_list[0] if url_list else ''

      def to_int(value) -> int:
          """Coerce an API counter that may be missing or null to int."""
          return int(value or 0)

      works: List[WorkItem] = []
      total = 0
      has_more = False
      next_cursor = 0
      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Load the creator home page first so an expired session shows up
          # as a redirect to a login/passport URL.
          await self.page.goto("https://creator.douyin.com/creator-micro/home")
          await asyncio.sleep(3)
          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # ``page`` doubles as the API's max_cursor (0 on the first call).
          max_cursor = page
          api_url = (
              "https://creator.douyin.com/janus/douyin/creator/pc/work_list"
              f"?status=0&device_platform=android&count={page_size}&max_cursor={max_cursor}"
              "&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32"
              "&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai"
          )
          # The URL is handed over as the evaluate() argument instead of being
          # spliced into the JavaScript source text.
          response = await self.page.evaluate('''
              async (url) => {
                  try {
                      const resp = await fetch(url, {
                          credentials: 'include',
                          headers: { 'Accept': 'application/json' }
                      });
                      return await resp.json();
                  } catch (e) {
                      return { error: e.toString() };
                  }
              }
          ''', api_url)

          if not isinstance(response, dict):
              raise Exception(f"API 响应格式异常: {response!r}")
          api_error = response.get('error')
          if api_error:
              print(f"[{self.platform_name}] API 请求失败: {api_error}", flush=True)
              # A failed fetch yields only {error: ...}; reporting that as an
              # empty successful page would look like "this account has no works".
              if 'aweme_list' not in response:
                  raise Exception(f"API 请求失败: {api_error}")

          aweme_list = response.get('aweme_list') or []
          has_more = response.get('has_more', False)
          # Next cursor: prefer max_cursor, fall back to next_cursor.
          next_cursor = response.get('max_cursor') if 'max_cursor' in response else response.get('next_cursor')
          if next_cursor is None:
              next_cursor = 0

          # The total work count is read from the first work's author.aweme_count.
          if aweme_list:
              author_aweme_count = to_int((aweme_list[0].get('author') or {}).get('aweme_count'))
              if author_aweme_count > 0:
                  total = author_aweme_count
                  print(f"[{self.platform_name}] 从 author.aweme_count 获取总作品数: {total}")
          print(f"[{self.platform_name}] API 响应: has_more={has_more}, aweme_list={len(aweme_list)}, next_cursor={next_cursor}")

          for aweme in aweme_list:
              aweme_id = str(aweme.get('aweme_id') or '')
              if not aweme_id:
                  continue
              # Nested objects may be JSON null, so default every level to {}.
              statistics = aweme.get('statistics') or {}
              video = aweme.get('video') or {}

              cover_url = first_url(aweme.get('Cover')) or first_url(video.get('cover'))
              title = aweme.get('item_title') or (aweme.get('desc') or '').split('\n')[0][:50] or '无标题'
              # The API reports duration in milliseconds.
              duration = to_int(video.get('duration')) // 1000
              create_time = aweme.get('create_time') or 0
              publish_time = datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S') if create_time else ''
              # Stored video_url: first play_addr URL, else the public share page.
              video_url = first_url(video.get('play_addr')) or f"https://www.douyin.com/video/{aweme_id}"

              works.append(WorkItem(
                  work_id=aweme_id,
                  title=title,
                  cover_url=cover_url,
                  video_url=video_url,
                  duration=duration,
                  status='published',
                  publish_time=publish_time,
                  play_count=to_int(statistics.get('play_count')),
                  like_count=to_int(statistics.get('digg_count')),
                  comment_count=to_int(statistics.get('comment_count')),
                  share_count=to_int(statistics.get('share_count')),
                  collect_count=to_int(statistics.get('collect_count')),
              ))

          if total == 0:
              total = len(works)
          print(f"[{self.platform_name}] 本页获取到 {len(works)} 个作品")
      except Exception as e:
          import traceback
          traceback.print_exc()
          return WorksResult(
              success=False,
              platform=self.platform_name,
              error=str(e)
          )
      return WorksResult(
          success=True,
          platform=self.platform_name,
          works=works,
          total=total,
          has_more=has_more,
          next_page=next_cursor
      )
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Collect comments of one work by intercepting the video page's comment API.

      Opens ``https://www.douyin.com/video/<work_id>``, listens for responses
      whose URL contains ``/comment/list`` and parses the last captured
      payload. The browser is always closed before returning.

      Args:
          cookies: Cookie string or JSON, parsed by ``self.parse_cookies``.
          work_id: Douyin work (aweme) id.
          cursor: Only logged at present; the page itself decides which
              comment page is requested.

      Returns:
          CommentsResult: ``comments``, ``total`` and ``has_more`` on success,
          plus a ``cursor`` entry written into the result's ``__dict__``.
          ``success`` is False with ``error`` set when anything raises.
          A page that never triggers the comment API still returns success
          with an empty list.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品评论")
      print(f"[{self.platform_name}] work_id={work_id}, cursor={cursor}")
      print(f"{'='*60}")

      def first_avatar(user: dict) -> str:
          """Return the first avatar thumbnail URL, or '' when there is none."""
          thumb = user.get('avatar_thumb')
          urls = thumb.get('url_list') if isinstance(thumb, dict) else None
          return urls[0] if urls else ''

      def format_time(timestamp) -> str:
          """Format a Unix timestamp as 'YYYY-MM-DD HH:MM:SS' ('' when falsy)."""
          return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S') if timestamp else ''

      def build_item(raw: dict, **extra) -> CommentItem:
          """Map one raw API comment (or reply) onto a CommentItem."""
          author = raw.get('user') or {}
          return CommentItem(
              comment_id=str(raw.get('cid', '')),
              work_id=work_id,
              content=raw.get('text', ''),
              author_id=str(author.get('uid', '')),
              author_name=author.get('nickname', ''),
              author_avatar=first_avatar(author),
              like_count=int(raw.get('digg_count') or 0),
              create_time=format_time(raw.get('create_time')),
              is_author=raw.get('is_author', False),
              **extra,
          )

      comments: List[CommentItem] = []
      total = 0
      has_more = False
      next_cursor = ""
      captured_data = {}
      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          async def handle_response(response):
              """Keep the most recent successful comment-list payload."""
              nonlocal captured_data
              url = response.url
              # Matches /aweme/v1/web/comment/list/ as well as /comment/list/.
              if '/comment/list' in url and ('aweme_id' in url or work_id in url):
                  try:
                      json_data = await response.json()
                      print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
                      if json_data.get('status_code') == 0 or json_data.get('comments'):
                          captured_data = json_data
                          # ``comments`` may be null for a work without comments.
                          comment_count = len(json_data.get('comments') or [])
                          print(f"[{self.platform_name}] 评论 API 响应成功: comments={comment_count}, has_more={json_data.get('has_more')}", flush=True)
                  except Exception as e:
                      print(f"[{self.platform_name}] 解析评论响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"[{self.platform_name}] 已注册评论 API 响应监听器", flush=True)

          # Loading the detail page triggers the comment request by itself.
          video_url = f"https://www.douyin.com/video/{work_id}"
          print(f"[{self.platform_name}] 访问视频详情页: {video_url}", flush=True)
          await self.page.goto(video_url, wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          if not captured_data:
              print(f"[{self.platform_name}] 等待评论 API 响应...", flush=True)
              # Scroll a little to nudge the page into loading comments.
              await self.page.evaluate('window.scrollBy(0, 300)')
              await asyncio.sleep(3)
          if not captured_data:
              # Give the request a bit more time.
              await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)

          if captured_data:
              comment_list = captured_data.get('comments') or []
              # The API may send has_more as 0/1; normalise to a real bool.
              has_more = bool(captured_data.get('has_more'))
              next_cursor = str(captured_data.get('cursor', ''))
              total = captured_data.get('total', 0) or len(comment_list)
              print(f"[{self.platform_name}] 解析评论: total={total}, has_more={has_more}, comments={len(comment_list)}", flush=True)
              for comment in comment_list:
                  if not str(comment.get('cid', '')):
                      continue
                  replies = [build_item(reply) for reply in (comment.get('reply_comment') or [])]
                  comments.append(build_item(
                      comment,
                      reply_count=int(comment.get('reply_comment_total') or 0),
                      replies=replies,
                  ))
              print(f"[{self.platform_name}] 解析到 {len(comments)} 条评论", flush=True)
          else:
              print(f"[{self.platform_name}] 未捕获到评论 API 响应", flush=True)
      except Exception as e:
          import traceback
          traceback.print_exc()
          return CommentsResult(
              success=False,
              platform=self.platform_name,
              work_id=work_id,
              error=str(e)
          )
      finally:
          await self.close_browser()

      result = CommentsResult(
          success=True,
          platform=self.platform_name,
          work_id=work_id,
          comments=comments,
          total=total,
          has_more=has_more
      )
      # The cursor is attached directly to the instance dict, as before.
      result.__dict__['cursor'] = next_cursor
      return result
  async def get_all_comments(self, cookies: str) -> dict:
      """Collect comments for the account's works via the comment-management page.

      Registers a response listener, opens the creator-center comment page,
      clicks through at most 10 works in the "选择作品" picker and scrolls, then
      groups every captured comment by work id. A comment captured more than
      once for the same work is kept only once. The browser is always closed
      before returning.

      Args:
          cookies: Cookie string or JSON, parsed by ``self.parse_cookies``.

      Returns:
          dict: ``{'success': True, 'platform', 'work_comments', 'total'}``
          where ``total`` is the number of works, or
          ``{'success': False, 'platform', 'error', 'work_comments': []}``
          when anything raises.
      """
      import re

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取所有作品评论")
      print(f"{'='*60}")

      aweme_id_pattern = re.compile(r'aweme_id=(\d+)')

      def first_url(container) -> str:
          """Return the first entry of ``container['url_list']``, or '' if absent."""
          if not isinstance(container, dict):
              return ''
          url_list = container.get('url_list') or []
          return url_list[0] if url_list else ''

      all_work_comments = []
      captured_comments = []
      captured_works = {}  # work_id -> work_info
      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          async def handle_response(response):
              """Accumulate comment and work-list payloads seen on the page."""
              url = response.url
              try:
                  # Comment list APIs: /comment/list/select/, /comment/read,
                  # /creator/comment/list.
                  if '/comment/list' in url or '/comment/read' in url or 'comment_list' in url:
                      json_data = await response.json()
                      print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
                      # Two payload shapes: ``comments`` or ``comment_info_list``.
                      comments = json_data.get('comments') or json_data.get('comment_info_list') or []
                      if comments:
                          # The owning work id is taken from the request URL.
                          aweme_id_match = aweme_id_pattern.search(url)
                          aweme_id = aweme_id_match.group(1) if aweme_id_match else ''
                          for comment in comments:
                              if aweme_id and 'aweme_id' not in comment:
                                  comment['aweme_id'] = aweme_id
                              captured_comments.append(comment)
                          print(f"[{self.platform_name}] 捕获到 {len(comments)} 条评论 (aweme_id={aweme_id}),总计: {len(captured_comments)}", flush=True)
                  # Work list APIs.
                  if '/work_list' in url or '/item/list' in url or '/creator/item' in url:
                      json_data = await response.json()
                      aweme_list = json_data.get('aweme_list') or json_data.get('item_info_list') or json_data.get('item_list') or []
                      print(f"[{self.platform_name}] 捕获到作品列表 API: {len(aweme_list)} 个作品", flush=True)
                      for aweme in aweme_list:
                          aweme_id = str(aweme.get('aweme_id') or aweme.get('item_id') or aweme.get('item_id_plain') or '')
                          if not aweme_id:
                              continue
                          cover_url = (
                              first_url(aweme.get('Cover'))
                              or first_url((aweme.get('video') or {}).get('cover'))
                              or aweme.get('cover_image_url')
                              or ''
                          )
                          captured_works[aweme_id] = {
                              'title': aweme.get('item_title', '') or aweme.get('title', '') or aweme.get('desc', ''),
                              'cover': cover_url,
                              'comment_count': (aweme.get('statistics') or {}).get('comment_count', 0) or aweme.get('comment_count', 0),
                          }
              except Exception as e:
                  print(f"[{self.platform_name}] 解析响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"[{self.platform_name}] 已注册 API 响应监听器", flush=True)

          print(f"[{self.platform_name}] 访问评论管理页面...", flush=True)
          await self.page.goto("https://creator.douyin.com/creator-micro/interactive/comment", wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")
          print(f"[{self.platform_name}] 页面加载完成,当前捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          # Best effort: open the work picker and click works so the page
          # requests each work's comments.
          try:
              select_btn = await self.page.query_selector('text="选择作品"')
              if select_btn:
                  print(f"[{self.platform_name}] 点击选择作品按钮...", flush=True)
                  await select_btn.click()
                  await asyncio.sleep(3)
                  work_items = await self.page.query_selector_all('[class*="work-item"], [class*="video-item"], [class*="aweme-item"]')
                  print(f"[{self.platform_name}] 找到 {len(work_items)} 个作品元素", flush=True)
                  # At most 10 works are processed.
                  for i, item in enumerate(work_items[:10]):
                      try:
                          await item.click()
                          await asyncio.sleep(2)
                          print(f"[{self.platform_name}] 已点击作品 {i+1}/{min(len(work_items), 10)}", flush=True)
                      except Exception as e:
                          # One failed click must not stop the rest, but it
                          # should not disappear silently either.
                          print(f"[{self.platform_name}] 点击作品 {i+1} 失败: {e}", flush=True)
                  # Close the work picker popup.
                  close_btn = await self.page.query_selector('[class*="close"], [class*="cancel"]')
                  if close_btn:
                      await close_btn.click()
                      await asyncio.sleep(1)
          except Exception as e:
              print(f"[{self.platform_name}] 选择作品操作失败: {e}", flush=True)

          # Scroll to load more comments.
          for _ in range(5):
              await self.page.evaluate('window.scrollBy(0, 500)')
              await asyncio.sleep(1)
          await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)
          print(f"[{self.platform_name}] 最终捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          # Group comments by work.
          work_comments_map = {}  # work_id -> work_comments
          seen_comments = set()  # (work_id, comment_id) pairs already stored
          for comment in captured_comments:
              aweme = comment.get('aweme') or comment.get('item') or {}
              aweme_id = str(comment.get('aweme_id') or aweme.get('aweme_id') or aweme.get('item_id') or '')
              if not aweme_id:
                  continue
              if aweme_id not in work_comments_map:
                  work_info = captured_works.get(aweme_id, {})
                  work_comments_map[aweme_id] = {
                      'work_id': aweme_id,
                      'title': aweme.get('title', '') or aweme.get('desc', '') or work_info.get('title', ''),
                      'cover_url': first_url(aweme.get('cover')) or work_info.get('cover', ''),
                      'comments': []
                  }
              cid = str(comment.get('cid') or '')
              # The same response can be captured again when a work is
              # re-selected, so store each comment only once per work.
              if not cid or (aweme_id, cid) in seen_comments:
                  continue
              seen_comments.add((aweme_id, cid))
              user = comment.get('user') or {}
              work_comments_map[aweme_id]['comments'].append({
                  'comment_id': cid,
                  'author_id': str(user.get('uid', '')),
                  'author_name': user.get('nickname', ''),
                  'author_avatar': first_url(user.get('avatar_thumb')),
                  'content': comment.get('text', ''),
                  'like_count': int(comment.get('digg_count') or 0),
                  'create_time': datetime.fromtimestamp(comment.get('create_time', 0)).strftime('%Y-%m-%d %H:%M:%S') if comment.get('create_time') else '',
                  'is_author': comment.get('is_author', False),
              })

          all_work_comments = list(work_comments_map.values())
          total_comments = sum(len(w['comments']) for w in all_work_comments)
          print(f"[{self.platform_name}] 获取到 {len(all_work_comments)} 个作品的 {total_comments} 条评论", flush=True)
      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e),
              'work_comments': []
          }
      finally:
          await self.close_browser()
      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': all_work_comments,
          'total': len(all_work_comments)
      }
  async def auto_reply_private_messages(self, cookies: str) -> dict:
      """Reply to private-message conversations in the creator-center chat page.

      Walks the "陌生人私信" and "朋友私信" tabs, opens every listed conversation
      and sends one reply unless the list preview looks like shared media or
      the last chat bubble was sent by this account. Replies come from
      ``_generate_reply_with_ai`` with ``_generate_reply`` as fallback. The
      browser is always closed before returning.

      Args:
          cookies: Cookie string or JSON, parsed by ``self.parse_cookies``.

      Returns:
          dict: ``{'success': True, 'platform', 'replied_count', 'message'}``
          or ``{'success': False, 'platform', 'error'}`` when setup or
          navigation raises. Errors inside a single conversation are logged
          and skipped.
      """
      header_rule = '=' * 60
      print(f"\n{header_rule}")
      print(f"[{self.platform_name}] 开始自动回复抖音私信")
      print(header_rule)
      try:
          await self.init_browser()
          await self.set_cookies(self.parse_cookies(cookies))
          if not self.page:
              raise Exception("Page not initialized")

          # Open the private-message page.
          await self.page.goto("https://creator.douyin.com/creator-micro/data/following/chat", timeout=30000)
          await asyncio.sleep(3)

          # A redirect to a login/passport URL means the cookies expired.
          landed_url = self.page.url
          print(f"[{self.platform_name}] 当前 URL: {landed_url}")
          if "login" in landed_url or "passport" in landed_url:
              raise Exception("Cookie 已过期,请重新登录")

          sent_total = 0
          tab_rule = '=' * 50
          for tab_name in ("陌生人私信", "朋友私信"):
              print(f"\n{tab_rule}")
              print(f"[{self.platform_name}] 处理 {tab_name} ...")
              print(tab_rule)

              # Switch to the tab; skip it when it is not on the page.
              tab = self.page.locator(f'div.semi-tabs-tab:text-is("{tab_name}")')
              if not await tab.count() > 0:
                  print(f"⚠️ 未找到 {tab_name} 标签,跳过")
                  continue
              await tab.click()
              await asyncio.sleep(2)

              session_count = await self.page.locator('.semi-list-item').count()
              print(f"[{self.platform_name}] {tab_name} 共找到 {session_count} 条会话")
              if session_count == 0:
                  print(f"[{self.platform_name}] {tab_name} 无新私信")
                  continue

              for idx in range(session_count):
                  try:
                      # Re-query the list each round because the DOM may change.
                      rows = self.page.locator('.semi-list-item')
                      if idx >= await rows.count():
                          break
                      row = rows.nth(idx)
                      user_name = await row.locator('.item-header-name-vL_79m').inner_text()
                      preview = await row.locator('.text-whxV9A').inner_text()
                      print(f"\n ➤ [{idx+1}/{session_count}] 处理用户: {user_name} | 最后消息: {preview[:30]}...")

                      # Skip previews that look like a shared video/image/link.
                      if "分享" in preview and any(word in preview for word in ("视频", "图片", "链接")):
                          print(" ➤ 会话预览为非文字消息,跳过")
                          continue

                      # Open the conversation.
                      await row.click()
                      await asyncio.sleep(2)

                      # Do not reply when the newest bubble is our own; the
                      # 'is-me-' class fragment marks bubbles sent by this account.
                      bubbles = self.page.locator('.box-item-dSA1TJ:not(.time-Za5gKL)')
                      bubble_total = await bubbles.count()
                      if bubble_total > 0:
                          newest_classes = await bubbles.nth(bubble_total - 1).get_attribute('class') or ''
                          if 'is-me-' in newest_classes:
                              print(" ➤ 最后一条是我发的,跳过")
                              continue

                      chat_history = await self._extract_chat_history()
                      if not chat_history:
                          print(" ➤ 聊天历史为空,跳过")
                          continue

                      # AI reply first, rule-based reply as fallback.
                      reply_text = await self._generate_reply_with_ai(chat_history)
                      if not reply_text:
                          reply_text = self._generate_reply(chat_history)
                      if not reply_text:
                          print(" ➤ 无需回复")
                          continue
                      print(f" 📝 回复内容: {reply_text}")

                      # Fill the input box and send.
                      input_box = self.page.locator('div.chat-input-dccKiL[contenteditable="true"]')
                      send_btn = self.page.locator('button:has-text("发送")')
                      if not (await input_box.is_visible() and await send_btn.is_visible()):
                          print(" ❌ 输入框或发送按钮不可见")
                          continue
                      await input_box.fill(reply_text)
                      await asyncio.sleep(0.5)
                      await send_btn.click()
                      print(" ✅ 已发送")
                      sent_total += 1
                      await asyncio.sleep(2)
                  except Exception as e:
                      print(f" ❌ 处理会话 {idx+1} 时出错: {e}")
                      continue

          print(f"[{self.platform_name}] 自动回复完成,共回复 {sent_total} 条消息")
          return {
              'success': True,
              'platform': self.platform_name,
              'replied_count': sent_total,
              'message': f'成功回复 {sent_total} 条私信'
          }
      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e)
          }
      finally:
          await self.close_browser()
  1213. # 辅助方法保持兼容(可复用)
  def _generate_reply(self, chat_history: list) -> str:
      """Return a canned reply chosen by keywords in the newest history entry.

      Args:
          chat_history: List of message dicts; only the ``"content"`` of the
              last entry is inspected.

      Returns:
          A greeting when the history is empty, otherwise the reply of the
          first keyword group found in the message, or a generic
          acknowledgement when nothing matches.
      """
      if not chat_history:
          return "你好!感谢联系~"

      newest_text = chat_history[-1]["content"]
      # Checked top to bottom; the first group with a hit wins.
      keyword_replies = (
          (("谢谢", "感谢"), "不客气!欢迎常来交流~"),
          (("你好", "在吗"), "你好!请问有什么可以帮您的?"),
          (("视频", "怎么拍"), "视频是用手机拍摄的,注意光线和稳定哦!"),
      )
      for keywords, canned_reply in keyword_replies:
          if any(keyword in newest_text for keyword in keywords):
              return canned_reply
      return "收到!我会认真阅读您的留言~"
  async def _extract_chat_history(self) -> list:
      """Read the text messages of the currently open conversation.

      Returns:
          A list of ``{"author", "content", "is_author"}`` dicts in page
          order. ``is_author`` is True for bubbles whose class contains
          ``is-me-`` (sent by this account). Bubbles without a text element or
          with blank text are left out. Returns ``[]`` when there is no page.
      """
      if not self.page:
          return []

      # All chat bubbles except the timestamp separators.
      bubbles = self.page.locator('.box-item-dSA1TJ:not(.time-Za5gKL)')
      bubble_total = await bubbles.count()
      records = []
      for position in range(bubble_total):
          try:
              bubble = bubbles.nth(position)
              css_classes = await bubble.get_attribute('class') or ''
              sent_by_me = 'is-me-' in css_classes

              text_node = bubble.locator('.text-X2d7fS')
              if not await text_node.count() > 0:
                  continue
              body = (await text_node.inner_text()).strip()
              if not body:
                  continue

              if sent_by_me:
                  speaker = '我'
              else:
                  # Use the peer's display name when the bubble carries one.
                  name_nodes = bubble.locator('.aweme-author-name-m8uoXU')
                  if await name_nodes.count() > 0:
                      speaker = await name_nodes.nth(0).inner_text()
                  else:
                      speaker = '用户'

              records.append({
                  "author": speaker,
                  "content": body,
                  "is_author": sent_by_me,
              })
          except Exception as e:
              print(f" ⚠️ 解析第 {position+1} 条消息失败: {e}")
              continue
      return records
  async def _generate_reply_with_ai(self, chat_history: list) -> str:
      """Generate a reply through an OpenAI-compatible chat API, with rule fallback.

      Configuration is read from the environment: ``DASHSCOPE_API_KEY``,
      ``DASHSCOPE_BASE_URL`` and ``AI_MODEL``. Without an API key, on a
      non-200 response, an empty completion or any error, the result of
      ``self._generate_reply(chat_history)`` is returned instead.

      Args:
          chat_history: Message dicts with ``"content"`` and optional
              ``"is_author"``; own messages are sent as ``assistant`` turns,
              all others as ``user`` turns.

      Returns:
          The reply text.
      """
      import asyncio
      import os

      ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
      if not ai_api_key:
          # No key configured: no need to import or call the HTTP client.
          return self._generate_reply(chat_history)
      ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
      ai_model = os.environ.get('AI_MODEL', 'qwen-plus')

      try:
          # Imported lazily so a missing ``requests`` degrades to rule replies.
          import requests

          messages = [{"role": "system", "content": "你是一个友好的抖音创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。"}]
          for msg in chat_history:
              role = "assistant" if msg.get("is_author", False) else "user"
              messages.append({"role": role, "content": msg["content"]})
          headers = {'Authorization': f'Bearer {ai_api_key}', 'Content-Type': 'application/json'}
          payload = {"model": ai_model, "messages": messages, "max_tokens": 150, "temperature": 0.8}

          # requests is blocking; run it in the default executor so the event
          # loop is not stalled for up to the 30 s timeout.
          loop = asyncio.get_running_loop()
          response = await loop.run_in_executor(
              None,
              lambda: requests.post(f"{ai_base_url}/chat/completions", headers=headers, json=payload, timeout=30),
          )
          if response.status_code != 200:
              return self._generate_reply(chat_history)

          choices = response.json().get('choices') or [{}]
          ai_reply = ((choices[0].get('message') or {}).get('content') or '').strip()
          return ai_reply if ai_reply else self._generate_reply(chat_history)
      except Exception as e:
          # ``Exception`` rather than a bare except, so task cancellation and
          # KeyboardInterrupt still propagate.
          print(f"[{self.platform_name}] AI 回复生成失败,使用规则回复: {e}")
          return self._generate_reply(chat_history)
  async def get_work_comments_mapping(self, cookies: str) -> dict:
      """Build a list pairing each work with its comments.

      Fetches the first page of works (up to 20) through ``get_works`` and
      then calls ``get_comments`` once per work, pausing 2 seconds between
      works. A work whose comments cannot be fetched is still listed, with an
      empty ``comments`` list and an ``error`` entry.

      Args:
          cookies: Cookies of the Douyin creator platform.

      Returns:
          dict: ``{'success': True, 'platform', 'work_comments', 'summary'}``
          where ``summary`` holds ``total_works`` and ``total_comments``, or
          ``{'success': False, 'platform', 'error', 'work_comments': []}``
          when the work list cannot be fetched or anything raises.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品和评论对应关系")
      print(f"{'='*60}")
      work_comments_mapping = []
      try:
          # get_works starts its own browser session, loads the creator home
          # page and checks for an expired login, so no separate preflight
          # session is opened here.
          works_result = await self.get_works(cookies, page=0, page_size=20)
          if not works_result.success:
              print(f"[{self.platform_name}] 获取作品列表失败: {works_result.error}")
              return {
                  'success': False,
                  'platform': self.platform_name,
                  'error': works_result.error,
                  'work_comments': []
              }
          print(f"[{self.platform_name}] 获取到 {len(works_result.works)} 个作品")

          for i, work in enumerate(works_result.works):
              print(f"[{self.platform_name}] 正在获取作品 {i+1}/{len(works_result.works)} 的评论: {work.title[:20]}...")
              comments_result = await self.get_comments(cookies, work.work_id)
              if comments_result.success:
                  work_comments_mapping.append({
                      'work_info': work.to_dict(),
                      'comments': [comment.to_dict() for comment in comments_result.comments]
                  })
                  print(f"[{self.platform_name}] 作品 '{work.title[:20]}...' 获取到 {len(comments_result.comments)} 条评论")
              else:
                  print(f"[{self.platform_name}] 获取作品 '{work.title[:20]}...' 评论失败: {comments_result.error}")
                  work_comments_mapping.append({
                      'work_info': work.to_dict(),
                      'comments': [],
                      'error': comments_result.error
                  })
              # Pause between works to avoid hammering the site.
              await asyncio.sleep(2)
          print(f"[{self.platform_name}] 所有作品评论获取完成")
      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e),
              'work_comments': []
          }
      finally:
          await self.close_browser()
      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': work_comments_mapping,
          'summary': {
              'total_works': len(work_comments_mapping),
              'total_comments': sum(len(item['comments']) for item in work_comments_mapping),
          }
      }