douyin.py 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469
  1. # -*- coding: utf-8 -*-
  2. """
  3. 抖音视频发布器
  4. 参考: matrix/douyin_uploader/main.py
  5. """
  6. import asyncio
  7. import os
  8. import json
  9. import re
  10. from datetime import datetime
  11. from typing import List
  12. from .base import (
  13. BasePublisher, PublishParams, PublishResult,
  14. WorkItem, WorksResult, CommentItem, CommentsResult
  15. )
  16. class DouyinPublisher(BasePublisher):
  17. """
  18. 抖音视频发布器
  19. 使用 Playwright 自动化操作抖音创作者中心
  20. """
  21. platform_name = "douyin"
  22. login_url = "https://creator.douyin.com/"
  23. publish_url = "https://creator.douyin.com/creator-micro/content/upload"
  24. cookie_domain = ".douyin.com"
  async def set_schedule_time(self, publish_date: datetime):
      """Switch the post to scheduled publishing and type in the target time.

      Does nothing when no page is attached to this publisher.

      Args:
          publish_date: Moment the post should go live; it is typed into the
              date field formatted as ``YYYY-MM-DD HH:MM``.
      """
      page = self.page
      if not page:
          return

      # Pick the "scheduled publish" radio option, then give the UI a moment
      # to reveal the date/time field.
      await page.locator("label.radio-d4zkru:has-text('定时发布')").click()
      await asyncio.sleep(1)

      # Replace whatever the field currently holds: focus it, select all,
      # type the new value and confirm with Enter.
      stamp = publish_date.strftime("%Y-%m-%d %H:%M")
      await page.locator('.semi-input[placeholder="日期和时间"]').click()
      keyboard = page.keyboard
      await keyboard.press("Control+KeyA")
      await keyboard.type(stamp)
      await keyboard.press("Enter")
      await asyncio.sleep(1)
  async def handle_upload_error(self, video_path: str):
      """Retry a failed upload by handing the same file to the upload input again.

      Does nothing when no page is attached to this publisher.

      Args:
          video_path: Path of the video file to feed back into the file input.
      """
      if self.page:
          print(f"[{self.platform_name}] 视频出错了,重新上传中...")
          # The retry input lives inside the progress widget.
          retry_input = self.page.locator('div.progress-div [class^="upload-btn-input"]')
          await retry_input.set_input_files(video_path)
  async def check_captcha(self) -> dict:
      """Probe the current page for verification prompts using fixed selectors.

      Selector groups are tried in this order: phone/SMS code, slider, image
      captcha, login prompt. The first selector that matches at least one
      element decides the result.

      Returns:
          ``{'need_captcha': bool, 'captcha_type': str}`` where
          ``captcha_type`` is one of ``'phone'``, ``'slider'``, ``'image'``,
          ``'login'``, or ``''`` when nothing matched (or no page is attached).
      """
      if not self.page:
          return {'need_captcha': False, 'captcha_type': ''}

      # (captcha_type, label used in the log line, selectors to probe)
      probes = [
          ('phone', '手机验证码', [
              'text="请输入验证码"',
              'text="输入手机验证码"',
              'text="获取验证码"',
              'text="手机号验证"',
              '[class*="captcha"][class*="phone"]',
              '[class*="verify"][class*="phone"]',
              '[class*="sms-code"]',
              'input[placeholder*="验证码"]',
          ]),
          ('slider', '滑块验证码', [
              '[class*="captcha"][class*="slider"]',
              '[class*="slide-verify"]',
              '[class*="drag-verify"]',
              'text="按住滑块"',
              'text="向右滑动"',
              'text="拖动滑块"',
          ]),
          ('image', '图片验证码', [
              '[class*="captcha"][class*="image"]',
              '[class*="verify-image"]',
              'text="点击图片"',
              'text="选择正确的"',
          ]),
          # A login prompt is reported through the same channel as a captcha.
          ('login', '需要登录', [
              'text="请先登录"',
              'text="登录后继续"',
              '[class*="login-modal"]',
              '[class*="login-dialog"]',
          ]),
      ]

      try:
          for captcha_type, label, selectors in probes:
              for selector in selectors:
                  try:
                      if await self.page.locator(selector).count() > 0:
                          print(f"[{self.platform_name}] 检测到{label}: {selector}", flush=True)
                          return {'need_captcha': True, 'captcha_type': captcha_type}
                  except Exception:
                      # A selector that cannot be evaluated is skipped.
                      # `Exception` (not a bare except) so that task
                      # cancellation still propagates.
                      continue
      except Exception as e:
          print(f"[{self.platform_name}] 验证码检测异常: {e}", flush=True)

      return {'need_captcha': False, 'captcha_type': ''}
  async def handle_phone_captcha(self) -> bool:
      """Work through the SMS verification dialog on the current page.

      Flow:
        1. Read a masked phone number (``1xx****xxxx``) from the page text.
        2. Click a visible "获取验证码"/"发送验证码" button if one is enabled.
        3. Decide whether the SMS was sent, from page/button text and from
           ``self.ai_analyze_sms_send_state()``; click once more if that
           analysis suggests ``click_send``.
        4. Ask for the code via ``self.request_sms_code_from_frontend`` with
           a hint describing the observed state.
        5. Fill the first matching input, click the first matching confirm
           button (if any), and wait for the prompt to disappear.

      Returns:
          True when the code was typed in and the submit step was attempted;
          False when there is no page or any step raised.
      """
      if not self.page:
          return False

      # Text that indicates a countdown / "resend" state, i.e. the SMS went out.
      sent_marker = re.compile(r"(\d+\s*秒)|(\d+\s*s)|后可重试|重新发送|已发送")

      try:
          try:
              body_text = await self.page.inner_text("body")
          except Exception:
              body_text = ""
          phone_match = re.search(r"(1\d{2}\*{4}\d{4})", body_text or "")
          masked_phone = phone_match.group(1) if phone_match else ""

          async def _get_send_button():
              """Return the first visible "send code" button, or None."""
              candidates = [
                  self.page.get_by_role("button", name="获取验证码"),
                  self.page.get_by_role("button", name="发送验证码"),
                  self.page.locator('button:has-text("获取验证码")'),
                  self.page.locator('button:has-text("发送验证码")'),
                  self.page.locator('[role="button"]:has-text("获取验证码")'),
                  self.page.locator('[role="button"]:has-text("发送验证码")'),
              ]
              for candidate in candidates:
                  try:
                      if await candidate.count() > 0 and await candidate.first.is_visible():
                          return candidate.first
                  except Exception:
                      continue
              return None

          async def _confirm_sent() -> bool:
              """Return True if the page or the send button shows a 'sent' state."""
              try:
                  page_text = await self.page.inner_text("body")
              except Exception:
                  page_text = ""
              if sent_marker.search(page_text or ""):
                  return True
              try:
                  send_btn = await _get_send_button()
                  if send_btn:
                      if await send_btn.is_disabled():
                          return True
                      label = await send_btn.inner_text()
                      if sent_marker.search(label or ""):
                          return True
              except Exception:
                  pass
              return False

          async def _analyze_send_state() -> dict:
              """Run the AI analysis and normalise its result to a dict."""
              # The analyzer is defined outside this block; anything that is
              # not a dict is treated as "no information" so the later
              # `.get` calls cannot fail.
              state = await self.ai_analyze_sms_send_state()
              return state if isinstance(state, dict) else {}

          did_click_send = False
          btn = await _get_send_button()
          if btn:
              try:
                  if await btn.is_enabled():
                      await btn.click(timeout=5000)
                      did_click_send = True
                      print(f"[{self.platform_name}] 已点击发送短信验证码", flush=True)
              except Exception as e:
                  print(f"[{self.platform_name}] 点击发送验证码按钮失败: {e}", flush=True)

          if did_click_send:
              # Give the UI a moment to switch to its countdown state.
              try:
                  await self.page.wait_for_timeout(800)
              except Exception:
                  pass

          sent_confirmed = await _confirm_sent() if did_click_send else False
          ai_state = await _analyze_send_state()
          if ai_state.get("sent_likely"):
              sent_confirmed = True

          # One retry, only when the analysis explicitly suggests clicking.
          if (not did_click_send or not sent_confirmed) and ai_state.get("suggested_action") == "click_send":
              btn2 = await _get_send_button()
              if btn2:
                  try:
                      if await btn2.is_enabled():
                          await btn2.click(timeout=5000)
                          did_click_send = True
                          await self.page.wait_for_timeout(800)
                          sent_confirmed = await _confirm_sent()
                          ai_state = await _analyze_send_state()
                          if ai_state.get("sent_likely"):
                              sent_confirmed = True
                  except Exception:
                      pass

          # Build the hint shown alongside the code request.
          notes = ai_state.get('notes', '')
          block_reason = ai_state.get("block_reason")
          if block_reason == "slider":
              code_hint = "检测到滑块/人机验证阻塞,请先在浏览器窗口完成验证后再发送短信验证码。"
          elif block_reason in ["rate_limit", "risk"]:
              code_hint = f"页面提示可能被限制/风控({notes or '请稍后重试'})。可稍等后重新发送验证码。"
          elif not did_click_send:
              code_hint = "未找到或无法点击“发送验证码”按钮,请在弹出的浏览器页面手动点击发送后再输入验证码。"
          elif sent_confirmed:
              code_hint = f"已检测到短信验证码已发送({notes or '请查收短信'})。"
          else:
              code_hint = f"已尝试点击发送验证码,但未确认发送成功({notes or '请查看是否出现倒计时/重新发送'})。"

          code = await self.request_sms_code_from_frontend(masked_phone, message=code_hint)
          if not code:
              # Without this check a missing code would surface below as a
              # misleading "input not found" error, or be submitted empty.
              raise Exception("未收到短信验证码")

          input_selectors = [
              'input[placeholder*="验证码"]',
              'input[placeholder*="短信"]',
              'input[type="tel"]',
              'input[type="text"]',
          ]
          filled = False
          for selector in input_selectors:
              try:
                  el = self.page.locator(selector).first
                  if await el.count() > 0:
                      await el.fill(code)
                      filled = True
                      break
              except Exception:
                  continue
          if not filled:
              raise Exception("未找到验证码输入框")

          submit_selectors = [
              'button:has-text("确定")',
              'button:has-text("确认")',
              'button:has-text("提交")',
              'button:has-text("完成")',
          ]
          for selector in submit_selectors:
              try:
                  submit_btn = self.page.locator(selector).first
                  if await submit_btn.count() > 0:
                      await submit_btn.click()
                      break
              except Exception:
                  continue

          # Best effort: wait for the prompt to go away; a timeout here is
          # not treated as failure.
          try:
              await self.page.wait_for_timeout(1000)
              await self.page.wait_for_selector('text="请输入验证码"', state="hidden", timeout=15000)
          except Exception:
              pass

          print(f"[{self.platform_name}] 短信验证码已提交,继续执行发布流程", flush=True)
          return True
      except Exception as e:
          print(f"[{self.platform_name}] 处理短信验证码失败: {e}", flush=True)
          return False
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video through the Douyin creator centre upload page.

      Flow: start the browser and load cookies, open the upload page, deal
      with login / captcha prompts, pick the video file, fill in title and
      tags, wait for the upload, apply location / sync / schedule settings,
      then click publish and wait for the redirect to the manage page.

      Args:
          cookies: Cookie payload handed to ``self.parse_cookies``.
          params: Publish parameters; this method reads ``video_path``,
              ``title``, ``tags``, ``location`` and ``publish_date``.

      Returns:
          A ``PublishResult`` whose ``status`` is ``'success'``,
          ``'need_captcha'`` (login or verification required), ``'failed'``
          (no upload entry found) or ``'need_action'`` (publish timed out).

      Raises:
          Exception: If the page was not initialised or the video file does
              not exist.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"{'='*60}")

      self.report_progress(5, "正在初始化浏览器...")
      await self.init_browser()
      print(f"[{self.platform_name}] 浏览器初始化完成")

      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)

      if not self.page:
          raise Exception("Page not initialized")

      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")

      self.report_progress(10, "正在打开上传页面...")
      # `publish_url` is the class attribute holding the upload page address.
      await self.page.goto(self.publish_url)
      print(f"[{self.platform_name}] 等待页面加载...")
      try:
          await self.page.wait_for_url(self.publish_url, timeout=30000)
      except Exception:
          # A redirect (e.g. to login) is handled just below.
          pass
      await asyncio.sleep(3)

      current_url = self.page.url
      print(f"[{self.platform_name}] 当前 URL: {current_url}")

      async def wait_for_manual_login(timeout_seconds: int = 300) -> bool:
          """Poll every 2s until the page is back on creator.douyin.com."""
          if not self.page:
              return False
          self.report_progress(8, "检测到需要登录,请在浏览器窗口完成登录...")
          try:
              await self.page.bring_to_front()
          except Exception:
              pass
          waited = 0
          while waited < timeout_seconds:
              try:
                  url = self.page.url
                  if "login" not in url and "passport" not in url and "creator.douyin.com" in url:
                      return True
              except Exception:
                  # Reading the URL failed; just poll again.
                  pass
              # Sleeping outside the try lets task cancellation end the loop.
              await asyncio.sleep(2)
              waited += 2
          return False

      # Landed on a login page: the cookies are no longer valid.
      if "login" in current_url or "passport" in current_url:
          if self.headless:
              return await self._need_captcha_result(
                  error="Cookie 已过期,需要重新登录",
                  captcha_type='login',
                  page_url=current_url,
              )
          if not await wait_for_manual_login():
              return await self._need_captcha_result(
                  error="需要登录:请在浏览器窗口完成登录后重试",
                  captcha_type='login',
                  page_url=current_url,
              )
          # Manual login succeeded: hand the fresh cookies back, then retry
          # the upload page.
          try:
              if self.context:
                  cookies_after = await self.context.cookies()
                  await self.sync_cookies_to_node(cookies_after)
          except Exception:
              pass
          await self.page.goto(self.publish_url)
          await asyncio.sleep(3)
          current_url = self.page.url

      # AI-based captcha detection first.
      ai_captcha_result = await self.ai_check_captcha()
      if ai_captcha_result['has_captcha']:
          print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha_result['captcha_type']}", flush=True)
          return await self._need_captcha_result(
              error=f"检测到{ai_captcha_result['captcha_type']}验证码,需要使用有头浏览器完成验证",
              captcha_type=ai_captcha_result['captcha_type'],
              page_url=current_url,
          )

      # Selector-based detection second; only the phone/SMS flow is automated.
      captcha_result = await self.check_captcha()
      if captcha_result['need_captcha']:
          print(f"[{self.platform_name}] 传统方式检测到验证码: {captcha_result['captcha_type']}", flush=True)
          if captcha_result['captcha_type'] != 'phone':
              return await self._need_captcha_result(
                  error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
                  captcha_type=captcha_result['captcha_type'],
                  page_url=current_url,
              )
          if not await self.handle_phone_captcha():
              return await self._need_captcha_result(
                  error="检测到手机验证码,但自动处理失败",
                  captcha_type='phone',
                  page_url=current_url,
              )
          self.report_progress(12, "短信验证码已处理,继续发布...")

      self.report_progress(15, "正在选择视频文件...")
      # Candidate upload areas, most specific first.
      upload_selectors = [
          "div[class*='container-drag-info']",
          "div[class*='container-drag']",
          "div.upload-btn",
          "div[class*='upload']",
      ]
      upload_success = False
      for selector in upload_selectors:
          try:
              upload_div = self.page.locator(selector).first
              if await upload_div.count() > 0:
                  print(f"[{self.platform_name}] 找到上传区域: {selector}")
                  async with self.page.expect_file_chooser(timeout=10000) as fc_info:
                      await upload_div.click()
                  file_chooser = await fc_info.value
                  await file_chooser.set_files(params.video_path)
                  upload_success = True
                  print(f"[{self.platform_name}] 视频文件已选择")
                  break
          except Exception as e:
              print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")

      if not upload_success:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到上传入口",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status='failed'
          )

      # Wait for the redirect to the post-editing page (up to 60 attempts).
      self.report_progress(20, "等待进入发布页面...")
      for i in range(60):
          try:
              await self.page.wait_for_url(
                  "https://creator.douyin.com/creator-micro/content/post/video*",
                  timeout=2000
              )
              print(f"[{self.platform_name}] 已进入发布页面")
              break
          except Exception:
              print(f"[{self.platform_name}] 等待进入发布页面... {i+1}/60")
              await asyncio.sleep(1)
      await asyncio.sleep(2)

      self.report_progress(30, "正在填充标题和话题...")
      # Preferred: the input that follows the "作品标题" label.
      title_input = self.page.get_by_text('作品标题').locator("..").locator(
          "xpath=following-sibling::div[1]").locator("input")
      if await title_input.count():
          await title_input.fill(params.title[:30])
          print(f"[{self.platform_name}] 标题已填写")
      else:
          # Fallback: type into the `.notranslate` editor after clearing it.
          title_container = self.page.locator(".notranslate")
          await title_container.click()
          await self.page.keyboard.press("Backspace")
          await self.page.keyboard.press("Control+KeyA")
          await self.page.keyboard.press("Delete")
          await self.page.keyboard.type(params.title)
          await self.page.keyboard.press("Enter")
          print(f"[{self.platform_name}] 标题已填写(备用方式)")

      # Topics are typed as "#tag" followed by Space.
      if params.tags:
          css_selector = ".zone-container"
          for index, tag in enumerate(params.tags, start=1):
              print(f"[{self.platform_name}] 正在添加第{index}个话题: #{tag}")
              await self.page.type(css_selector, "#" + tag)
              await self.page.press(css_selector, "Space")

      self.report_progress(40, "等待视频上传完成...")
      # The upload counts as finished once a "重新上传" element is present.
      for i in range(120):
          try:
              count = await self.page.locator("div").filter(has_text="重新上传").count()
              if count > 0:
                  print(f"[{self.platform_name}] 视频上传完毕")
                  break
              print(f"[{self.platform_name}] 正在上传视频中... {i+1}/120")
              if await self.page.locator('div.progress-div > div:has-text("上传失败")').count():
                  print(f"[{self.platform_name}] 发现上传出错了,重新上传...")
                  await self.handle_upload_error(params.video_path)
              await asyncio.sleep(3)
          except Exception:
              print(f"[{self.platform_name}] 正在上传视频中...")
              await asyncio.sleep(3)

      self.report_progress(60, "处理视频设置...")
      # Dismiss the "我知道了" notice if it is showing.
      known_button = self.page.get_by_role("button", name="我知道了")
      if await known_button.count() > 0:
          await known_button.nth(0).click()
          print(f"[{self.platform_name}] 关闭弹窗")
      await asyncio.sleep(5)

      # Only drive the location picker when a location was actually given;
      # with an empty value the picker would otherwise be opened and its
      # first suggestion selected.
      location = getattr(params, "location", None)
      if location:
          try:
              await self.page.locator('div.semi-select span:has-text("输入地理位置")').click()
              await asyncio.sleep(1)
              await self.page.keyboard.press("Backspace")
              await self.page.keyboard.press("Control+KeyA")
              await self.page.keyboard.press("Delete")
              await self.page.keyboard.type(location)
              await asyncio.sleep(1)
              await self.page.locator('div[role="listbox"] [role="option"]').first.click()
              print(f"[{self.platform_name}] 位置设置成功: {location}")
          except Exception as e:
              print(f"[{self.platform_name}] 设置位置失败: {e}")

      # Turn on the third-party sync switch if present and not yet checked.
      try:
          third_part_element = '[class^="info"] > [class^="first-part"] div div.semi-switch'
          if await self.page.locator(third_part_element).count():
              class_name = await self.page.eval_on_selector(
                  third_part_element, 'div => div.className')
              if 'semi-switch-checked' not in class_name:
                  await self.page.locator(third_part_element).locator(
                      'input.semi-switch-native-control').click()
                  print(f"[{self.platform_name}] 已开启头条/西瓜同步")
      except Exception:
          pass

      if params.publish_date:
          self.report_progress(70, "设置定时发布...")
          await self.set_schedule_time(params.publish_date)

      self.report_progress(80, "正在发布...")
      print(f"[{self.platform_name}] 查找发布按钮...")
      manage_url = "https://creator.douyin.com/creator-micro/content/manage"
      for i in range(30):
          try:
              # The AI check is slow, so it runs only on every fifth attempt.
              if i % 5 == 0:
                  ai_captcha = await self.ai_check_captcha()
                  if ai_captcha['has_captcha']:
                      print(f"[{self.platform_name}] AI检测到发布过程中需要验证码: {ai_captcha['captcha_type']}", flush=True)
                      if ai_captcha['captcha_type'] == 'phone':
                          handled = await self.handle_phone_captcha()
                          if handled:
                              continue
                      return await self._need_captcha_result(
                          error=f"发布过程中需要{ai_captcha['captcha_type']}验证码,请使用有头浏览器完成验证",
                          captcha_type=ai_captcha['captcha_type'],
                      )

              publish_btn = self.page.get_by_role('button', name="发布", exact=True)
              btn_count = await publish_btn.count()
              if btn_count > 0:
                  print(f"[{self.platform_name}] 点击发布按钮...")
                  await publish_btn.click()

              # Success is signalled by the redirect to the manage page; a
              # timeout raises and is handled in the except branch.
              await self.page.wait_for_url(manage_url, timeout=5000)
              self.report_progress(100, "发布成功")
              print(f"[{self.platform_name}] 视频发布成功!")
              screenshot_base64 = await self.capture_screenshot()
              page_url = await self.get_page_url()
              return PublishResult(
                  success=True,
                  platform=self.platform_name,
                  message="发布成功",
                  screenshot_base64=screenshot_base64,
                  page_url=page_url,
                  status='success'
              )
          except Exception:
              current_url = self.page.url
              # The redirect may have happened even though a step above raised.
              if manage_url in current_url:
                  self.report_progress(100, "发布成功")
                  print(f"[{self.platform_name}] 视频发布成功!")
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=True,
                      platform=self.platform_name,
                      message="发布成功",
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='success'
                  )
              print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30, URL: {current_url}")
              await asyncio.sleep(1)

      # All attempts used up without reaching the manage page.
      print(f"[{self.platform_name}] 发布超时,获取截图...")
      screenshot_base64 = await self.capture_screenshot()
      page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="发布超时,请检查发布状态",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status='need_action'
      )

  async def _need_captcha_result(self, error, captcha_type, page_url=None) -> PublishResult:
      """Capture a screenshot and build the failed 'need_captcha' PublishResult.

      When ``page_url`` is omitted it is read via ``self.get_page_url()``
      after the screenshot has been taken.
      """
      screenshot_base64 = await self.capture_screenshot()
      if page_url is None:
          page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error=error,
          need_captcha=True,
          captcha_type=captcha_type,
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status='need_captcha'
      )
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Fetch one page of the account's works from the creator centre API.

      The request is issued with ``fetch`` from inside the logged-in page so
      that the browser's cookies are sent along.

      Args:
          cookies: Cookie payload handed to ``self.parse_cookies``.
          page: Cursor for the request; pass 0 first, then the ``next_page``
              value of the previous result (sent as ``max_cursor``).
          page_size: Number of works to request (sent as ``count``).

      Returns:
          ``WorksResult`` with ``works``, ``total``, ``has_more`` and
          ``next_page`` on success, or ``success=False`` with ``error`` set
          when anything raised (including an expired login).
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品列表")
      print(f"[{self.platform_name}] cursor={page}, page_size={page_size}")
      print(f"{'='*60}")

      works: List[WorkItem] = []
      total = 0
      has_more = False
      next_cursor = 0

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Open the creator home page first; an expired session redirects
          # to a login/passport URL.
          await self.page.goto("https://creator.douyin.com/creator-micro/home")
          await asyncio.sleep(3)
          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          max_cursor = page
          api_url = f"https://creator.douyin.com/janus/douyin/creator/pc/work_list?status=0&device_platform=android&count={page_size}&max_cursor={max_cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai"

          # The URL is passed as an argument rather than spliced into the
          # script text, so its contents can never be parsed as JavaScript.
          response = await self.page.evaluate(
              '''
              async (url) => {
                  try {
                      const resp = await fetch(url, {
                          credentials: 'include',
                          headers: { 'Accept': 'application/json' }
                      });
                      return await resp.json();
                  } catch (e) {
                      return { error: e.toString() };
                  }
              }
              ''',
              api_url,
          )

          if response.get('error'):
              print(f"[{self.platform_name}] API 请求失败: {response.get('error')}", flush=True)

          aweme_list = response.get('aweme_list', []) or []
          has_more = response.get('has_more', False)
          # Next cursor: prefer `max_cursor`, fall back to `next_cursor`.
          next_cursor = response.get('max_cursor') if 'max_cursor' in response else response.get('next_cursor')
          if next_cursor is None:
              next_cursor = 0

          # The total count is taken from the first item's author.aweme_count.
          if aweme_list:
              author = aweme_list[0].get('author') or {}
              author_aweme_count = author.get('aweme_count') or 0
              if author_aweme_count > 0:
                  total = author_aweme_count
                  print(f"[{self.platform_name}] 从 author.aweme_count 获取总作品数: {total}")

          print(f"[{self.platform_name}] API 响应: has_more={has_more}, aweme_list={len(aweme_list)}, next_cursor={next_cursor}")

          for aweme in aweme_list:
              item = self._aweme_to_work_item(aweme)
              if item is not None:
                  works.append(item)

          # No author count available: report what this page contained.
          if total == 0:
              total = len(works)
          print(f"[{self.platform_name}] 本页获取到 {len(works)} 个作品")
      except Exception as e:
          import traceback
          traceback.print_exc()
          return WorksResult(
              success=False,
              platform=self.platform_name,
              error=str(e)
          )

      return WorksResult(
          success=True,
          platform=self.platform_name,
          works=works,
          total=total,
          has_more=has_more,
          next_page=next_cursor
      )

  def _aweme_to_work_item(self, aweme: dict):
      """Convert one `aweme_list` entry into a WorkItem; None if it has no id.

      Nested objects and counters are read with ``or`` fallbacks so a JSON
      ``null`` in the payload behaves like a missing key instead of raising.
      """
      aweme_id = str(aweme.get('aweme_id', ''))
      if not aweme_id:
          return None

      statistics = aweme.get('statistics') or {}
      video = aweme.get('video') or {}

      # Cover: top-level `Cover` first, then `video.cover`.
      cover_candidates = (
          (aweme.get('Cover') or {}).get('url_list')
          or (video.get('cover') or {}).get('url_list')
      )
      cover_url = cover_candidates[0] if cover_candidates else ''

      # Title: `item_title`, else first line of `desc` cut to 50 chars.
      title = aweme.get('item_title', '') or (aweme.get('desc') or '').split('\n')[0][:50] or '无标题'

      # `duration` is divided by 1000 (milliseconds to seconds).
      duration = (video.get('duration') or 0) // 1000

      create_time = aweme.get('create_time', 0)
      publish_time = datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S') if create_time else ''

      return WorkItem(
          work_id=aweme_id,
          title=title,
          cover_url=cover_url,
          duration=duration,
          status='published',
          publish_time=publish_time,
          play_count=int(statistics.get('play_count') or 0),
          like_count=int(statistics.get('digg_count') or 0),
          comment_count=int(statistics.get('comment_count') or 0),
          share_count=int(statistics.get('share_count') or 0),
      )
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Fetch the comments of one Douyin work by intercepting the comment-list API.

      The video detail page ``https://www.douyin.com/video/<work_id>`` is opened
      in the browser while a response listener watches for URLs containing
      ``/comment/list``. The last successful payload seen is parsed into
      ``CommentItem`` objects, each carrying its inline replies.

      Args:
          cookies: Cookie string, parsed with ``self.parse_cookies``.
          work_id: Work (aweme) id used to build the detail-page URL.
          cursor: Only logged. NOTE(review): it is never forwarded to the API,
              so the result is whatever page the web page itself requests.

      Returns:
          ``CommentsResult`` with ``success=True`` and the parsed comments, or
          ``success=False`` with ``error`` set when any step raised (including
          a redirect to a login/passport URL). On success the next-page cursor
          is attached to the result as an extra ``cursor`` attribute.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品评论")
      print(f"[{self.platform_name}] work_id={work_id}, cursor={cursor}")
      print(f"{'='*60}")

      comments: List[CommentItem] = []
      total = 0
      has_more = False
      next_cursor = ""
      captured_data = {}

      def first_url(image) -> str:
          """Return the first entry of ``image['url_list']`` or '' if missing/empty."""
          if not isinstance(image, dict):
              return ''
          url_list = image.get('url_list') or ['']
          return url_list[0]

      def format_time(raw) -> str:
          """Format a Unix timestamp as 'YYYY-MM-DD HH:MM:SS'; '' for falsy input."""
          return datetime.fromtimestamp(raw).strftime('%Y-%m-%d %H:%M:%S') if raw else ''

      def build_item(raw: dict, **extra) -> CommentItem:
          """Convert one raw comment/reply payload into a ``CommentItem``."""
          # ``or`` guards cover keys that are present but null in the payload.
          author = raw.get('user') or {}
          return CommentItem(
              comment_id=str(raw.get('cid', '')),
              work_id=work_id,
              content=raw.get('text', ''),
              author_id=str(author.get('uid', '')),
              author_name=author.get('nickname', ''),
              author_avatar=first_url(author.get('avatar_thumb')),
              like_count=int(raw.get('digg_count') or 0),
              create_time=format_time(raw.get('create_time')),
              is_author=raw.get('is_author', False),
              **extra,
          )

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          # Response listener: keeps the latest successful comment-list payload.
          async def handle_response(response):
              nonlocal captured_data
              url = response.url
              # Matches /aweme/v1/web/comment/list/ as well as /comment/list/.
              if '/comment/list' in url and ('aweme_id' in url or work_id in url):
                  try:
                      json_data = await response.json()
                      print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)

                      # Accept the payload when it reports success or carries comments.
                      if json_data.get('status_code') == 0 or json_data.get('comments'):
                          captured_data = json_data
                          comment_count = len(json_data.get('comments') or [])
                          print(f"[{self.platform_name}] 评论 API 响应成功: comments={comment_count}, has_more={json_data.get('has_more')}", flush=True)
                  except Exception as e:
                      print(f"[{self.platform_name}] 解析评论响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"[{self.platform_name}] 已注册评论 API 响应监听器", flush=True)

          # Loading the detail page makes the site request the comment list itself.
          video_url = f"https://www.douyin.com/video/{work_id}"
          print(f"[{self.platform_name}] 访问视频详情页: {video_url}", flush=True)
          await self.page.goto(video_url, wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          # A redirect to a login/passport URL means the cookies are no longer valid.
          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Nothing captured yet: scroll to trigger lazy loading, then wait a bit more.
          if not captured_data:
              print(f"[{self.platform_name}] 等待评论 API 响应...", flush=True)
              await self.page.evaluate('window.scrollBy(0, 300)')
              await asyncio.sleep(3)

          if not captured_data:
              await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)

          if captured_data:
              comment_list = captured_data.get('comments') or []
              # The API may report has_more as a bool or as 0/1; normalise to bool.
              has_more = bool(captured_data.get('has_more'))
              next_cursor = str(captured_data.get('cursor', ''))
              total = captured_data.get('total', 0) or len(comment_list)

              print(f"[{self.platform_name}] 解析评论: total={total}, has_more={has_more}, comments={len(comment_list)}", flush=True)

              for comment in comment_list:
                  # Entries without a comment id are skipped.
                  if not str(comment.get('cid', '')):
                      continue

                  replies = [build_item(reply) for reply in comment.get('reply_comment') or []]
                  comments.append(build_item(
                      comment,
                      reply_count=int(comment.get('reply_comment_total') or 0),
                      replies=replies,
                  ))

              print(f"[{self.platform_name}] 解析到 {len(comments)} 条评论", flush=True)
          else:
              print(f"[{self.platform_name}] 未捕获到评论 API 响应", flush=True)

      except Exception as e:
          import traceback
          traceback.print_exc()
          return CommentsResult(
              success=False,
              platform=self.platform_name,
              work_id=work_id,
              error=str(e)
          )
      finally:
          # Runs on both the success and the failure path.
          await self.close_browser()

      result = CommentsResult(
          success=True,
          platform=self.platform_name,
          work_id=work_id,
          comments=comments,
          total=total,
          has_more=has_more
      )
      # The cursor is stored directly in the instance dict rather than passed
      # to the constructor.
      result.__dict__['cursor'] = next_cursor
      return result
  async def get_all_comments(self, cookies: str) -> dict:
      """Collect comments for all works through the creator comment-management page.

      A response listener records every comment-list and work-list API payload
      while the page is loaded, up to ten works are clicked in the "选择作品"
      picker and the page is scrolled. Captured comments are then grouped by
      work id; a comment captured more than once is kept only once.

      Args:
          cookies: Cookie string, parsed with ``self.parse_cookies``.

      Returns:
          On success ``{'success': True, 'platform', 'work_comments', 'total'}``
          where ``work_comments`` is a list of
          ``{'work_id', 'title', 'cover_url', 'comments'}`` dicts and ``total``
          is the number of works. On failure
          ``{'success': False, 'platform', 'error', 'work_comments': []}``.
      """
      import re

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取所有作品评论")
      print(f"{'='*60}")

      all_work_comments = []
      captured_comments = []
      captured_works = {}  # work_id -> work_info
      # Compiled once instead of on every intercepted response.
      aweme_id_pattern = re.compile(r'aweme_id=(\d+)')

      def first_url(image) -> str:
          """Return the first entry of ``image['url_list']`` or '' if missing/empty."""
          if not isinstance(image, dict):
              return ''
          url_list = image.get('url_list') or ['']
          return url_list[0]

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          # Response listener: accumulates comments and work metadata.
          async def handle_response(response):
              url = response.url
              try:
                  # Comment list APIs, several URL shapes:
                  # /comment/list/select/, /comment/read, /creator/comment/list
                  if '/comment/list' in url or '/comment/read' in url or 'comment_list' in url:
                      json_data = await response.json()
                      print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)

                      # Payload format 1 uses 'comments', format 2 'comment_info_list'.
                      comments = json_data.get('comments') or json_data.get('comment_info_list') or []

                      if comments:
                          # The work id is taken from the request URL when present.
                          aweme_id_match = aweme_id_pattern.search(url)
                          aweme_id = aweme_id_match.group(1) if aweme_id_match else ''

                          for comment in comments:
                              # Tag the comment with its work id if the payload lacks one.
                              if aweme_id and 'aweme_id' not in comment:
                                  comment['aweme_id'] = aweme_id
                              captured_comments.append(comment)

                          print(f"[{self.platform_name}] 捕获到 {len(comments)} 条评论 (aweme_id={aweme_id}),总计: {len(captured_comments)}", flush=True)

                  # Work list APIs.
                  if '/work_list' in url or '/item/list' in url or '/creator/item' in url:
                      json_data = await response.json()
                      aweme_list = (
                          json_data.get('aweme_list')
                          or json_data.get('item_info_list')
                          or json_data.get('item_list')
                          or []
                      )
                      print(f"[{self.platform_name}] 捕获到作品列表 API: {len(aweme_list)} 个作品", flush=True)

                      for aweme in aweme_list:
                          aweme_id = str(aweme.get('aweme_id', '') or aweme.get('item_id', '') or aweme.get('item_id_plain', ''))
                          if not aweme_id:
                              continue

                          # Cover lookup order: Cover -> video.cover -> cover_image_url.
                          cover_url = (
                              first_url(aweme.get('Cover'))
                              or first_url((aweme.get('video') or {}).get('cover'))
                              or aweme.get('cover_image_url')
                              or ''
                          )

                          captured_works[aweme_id] = {
                              'title': aweme.get('item_title', '') or aweme.get('title', '') or aweme.get('desc', ''),
                              'cover': cover_url,
                              'comment_count': (aweme.get('statistics') or {}).get('comment_count', 0) or aweme.get('comment_count', 0),
                          }
              except Exception as e:
                  print(f"[{self.platform_name}] 解析响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"[{self.platform_name}] 已注册 API 响应监听器", flush=True)

          # Open the comment-management page.
          print(f"[{self.platform_name}] 访问评论管理页面...", flush=True)
          await self.page.goto("https://creator.douyin.com/creator-micro/interactive/comment", wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          # A redirect to a login/passport URL means the cookies are no longer valid.
          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          print(f"[{self.platform_name}] 页面加载完成,当前捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          # Best effort: open the work picker and click works to load their comments.
          try:
              select_btn = await self.page.query_selector('text="选择作品"')
              if select_btn:
                  print(f"[{self.platform_name}] 点击选择作品按钮...", flush=True)
                  await select_btn.click()
                  await asyncio.sleep(3)

                  work_items = await self.page.query_selector_all('[class*="work-item"], [class*="video-item"], [class*="aweme-item"]')
                  print(f"[{self.platform_name}] 找到 {len(work_items)} 个作品元素", flush=True)

                  # At most 10 works are processed.
                  for i, item in enumerate(work_items[:10]):
                      try:
                          await item.click()
                          await asyncio.sleep(2)
                          print(f"[{self.platform_name}] 已点击作品 {i+1}/{min(len(work_items), 10)}", flush=True)
                      except Exception as e:
                          # A failed click is skipped, but task cancellation
                          # (a BaseException) is no longer swallowed.
                          print(f"[{self.platform_name}] 点击作品 {i+1} 失败: {e}", flush=True)

                  # Close the work-picker dialog.
                  close_btn = await self.page.query_selector('[class*="close"], [class*="cancel"]')
                  if close_btn:
                      await close_btn.click()
                      await asyncio.sleep(1)
          except Exception as e:
              print(f"[{self.platform_name}] 选择作品操作失败: {e}", flush=True)

          # Scroll to load more comments.
          for _ in range(5):
              await self.page.evaluate('window.scrollBy(0, 500)')
              await asyncio.sleep(1)

          await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)

          print(f"[{self.platform_name}] 最终捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          # Group comments by work.
          work_comments_map = {}  # work_id -> work entry
          seen = set()  # (work_id, comment_id) pairs already emitted
          for comment in captured_comments:
              # Work info may be embedded in the comment under 'aweme' or 'item'.
              aweme = comment.get('aweme') or comment.get('item') or {}
              aweme_id = str(comment.get('aweme_id', '') or aweme.get('aweme_id', '') or aweme.get('item_id', ''))

              if not aweme_id:
                  continue

              if aweme_id not in work_comments_map:
                  work_info = captured_works.get(aweme_id, {})
                  work_comments_map[aweme_id] = {
                      'work_id': aweme_id,
                      'title': aweme.get('title', '') or aweme.get('desc', '') or work_info.get('title', ''),
                      'cover_url': first_url(aweme.get('cover')) if aweme.get('cover') else work_info.get('cover', ''),
                      'comments': []
                  }

              cid = str(comment.get('cid', ''))
              if not cid:
                  continue

              # The same API response can be captured repeatedly (initial load,
              # work clicks, scrolling); keep each comment once per work.
              if (aweme_id, cid) in seen:
                  continue
              seen.add((aweme_id, cid))

              user = comment.get('user') or {}
              create_time = comment.get('create_time')

              work_comments_map[aweme_id]['comments'].append({
                  'comment_id': cid,
                  'author_id': str(user.get('uid', '')),
                  'author_name': user.get('nickname', ''),
                  'author_avatar': first_url(user.get('avatar_thumb')),
                  'content': comment.get('text', ''),
                  'like_count': int(comment.get('digg_count') or 0),
                  'create_time': datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S') if create_time else '',
                  'is_author': comment.get('is_author', False),
              })

          all_work_comments = list(work_comments_map.values())
          total_comments = sum(len(w['comments']) for w in all_work_comments)
          print(f"[{self.platform_name}] 获取到 {len(all_work_comments)} 个作品的 {total_comments} 条评论", flush=True)

      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e),
              'work_comments': []
          }
      finally:
          # Runs on both the success and the failure path.
          await self.close_browser()

      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': all_work_comments,
          'total': len(all_work_comments)
      }
  async def auto_reply_private_messages(self, cookies: str) -> dict:
      """Automatically answer Douyin private messages on the creator chat page.

      Both the "陌生人私信" and "朋友私信" tabs are processed. For every
      conversation whose preview is not a shared video/image/link and whose
      last message was not sent by the account itself, a reply is generated
      (AI first, rule-based as fallback) and sent through the chat input box.

      Args:
          cookies: Cookie string, parsed with ``self.parse_cookies``.

      Returns:
          ``{'success': True, 'platform', 'replied_count', 'message'}`` on
          success, ``{'success': False, 'platform', 'error'}`` when the run
          failed as a whole. Errors in a single conversation are logged and
          that conversation is skipped.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始自动回复抖音私信")
      print(f"{'='*60}")

      try:
          await self.init_browser()
          await self.set_cookies(self.parse_cookies(cookies))

          if not self.page:
              raise Exception("Page not initialized")

          # Open the private-message page.
          await self.page.goto("https://creator.douyin.com/creator-micro/data/following/chat", timeout=30000)
          await asyncio.sleep(3)

          # A redirect to a login/passport URL means the cookies are no longer valid.
          current_url = self.page.url
          print(f"[{self.platform_name}] 当前 URL: {current_url}")
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          replied_count = 0

          # Two tabs are handled: strangers' messages and friends' messages.
          for tab_name in ["陌生人私信", "朋友私信"]:
              print(f"\n{'='*50}")
              print(f"[{self.platform_name}] 处理 {tab_name} ...")
              print(f"{'='*50}")

              # Switch to the tab; skip it when it is not on the page.
              tab_locator = self.page.locator(f'div.semi-tabs-tab:text-is("{tab_name}")')
              if not await tab_locator.count() > 0:
                  print(f"⚠️ 未找到 {tab_name} 标签,跳过")
                  continue
              await tab_locator.click()
              await asyncio.sleep(2)

              # Conversation list of this tab.
              session_count = await self.page.locator('.semi-list-item').count()
              print(f"[{self.platform_name}] {tab_name} 共找到 {session_count} 条会话")

              if session_count == 0:
                  print(f"[{self.platform_name}] {tab_name} 无新私信")
                  continue

              for idx in range(session_count):
                  try:
                      # Re-query the list every round because the DOM may have changed.
                      current_sessions = self.page.locator('.semi-list-item')
                      if idx >= await current_sessions.count():
                          break

                      session = current_sessions.nth(idx)
                      user_name = await session.locator('.item-header-name-vL_79m').inner_text()
                      last_msg = await session.locator('.text-whxV9A').inner_text()
                      print(f"\n ➤ [{idx+1}/{session_count}] 处理用户: {user_name} | 最后消息: {last_msg[:30]}...")

                      # Previews that announce a shared video/image/link are not text.
                      shares_media = "视频" in last_msg or "图片" in last_msg or "链接" in last_msg
                      if "分享" in last_msg and shares_media:
                          print(" ➤ 会话预览为非文字消息,跳过")
                          continue

                      # Open the conversation.
                      await session.click()
                      await asyncio.sleep(2)

                      # Decide from the last bubble whether we already answered.
                      chat_messages = self.page.locator('.box-item-dSA1TJ:not(.time-Za5gKL)')
                      msg_count = await chat_messages.count()
                      last_is_mine = False
                      if msg_count > 0:
                          last_bubble = chat_messages.nth(msg_count - 1)
                          # A class containing "is-me-" marks a bubble sent by this account.
                          classes = await last_bubble.get_attribute('class') or ''
                          last_is_mine = 'is-me-' in classes

                      if last_is_mine:
                          print(" ➤ 最后一条是我发的,跳过")
                          continue

                      # Full chat history feeds the reply generators.
                      chat_history = await self._extract_chat_history()
                      if not chat_history:
                          print(" ➤ 聊天历史为空,跳过")
                          continue

                      # AI reply first, rule-based reply when the AI gave nothing.
                      reply_text = await self._generate_reply_with_ai(chat_history)
                      if not reply_text:
                          reply_text = self._generate_reply(chat_history)

                      if not reply_text:
                          print(" ➤ 无需回复")
                          continue

                      print(f" 📝 回复内容: {reply_text}")

                      # Fill the input box and press send, if both are visible.
                      input_box = self.page.locator('div.chat-input-dccKiL[contenteditable="true"]')
                      send_btn = self.page.locator('button:has-text("发送")')
                      if not (await input_box.is_visible() and await send_btn.is_visible()):
                          print(" ❌ 输入框或发送按钮不可见")
                          continue

                      await input_box.fill(reply_text)
                      await asyncio.sleep(0.5)
                      await send_btn.click()
                      print(" ✅ 已发送")
                      replied_count += 1
                      await asyncio.sleep(2)

                  except Exception as e:
                      print(f" ❌ 处理会话 {idx+1} 时出错: {e}")
                      continue

          print(f"[{self.platform_name}] 自动回复完成,共回复 {replied_count} 条消息")

          return {
              'success': True,
              'platform': self.platform_name,
              'replied_count': replied_count,
              'message': f'成功回复 {replied_count} 条私信'
          }

      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e)
          }
      finally:
          # Runs on both the success and the failure path.
          await self.close_browser()
  # Helper methods, kept compatible with earlier callers (reusable).
  def _generate_reply(self, chat_history: list) -> str:
      """Pick a canned, rule-based reply from the last message of the history.

      Args:
          chat_history: List of message dicts; only the ``"content"`` of the
              last entry is inspected.

      Returns:
          A greeting when the history is empty, otherwise the reply of the
          first keyword group found in the last message, or a generic
          acknowledgement when no keyword matches.
      """
      if not chat_history:
          return "你好!感谢联系~"

      latest = chat_history[-1]["content"]

      # Checked in order; the first group with a matching keyword wins.
      keyword_replies = (
          (("谢谢", "感谢"), "不客气!欢迎常来交流~"),
          (("你好", "在吗"), "你好!请问有什么可以帮您的?"),
          (("视频", "怎么拍"), "视频是用手机拍摄的,注意光线和稳定哦!"),
      )
      for keywords, reply in keyword_replies:
          if any(keyword in latest for keyword in keywords):
              return reply

      return "收到!我会认真阅读您的留言~"
  async def _extract_chat_history(self) -> list:
      """Read the open conversation and return its text messages in page order.

      Returns:
          A list of ``{"author", "content", "is_author"}`` dicts, where
          ``is_author`` is True for bubbles sent by this account. Bubbles
          without a text element or with blank text are left out. An empty
          list is returned when ``self.page`` is not set.
      """
      if not self.page:
          return []

      history = []

      # Every chat bubble except the timestamp rows.
      bubbles = self.page.locator('.box-item-dSA1TJ:not(.time-Za5gKL)')
      bubble_total = await bubbles.count()

      for position in range(bubble_total):
          try:
              bubble = bubbles.nth(position)

              # A class containing "is-me-" marks a bubble sent by this account.
              class_attr = await bubble.get_attribute('class') or ''
              sent_by_me = 'is-me-' in class_attr

              text_node = bubble.locator('.text-X2d7fS')
              if await text_node.count() <= 0:
                  continue

              content = (await text_node.inner_text()).strip()
              if not content:
                  # Blank messages are not recorded.
                  continue

              if sent_by_me:
                  author_name = '我'
              else:
                  # Use the peer's display name when the bubble carries one.
                  name_nodes = bubble.locator('.aweme-author-name-m8uoXU')
                  if await name_nodes.count() > 0:
                      author_name = await name_nodes.nth(0).inner_text()
                  else:
                      author_name = '用户'

              history.append({
                  "author": author_name,
                  "content": content,
                  "is_author": sent_by_me,
              })
          except Exception as e:
              print(f" ⚠️ 解析第 {position+1} 条消息失败: {e}")
              continue

      return history
  async def _generate_reply_with_ai(self, chat_history: list) -> str:
      """Generate a reply with an OpenAI-compatible chat-completions endpoint.

      Configuration is read from the environment: ``DASHSCOPE_API_KEY``,
      ``DASHSCOPE_BASE_URL`` and ``AI_MODEL``. Whenever no key is configured,
      the request fails, the status is not 200 or the answer is empty, the
      rule-based ``self._generate_reply`` result is returned instead.

      Args:
          chat_history: Message dicts with ``"content"`` and optional
              ``"is_author"``; own messages are sent with the ``assistant``
              role, all others with the ``user`` role.

      Returns:
          The AI reply text, or the rule-based fallback reply.
      """
      import asyncio
      import os

      ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
      if not ai_api_key:
          # Without a key there is nothing to call; `requests` is not needed either.
          return self._generate_reply(chat_history)

      try:
          import requests

          ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
          ai_model = os.environ.get('AI_MODEL', 'qwen-plus')

          messages = [{"role": "system", "content": "你是一个友好的抖音创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。"}]
          for msg in chat_history:
              role = "assistant" if msg.get("is_author", False) else "user"
              messages.append({"role": role, "content": msg["content"]})

          headers = {'Authorization': f'Bearer {ai_api_key}', 'Content-Type': 'application/json'}
          payload = {"model": ai_model, "messages": messages, "max_tokens": 150, "temperature": 0.8}

          # `requests` is blocking; run it in the default executor so the event
          # loop is not stalled for up to the 30 s timeout.
          loop = asyncio.get_running_loop()
          response = await loop.run_in_executor(
              None,
              lambda: requests.post(f"{ai_base_url}/chat/completions", headers=headers, json=payload, timeout=30),
          )

          if response.status_code != 200:
              print(f"[{self.platform_name}] AI 接口返回状态码 {response.status_code},使用规则回复", flush=True)
              return self._generate_reply(chat_history)

          choices = response.json().get('choices') or [{}]
          ai_reply = ((choices[0].get('message') or {}).get('content') or '').strip()
          return ai_reply if ai_reply else self._generate_reply(chat_history)
      except Exception as e:
          # Any request/parsing failure degrades to the rule-based reply;
          # `except Exception` lets task cancellation propagate.
          print(f"[{self.platform_name}] AI 回复生成失败,使用规则回复: {e}", flush=True)
          return self._generate_reply(chat_history)
  async def get_work_comments_mapping(self, cookies: str) -> dict:
      """Build a mapping between every work and its comments.

      The creator home page and the content-management page are visited
      first, then the work list is fetched with ``self.get_works`` (page 0,
      20 items) and ``self.get_comments`` is called once per work with a
      two-second pause after each call.

      Args:
          cookies: Cookie string for the Douyin creator platform.

      Returns:
          On success ``{'success': True, 'platform', 'work_comments',
          'summary'}`` where each ``work_comments`` entry holds ``work_info``,
          ``comments`` and, when fetching that work's comments failed,
          ``error``. On failure ``{'success': False, 'platform', 'error',
          'work_comments': []}``.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品和评论对应关系")
      print(f"{'='*60}")

      def failure(message) -> dict:
          """Payload returned when the run as a whole failed."""
          return {
              'success': False,
              'platform': self.platform_name,
              'error': message,
              'work_comments': []
          }

      work_comments_mapping = []

      try:
          await self.init_browser()
          await self.set_cookies(self.parse_cookies(cookies))

          if not self.page:
              raise Exception("Page not initialized")

          # Creator-centre home page.
          await self.page.goto("https://creator.douyin.com/creator-micro/home", timeout=30000)
          await asyncio.sleep(3)

          # A redirect to a login/passport URL means the cookies are no longer valid.
          landed_url = self.page.url
          if "login" in landed_url or "passport" in landed_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Content-management page.
          print(f"[{self.platform_name}] 访问内容管理页面...")
          await self.page.goto("https://creator.douyin.com/creator-micro/content/manage", timeout=30000)
          await asyncio.sleep(5)

          # Work list.
          works_result = await self.get_works(cookies, page=0, page_size=20)
          if not works_result.success:
              print(f"[{self.platform_name}] 获取作品列表失败: {works_result.error}")
              return failure(works_result.error)

          print(f"[{self.platform_name}] 获取到 {len(works_result.works)} 个作品")

          # Comments of each work, one request sequence per work.
          for position, work in enumerate(works_result.works, start=1):
              print(f"[{self.platform_name}] 正在获取作品 {position}/{len(works_result.works)} 的评论: {work.title[:20]}...")

              comments_result = await self.get_comments(cookies, work.work_id)
              if not comments_result.success:
                  print(f"[{self.platform_name}] 获取作品 '{work.title[:20]}...' 评论失败: {comments_result.error}")
                  # The work is still listed, with an empty comment list and the error.
                  work_comments_mapping.append({
                      'work_info': work.to_dict(),
                      'comments': [],
                      'error': comments_result.error
                  })
              else:
                  work_comments_mapping.append({
                      'work_info': work.to_dict(),
                      'comments': [entry.to_dict() for entry in comments_result.comments]
                  })
                  print(f"[{self.platform_name}] 作品 '{work.title[:20]}...' 获取到 {len(comments_result.comments)} 条评论")

              # Pause between works to avoid hitting the site too frequently.
              await asyncio.sleep(2)

          print(f"[{self.platform_name}] 所有作品评论获取完成")

      except Exception as e:
          import traceback
          traceback.print_exc()
          return failure(str(e))
      finally:
          # Runs on every exit path, including the early failure return above.
          await self.close_browser()

      comment_total = sum(len(item['comments']) for item in work_comments_mapping)
      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': work_comments_mapping,
          'summary': {
              'total_works': len(work_comments_mapping),
              'total_comments': comment_total,
          }
      }