weixin.py 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217
  1. # -*- coding: utf-8 -*-
  2. """
  3. 微信视频号发布器
  4. 参考: matrix/tencent_uploader/main.py
  5. """
  6. import asyncio
  7. import os
  8. from datetime import datetime
  9. from typing import List
  10. from .base import (
  11. BasePublisher, PublishParams, PublishResult,
  12. WorkItem, WorksResult, CommentItem, CommentsResult
  13. )
  14. import os
  15. import time
  16. def format_short_title(origin_title: str) -> str:
  17. """
  18. 格式化短标题
  19. - 移除特殊字符
  20. - 长度限制在 6-16 字符
  21. """
  22. allowed_special_chars = "《》"":+?%°"
  23. filtered_chars = [
  24. char if char.isalnum() or char in allowed_special_chars
  25. else ' ' if char == ',' else ''
  26. for char in origin_title
  27. ]
  28. formatted_string = ''.join(filtered_chars)
  29. if len(formatted_string) > 16:
  30. formatted_string = formatted_string[:16]
  31. elif len(formatted_string) < 6:
  32. formatted_string += ' ' * (6 - len(formatted_string))
  33. return formatted_string
class WeixinPublisher(BasePublisher):
    """
    Publisher for WeChat Channels (视频号).

    Drives the Channels creator center through Playwright.
    Note: Chrome should be used, otherwise H264 encoding errors may occur.
    """
    # Platform identifier; used as the prefix of the log lines in this class.
    platform_name = "weixin"
    # NOTE(review): presumably the page used to log in - not referenced in the visible code.
    login_url = "https://channels.weixin.qq.com/platform"
    # Page opened by publish() to create a new post.
    publish_url = "https://channels.weixin.qq.com/platform/post/create"
    # NOTE(review): presumably the domain applied to parsed cookies by the base class - confirm.
    cookie_domain = ".weixin.qq.com"
  44. def _parse_count(self, count_str: str) -> int:
  45. """解析数字(支持带'万'的格式)"""
  46. try:
  47. count_str = count_str.strip()
  48. if '万' in count_str:
  49. return int(float(count_str.replace('万', '')) * 10000)
  50. return int(count_str)
  51. except:
  52. return 0
  53. async def init_browser(self, storage_state: str = None):
  54. """初始化浏览器 - 参考 matrix 使用 channel=chrome 避免 H264 编码错误"""
  55. from playwright.async_api import async_playwright
  56. playwright = await async_playwright().start()
  57. # 参考 matrix: 使用系统内的 Chrome 浏览器,避免 H264 编码错误
  58. # 如果没有安装 Chrome,则使用默认 Chromium
  59. try:
  60. self.browser = await playwright.chromium.launch(
  61. headless=self.headless,
  62. channel="chrome" # 使用系统 Chrome
  63. )
  64. print(f"[{self.platform_name}] 使用系统 Chrome 浏览器")
  65. except Exception as e:
  66. print(f"[{self.platform_name}] Chrome 不可用,使用 Chromium: {e}")
  67. self.browser = await playwright.chromium.launch(headless=self.headless)
  68. # 设置 HTTP Headers 防止重定向
  69. headers = {
  70. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  71. "Referer": "https://channels.weixin.qq.com/platform/post/list",
  72. }
  73. self.context = await self.browser.new_context(
  74. extra_http_headers=headers,
  75. ignore_https_errors=True,
  76. viewport={"width": 1920, "height": 1080},
  77. user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
  78. )
  79. self.page = await self.context.new_page()
  80. return self.page
  81. async def set_schedule_time(self, publish_date: datetime):
  82. """设置定时发布"""
  83. if not self.page:
  84. return
  85. print(f"[{self.platform_name}] 设置定时发布...")
  86. # 点击定时选项
  87. label_element = self.page.locator("label").filter(has_text="定时").nth(1)
  88. await label_element.click()
  89. # 选择日期
  90. await self.page.click('input[placeholder="请选择发表时间"]')
  91. publish_month = f"{publish_date.month:02d}"
  92. current_month = f"{publish_month}月"
  93. # 检查月份
  94. page_month = await self.page.inner_text('span.weui-desktop-picker__panel__label:has-text("月")')
  95. if page_month != current_month:
  96. await self.page.click('button.weui-desktop-btn__icon__right')
  97. # 选择日期
  98. elements = await self.page.query_selector_all('table.weui-desktop-picker__table a')
  99. for element in elements:
  100. class_name = await element.evaluate('el => el.className')
  101. if 'weui-desktop-picker__disabled' in class_name:
  102. continue
  103. text = await element.inner_text()
  104. if text.strip() == str(publish_date.day):
  105. await element.click()
  106. break
  107. # 输入时间
  108. await self.page.click('input[placeholder="请选择时间"]')
  109. await self.page.keyboard.press("Control+KeyA")
  110. await self.page.keyboard.type(str(publish_date.hour))
  111. # 点击其他地方确认
  112. await self.page.locator("div.input-editor").click()
  113. async def handle_upload_error(self, video_path: str):
  114. """处理上传错误"""
  115. if not self.page:
  116. return
  117. print(f"[{self.platform_name}] 视频出错了,重新上传中...")
  118. await self.page.locator('div.media-status-content div.tag-inner:has-text("删除")').click()
  119. await self.page.get_by_role('button', name="删除", exact=True).click()
  120. file_input = self.page.locator('input[type="file"]')
  121. await file_input.set_input_files(video_path)
  122. async def add_title_tags(self, params: PublishParams):
  123. """添加标题和话题"""
  124. if not self.page:
  125. return
  126. await self.page.locator("div.input-editor").click()
  127. await self.page.keyboard.type(params.title)
  128. if params.tags:
  129. await self.page.keyboard.press("Enter")
  130. for tag in params.tags:
  131. await self.page.keyboard.type("#" + tag)
  132. await self.page.keyboard.press("Space")
  133. print(f"[{self.platform_name}] 成功添加标题和 {len(params.tags)} 个话题")
  134. async def add_short_title(self):
  135. """添加短标题"""
  136. if not self.page:
  137. return
  138. try:
  139. short_title_element = self.page.get_by_text("短标题", exact=True).locator("..").locator(
  140. "xpath=following-sibling::div").locator('span input[type="text"]')
  141. if await short_title_element.count():
  142. # 获取已有内容作为短标题
  143. pass
  144. except:
  145. pass
  146. async def upload_cover(self, cover_path: str):
  147. """上传封面图"""
  148. if not self.page or not cover_path or not os.path.exists(cover_path):
  149. return
  150. try:
  151. await asyncio.sleep(2)
  152. preview_btn_info = await self.page.locator(
  153. 'div.finder-tag-wrap.btn:has-text("更换封面")').get_attribute('class')
  154. if "disabled" not in preview_btn_info:
  155. await self.page.locator('div.finder-tag-wrap.btn:has-text("更换封面")').click()
  156. await self.page.locator('div.single-cover-uploader-wrap > div.wrap').hover()
  157. # 删除现有封面
  158. if await self.page.locator(".del-wrap > .svg-icon").count():
  159. await self.page.locator(".del-wrap > .svg-icon").click()
  160. # 上传新封面
  161. preview_div = self.page.locator("div.single-cover-uploader-wrap > div.wrap")
  162. async with self.page.expect_file_chooser() as fc_info:
  163. await preview_div.click()
  164. preview_chooser = await fc_info.value
  165. await preview_chooser.set_files(cover_path)
  166. await asyncio.sleep(2)
  167. await self.page.get_by_role("button", name="确定").click()
  168. await asyncio.sleep(1)
  169. await self.page.get_by_role("button", name="确认").click()
  170. print(f"[{self.platform_name}] 封面上传成功")
  171. except Exception as e:
  172. print(f"[{self.platform_name}] 封面上传失败: {e}")
  173. async def check_captcha(self) -> dict:
  174. """检查页面是否需要验证码"""
  175. if not self.page:
  176. return {'need_captcha': False, 'captcha_type': ''}
  177. try:
  178. # 检查各种验证码
  179. captcha_selectors = [
  180. 'text="请输入验证码"',
  181. 'text="滑动验证"',
  182. '[class*="captcha"]',
  183. '[class*="verify"]',
  184. ]
  185. for selector in captcha_selectors:
  186. try:
  187. if await self.page.locator(selector).count() > 0:
  188. print(f"[{self.platform_name}] 检测到验证码: {selector}")
  189. return {'need_captcha': True, 'captcha_type': 'image'}
  190. except:
  191. pass
  192. # 检查登录弹窗
  193. login_selectors = [
  194. 'text="请登录"',
  195. 'text="扫码登录"',
  196. '[class*="login-dialog"]',
  197. ]
  198. for selector in login_selectors:
  199. try:
  200. if await self.page.locator(selector).count() > 0:
  201. print(f"[{self.platform_name}] 检测到需要登录: {selector}")
  202. return {'need_captcha': True, 'captcha_type': 'login'}
  203. except:
  204. pass
  205. except Exception as e:
  206. print(f"[{self.platform_name}] 验证码检测异常: {e}")
  207. return {'need_captcha': False, 'captcha_type': ''}
  208. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  209. """发布视频到视频号"""
  210. print(f"\n{'='*60}")
  211. print(f"[{self.platform_name}] 开始发布视频")
  212. print(f"[{self.platform_name}] 视频路径: {params.video_path}")
  213. print(f"[{self.platform_name}] 标题: {params.title}")
  214. print(f"[{self.platform_name}] Headless: {self.headless}")
  215. print(f"{'='*60}")
  216. self.report_progress(5, "正在初始化浏览器...")
  217. # 初始化浏览器(使用 Chrome)
  218. await self.init_browser()
  219. print(f"[{self.platform_name}] 浏览器初始化完成")
  220. # 解析并设置 cookies
  221. cookie_list = self.parse_cookies(cookies)
  222. print(cookie_list)
  223. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
  224. await self.set_cookies(cookie_list)
  225. if not self.page:
  226. raise Exception("Page not initialized")
  227. # 检查视频文件
  228. if not os.path.exists(params.video_path):
  229. raise Exception(f"视频文件不存在: {params.video_path}")
  230. print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
  231. self.report_progress(10, "正在打开上传页面...")
  232. print(f"[{self.platform_name}] 当前 发布URL: {self.publish_url}")
  233. # 访问上传页面
  234. await self.page.goto(self.publish_url, wait_until="networkidle", timeout=60000)
  235. await asyncio.sleep(10)
  236. # 打印页面HTML调试
  237. print(f"[{self.platform_name}] 当前 URL: {self.page.url}")
  238. html_content = await self.page.content()
  239. print(f"[{self.platform_name}] 页面HTML长度: {len(html_content)}")
  240. # 截图调试
  241. screenshot_path = f"weixin_publish_{int(asyncio.get_event_loop().time())}.png"
  242. await self.page.screenshot(path=screenshot_path)
  243. print(f"[{self.platform_name}] 截图已保存: {screenshot_path}")
  244. # 检查 input[type='file'] 是否存在
  245. file_input = self.page.locator("input[type='file']")
  246. count = await file_input.count()
  247. print(f"[{self.platform_name}] 找到 {count} 个 file input")
  248. if count == 0:
  249. raise Exception("页面中未找到 input[type='file'] 元素")
  250. # 直接设置文件,不触发click
  251. print("上传文件...")
  252. file_path = params.video_path
  253. await file_input.first.set_input_files(file_path)
  254. print(f"[{self.platform_name}] 文件已设置: {file_path}")
  255. # 等待上传进度
  256. await asyncio.sleep(5)
  257. # 等待删除标签弹窗可见(可选,设置超时)
  258. try:
  259. await self.page.wait_for_selector(".weui-desktop-popover__wrp.finder-popover-dialog-wrap .finder-tag-wrap", state="visible", timeout=20000)
  260. print("删除标签弹窗已显示")
  261. except:
  262. print("删除标签弹窗未出现,继续执行")
  263. # 主动关闭系统文件选择窗口(如果还存在)
  264. try:
  265. # 获取所有窗口
  266. context_pages = self.page.context.pages
  267. for p in context_pages:
  268. if p != self.page and "打开" in await p.title():
  269. print(f"关闭系统文件选择窗口: {await p.title()}")
  270. await p.close()
  271. except Exception as e:
  272. print(f"关闭文件选择窗口异常: {e}")
  273. # 填写多个输入框
  274. print("填写输入框...")
  275. # 描述输入框
  276. await self.page.locator("div.input-editor[contenteditable][data-placeholder='添加描述']").fill("智能拍照机来啦")
  277. # 短标题输入框
  278. await self.page.fill("input.weui-desktop-form__input[placeholder*='概括视频主要内容']", "解放双手的智能拍照机")
  279. await self.page.wait_for_timeout(1000)
  280. # 点击最下方的发布按钮
  281. print("点击发布按钮...")
  282. await self.page.click("button.weui-desktop-btn.weui-desktop-btn_primary:has-text('发表')")
  283. # 监控是否出现"直接发表"按钮
  284. try:
  285. direct_publish_btn = self.page.locator("button.weui-desktop-btn.weui-desktop-btn_default:has-text('直接发表')")
  286. await direct_publish_btn.wait_for(state="visible", timeout=3000)
  287. print("检测到'直接发表'按钮,点击...")
  288. await direct_publish_btn.click()
  289. except:
  290. print("未检测到'直接发表'按钮,继续...")
  291. # 等待发布完成
  292. await self.page.wait_for_timeout(3000)
  293. print("发布完成!")
  294. return PublishResult(
  295. success=True,
  296. platform=self.platform_name,
  297. message="发布成功",
  298. screenshot_base64="",
  299. page_url=self.publish_url,
  300. status='success'
  301. )
  302. # 检查是否跳转到登录页
  303. current_url = self.page.url
  304. print(f"[{self.platform_name}] 当前页面: {current_url}")
  305. if "login" in current_url:
  306. screenshot_base64 = await self.capture_screenshot()
  307. return PublishResult(
  308. success=False,
  309. platform=self.platform_name,
  310. error="Cookie 已过期,需要重新登录",
  311. need_captcha=True,
  312. captcha_type='login',
  313. screenshot_base64=screenshot_base64,
  314. page_url=current_url,
  315. status='need_captcha'
  316. )
  317. # 使用 AI 检查验证码
  318. ai_captcha = await self.ai_check_captcha()
  319. if ai_captcha['has_captcha']:
  320. print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
  321. screenshot_base64 = await self.capture_screenshot()
  322. return PublishResult(
  323. success=False,
  324. platform=self.platform_name,
  325. error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
  326. need_captcha=True,
  327. captcha_type=ai_captcha['captcha_type'],
  328. screenshot_base64=screenshot_base64,
  329. page_url=current_url,
  330. status='need_captcha'
  331. )
  332. # 传统方式检查验证码
  333. captcha_result = await self.check_captcha()
  334. if captcha_result['need_captcha']:
  335. screenshot_base64 = await self.capture_screenshot()
  336. return PublishResult(
  337. success=False,
  338. platform=self.platform_name,
  339. error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
  340. need_captcha=True,
  341. captcha_type=captcha_result['captcha_type'],
  342. screenshot_base64=screenshot_base64,
  343. page_url=current_url,
  344. status='need_captcha'
  345. )
  346. self.report_progress(15, "正在选择视频文件...")
  347. # 上传视频 - 参考 matrix/tencent_uploader/main.py
  348. # matrix 使用: div.upload-content 点击后触发文件选择器
  349. upload_success = False
  350. # 方法1: 参考 matrix - 点击 div.upload-content
  351. try:
  352. upload_div = self.page.locator("div.upload-content")
  353. if await upload_div.count() > 0:
  354. print(f"[{self.platform_name}] 找到 upload-content 上传区域")
  355. async with self.page.expect_file_chooser(timeout=10000) as fc_info:
  356. await upload_div.click()
  357. file_chooser = await fc_info.value
  358. await file_chooser.set_files(params.video_path)
  359. upload_success = True
  360. print(f"[{self.platform_name}] 通过 upload-content 上传成功")
  361. except Exception as e:
  362. print(f"[{self.platform_name}] upload-content 上传失败: {e}")
  363. # 方法2: 尝试其他选择器
  364. if not upload_success:
  365. upload_selectors = [
  366. 'div[class*="upload-area"]',
  367. 'div[class*="drag-upload"]',
  368. 'div.add-wrap',
  369. '[class*="uploader"]',
  370. ]
  371. for selector in upload_selectors:
  372. if upload_success:
  373. break
  374. try:
  375. upload_area = self.page.locator(selector).first
  376. if await upload_area.count() > 0:
  377. print(f"[{self.platform_name}] 尝试点击上传区域: {selector}")
  378. async with self.page.expect_file_chooser(timeout=10000) as fc_info:
  379. await upload_area.click()
  380. file_chooser = await fc_info.value
  381. await file_chooser.set_files(params.video_path)
  382. upload_success = True
  383. print(f"[{self.platform_name}] 通过点击上传区域成功")
  384. break
  385. except Exception as e:
  386. print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")
  387. # 方法3: 直接设置 file input
  388. if not upload_success:
  389. try:
  390. file_input = self.page.locator('input[type="file"]')
  391. if await file_input.count() > 0:
  392. await file_input.first.set_input_files(params.video_path)
  393. upload_success = True
  394. print(f"[{self.platform_name}] 通过 file input 上传成功")
  395. except Exception as e:
  396. print(f"[{self.platform_name}] file input 上传失败: {e}")
  397. if not upload_success:
  398. screenshot_base64 = await self.capture_screenshot()
  399. return PublishResult(
  400. success=False,
  401. platform=self.platform_name,
  402. error="未找到上传入口",
  403. screenshot_base64=screenshot_base64,
  404. page_url=await self.get_page_url(),
  405. status='failed'
  406. )
  407. self.report_progress(20, "正在填充标题和话题...")
  408. # 添加标题和话题
  409. await self.add_title_tags(params)
  410. self.report_progress(30, "等待视频上传完成...")
  411. # 等待上传完成
  412. for _ in range(120):
  413. try:
  414. button_info = await self.page.get_by_role("button", name="发表").get_attribute('class')
  415. if "weui-desktop-btn_disabled" not in button_info:
  416. print(f"[{self.platform_name}] 视频上传完毕")
  417. # 上传封面
  418. self.report_progress(50, "正在上传封面...")
  419. await self.upload_cover(params.cover_path)
  420. break
  421. else:
  422. # 检查上传错误
  423. if await self.page.locator('div.status-msg.error').count():
  424. if await self.page.locator('div.media-status-content div.tag-inner:has-text("删除")').count():
  425. await self.handle_upload_error(params.video_path)
  426. await asyncio.sleep(3)
  427. except:
  428. await asyncio.sleep(3)
  429. self.report_progress(60, "处理视频设置...")
  430. # 添加短标题
  431. try:
  432. short_title_el = self.page.get_by_text("短标题", exact=True).locator("..").locator(
  433. "xpath=following-sibling::div").locator('span input[type="text"]')
  434. if await short_title_el.count():
  435. short_title = format_short_title(params.title)
  436. await short_title_el.fill(short_title)
  437. except:
  438. pass
  439. # 定时发布
  440. if params.publish_date:
  441. self.report_progress(70, "设置定时发布...")
  442. await self.set_schedule_time(params.publish_date)
  443. self.report_progress(80, "正在发布...")
  444. # 点击发布 - 参考 matrix
  445. for i in range(30):
  446. try:
  447. # 参考 matrix: div.form-btns button:has-text("发表")
  448. publish_btn = self.page.locator('div.form-btns button:has-text("发表")')
  449. if await publish_btn.count():
  450. print(f"[{self.platform_name}] 点击发布按钮...")
  451. await publish_btn.click()
  452. # 等待跳转到作品列表页面 - 参考 matrix
  453. await self.page.wait_for_url(
  454. "https://channels.weixin.qq.com/platform/post/list",
  455. timeout=10000
  456. )
  457. self.report_progress(100, "发布成功")
  458. print(f"[{self.platform_name}] 视频发布成功!")
  459. screenshot_base64 = await self.capture_screenshot()
  460. return PublishResult(
  461. success=True,
  462. platform=self.platform_name,
  463. message="发布成功",
  464. screenshot_base64=screenshot_base64,
  465. page_url=self.page.url,
  466. status='success'
  467. )
  468. except Exception as e:
  469. current_url = self.page.url
  470. if "https://channels.weixin.qq.com/platform/post/list" in current_url:
  471. self.report_progress(100, "发布成功")
  472. print(f"[{self.platform_name}] 视频发布成功!")
  473. screenshot_base64 = await self.capture_screenshot()
  474. return PublishResult(
  475. success=True,
  476. platform=self.platform_name,
  477. message="发布成功",
  478. screenshot_base64=screenshot_base64,
  479. page_url=current_url,
  480. status='success'
  481. )
  482. else:
  483. print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30, URL: {current_url}")
  484. await asyncio.sleep(1)
  485. # 发布超时
  486. screenshot_base64 = await self.capture_screenshot()
  487. page_url = await self.get_page_url()
  488. return PublishResult(
  489. success=False,
  490. platform=self.platform_name,
  491. error="发布超时,请检查发布状态",
  492. screenshot_base64=screenshot_base64,
  493. page_url=page_url,
  494. status='need_action'
  495. )
  496. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  497. print(f"1111111111111111111")
  498. """获取视频号作品列表"""
  499. print(f"\n{'='*60}")
  500. print(f"[{self.platform_name}] 获取作品列表")
  501. print(f"[{self.platform_name}] page={page}, page_size={page_size}")
  502. print(f"{'='*60}")
  503. works: List[WorkItem] = []
  504. total = 0
  505. has_more = False
  506. try:
  507. await self.init_browser()
  508. cookie_list = self.parse_cookies(cookies)
  509. await self.set_cookies(cookie_list)
  510. if not self.page:
  511. raise Exception("Page not initialized")
  512. # 访问视频号创作者中心
  513. await self.page.goto("https://channels.weixin.qq.com/platform/post/list")
  514. await asyncio.sleep(5)
  515. print(f"1111111111111111")
  516. # 检查登录状态
  517. current_url = self.page.url
  518. if "login" in current_url:
  519. print(f"2111111111111111")
  520. raise Exception("Cookie 已过期,请重新登录")
  521. # 视频号使用页面爬取方式获取作品列表
  522. # 等待作品列表加载(增加等待时间,并添加截图调试)
  523. try:
  524. await self.page.wait_for_selector('div.post-feed-item', timeout=15000)
  525. except:
  526. # 超时后打印当前 URL 和截图
  527. current_url = self.page.url
  528. print(f"[{self.platform_name}] 等待超时,当前 URL: {current_url}")
  529. # 截图保存
  530. screenshot_path = f"weixin_timeout_{int(asyncio.get_event_loop().time())}.png"
  531. await self.page.screenshot(path=screenshot_path)
  532. print(f"[{self.platform_name}] 截图已保存: {screenshot_path}")
  533. raise Exception(f"页面加载超时,当前 URL: {current_url}")
  534. # 打印 DOM 结构
  535. page_html = await self.page.content()
  536. print(f"[{self.platform_name}] ========== 页面 DOM 开始 ==========")
  537. print(page_html[:5000]) # 打印前5000个字符
  538. print(f"[{self.platform_name}] ========== 页面 DOM 结束 ==========")
  539. # 获取所有作品项
  540. post_items = self.page.locator('div.post-feed-item')
  541. item_count = await post_items.count()
  542. print(f"[{self.platform_name}] 找到 {item_count} 个作品项")
  543. for i in range(min(item_count, page_size)):
  544. try:
  545. item = post_items.nth(i)
  546. # 获取封面
  547. cover_el = item.locator('div.media img.thumb').first
  548. cover_url = ''
  549. if await cover_el.count() > 0:
  550. cover_url = await cover_el.get_attribute('src') or ''
  551. # 获取标题
  552. title_el = item.locator('div.post-title').first
  553. title = ''
  554. if await title_el.count() > 0:
  555. title = await title_el.text_content() or ''
  556. title = title.strip()
  557. # 获取发布时间
  558. time_el = item.locator('div.post-time span').first
  559. publish_time = ''
  560. if await time_el.count() > 0:
  561. publish_time = await time_el.text_content() or ''
  562. publish_time = publish_time.strip()
  563. # 获取统计数据
  564. import re
  565. data_items = item.locator('div.post-data div.data-item')
  566. data_count = await data_items.count()
  567. play_count = 0
  568. like_count = 0
  569. comment_count = 0
  570. share_count = 0
  571. collect_count = 0
  572. for j in range(data_count):
  573. data_item = data_items.nth(j)
  574. count_text = await data_item.locator('span.count').text_content() or '0'
  575. count_text = count_text.strip()
  576. # 判断图标类型
  577. if await data_item.locator('span.weui-icon-outlined-eyes-on').count() > 0:
  578. # 播放量
  579. play_count = self._parse_count(count_text)
  580. elif await data_item.locator('span.weui-icon-outlined-like').count() > 0:
  581. # 点赞
  582. like_count = self._parse_count(count_text)
  583. elif await data_item.locator('span.weui-icon-outlined-comment').count() > 0:
  584. # 评论
  585. comment_count = self._parse_count(count_text)
  586. elif await data_item.locator('use[xlink\\:href="#icon-share"]').count() > 0:
  587. # 分享
  588. share_count = self._parse_count(count_text)
  589. elif await data_item.locator('use[xlink\\:href="#icon-thumb"]').count() > 0:
  590. # 收藏
  591. collect_count = self._parse_count(count_text)
  592. # 生成临时 work_id
  593. work_id = f"weixin_{i}_{hash(title)}_{hash(publish_time)}"
  594. works.append(WorkItem(
  595. work_id=work_id,
  596. title=title or '无标题',
  597. cover_url=cover_url,
  598. duration=0,
  599. status='published',
  600. publish_time=publish_time,
  601. play_count=play_count,
  602. like_count=like_count,
  603. comment_count=comment_count,
  604. share_count=share_count,
  605. collect_count=collect_count,
  606. ))
  607. except Exception as e:
  608. print(f"[{self.platform_name}] 解析作品 {i} 失败: {e}")
  609. import traceback
  610. traceback.print_exc()
  611. continue
  612. total = len(works)
  613. has_more = item_count > page_size
  614. print(f"[{self.platform_name}] 获取到 {total} 个作品")
  615. except Exception as e:
  616. import traceback
  617. traceback.print_exc()
  618. return WorksResult(success=False, platform=self.platform_name, error=str(e))
  619. return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more)
  620. async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
  621. """获取视频号作品评论"""
  622. print(f"\n{'='*60}")
  623. print(f"[{self.platform_name}] 获取作品评论")
  624. print(f"[{self.platform_name}] work_id={work_id}")
  625. print(f"{'='*60}")
  626. comments: List[CommentItem] = []
  627. total = 0
  628. has_more = False
  629. try:
  630. await self.init_browser()
  631. cookie_list = self.parse_cookies(cookies)
  632. await self.set_cookies(cookie_list)
  633. if not self.page:
  634. raise Exception("Page not initialized")
  635. # 访问评论管理页面
  636. await self.page.goto("https://channels.weixin.qq.com/platform/interaction/comment")
  637. await asyncio.sleep(3)
  638. # 检查登录状态
  639. current_url = self.page.url
  640. if "login" in current_url:
  641. raise Exception("Cookie 已过期,请重新登录")
  642. # 等待左侧作品列表加载
  643. try:
  644. await self.page.wait_for_selector('div.comment-feed-wrap', timeout=15000)
  645. except:
  646. print(f"[{self.platform_name}] 未找到作品列表")
  647. return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)
  648. print(f"[{self.platform_name}] 查找 work_id={work_id} 对应的作品")
  649. # 点击左侧作品项(根据 work_id 匹配)
  650. feed_items = self.page.locator('div.comment-feed-wrap')
  651. item_count = await feed_items.count()
  652. print(f"[{self.platform_name}] 左侧共 {item_count} 个作品")
  653. clicked = False
  654. for i in range(item_count):
  655. feed = feed_items.nth(i)
  656. title_el = feed.locator('div.feed-title').first
  657. if await title_el.count() > 0:
  658. title_text = await title_el.text_content() or ''
  659. title_text = title_text.strip()
  660. # 检查是否包含 work_id(标题)
  661. if work_id in title_text or title_text in work_id:
  662. print(f"[{self.platform_name}] 找到匹配作品: {title_text}")
  663. await feed.click()
  664. await asyncio.sleep(2)
  665. clicked = True
  666. break
  667. if not clicked:
  668. # 如果没找到匹配的,点击第一个
  669. print(f"[{self.platform_name}] 未找到匹配作品,点击第一个")
  670. if item_count > 0:
  671. await feed_items.nth(0).click()
  672. await asyncio.sleep(2)
  673. else:
  674. return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)
  675. # 等待右侧评论详情加载
  676. try:
  677. await self.page.wait_for_selector('div.comment-item', timeout=5000)
  678. except:
  679. print(f"[{self.platform_name}] 该作品暂无评论")
  680. return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)
  681. # 获取评论总数
  682. total_text_el = self.page.locator('div.comment-count__tips')
  683. if await total_text_el.count() > 0:
  684. total_text = await total_text_el.text_content() or ''
  685. # 提取数字(如 "共 1 条评论")
  686. import re
  687. match = re.search(r'(\d+)', total_text)
  688. if match:
  689. total = int(match.group(1))
  690. print(f"[{self.platform_name}] 评论总数: {total}")
  691. # 获取右侧评论列表
  692. comment_items = self.page.locator('div.comment-item')
  693. item_count = await comment_items.count()
  694. print(f"[{self.platform_name}] 当前加载 {item_count} 条评论")
  695. for i in range(item_count):
  696. try:
  697. item = comment_items.nth(i)
  698. # 获取作者昵称(加 .first 防 strict mode)
  699. author_name = ''
  700. name_el = item.locator('span.comment-user-name').first
  701. if await name_el.count() > 0:
  702. author_name = await name_el.text_content() or ''
  703. author_name = author_name.strip()
  704. # 获取头像
  705. author_avatar = ''
  706. avatar_el = item.locator('img.comment-avatar').first
  707. if await avatar_el.count() > 0:
  708. author_avatar = await avatar_el.get_attribute('src') or ''
  709. # 获取评论内容(加 .first 防 strict mode)
  710. content = ''
  711. content_el = item.locator('span.comment-content').first
  712. if await content_el.count() > 0:
  713. content = await content_el.text_content() or ''
  714. content = content.strip()
  715. # 获取评论时间(加 .first 防 strict mode)
  716. create_time = ''
  717. time_el = item.locator('span.comment-time').first
  718. if await time_el.count() > 0:
  719. create_time = await time_el.text_content() or ''
  720. create_time = create_time.strip()
  721. if not content:
  722. continue
  723. # 生成评论 ID
  724. comment_id = f"weixin_comment_{i}_{abs(hash(content))}"
  725. comments.append(CommentItem(
  726. comment_id=comment_id,
  727. work_id=work_id,
  728. content=content,
  729. author_id='',
  730. author_name=author_name,
  731. author_avatar=author_avatar,
  732. like_count=0,
  733. reply_count=0,
  734. create_time=create_time,
  735. ))
  736. print(f"[{self.platform_name}] 评论 {i+1}: {author_name} - {content[:20]}...")
  737. except Exception as e:
  738. print(f"[{self.platform_name}] 解析评论 {i} 失败: {e}")
  739. continue
  740. print(f"[{self.platform_name}] 成功获取 {len(comments)} 条评论")
  741. except Exception as e:
  742. import traceback
  743. traceback.print_exc()
  744. return CommentsResult(success=False, platform=self.platform_name, work_id=work_id, error=str(e))
  745. return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=comments, total=total, has_more=has_more)
  async def auto_reply_private_messages(self, cookies: str) -> dict:
      """Automatically reply to private messages (integrated from pw3.py).

      Initializes the browser, applies the given cookies, opens the private
      message page and then processes the "打招呼消息" and "私信" tabs, in
      that order, via ``self._process_tab_sessions``.

      Args:
          cookies: Raw cookie string; parsed with ``self.parse_cookies``.

      Returns:
          ``{'success': True, 'platform', 'replied_count', 'message'}`` on
          success, or ``{'success': False, 'platform', 'error'}`` when any
          step raises. Ordinary exceptions are printed and reported in the
          result instead of being propagated.
      """
      import time

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始自动回复私信")
      print(f"{'='*60}")
      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Open the private message page
          await self.page.goto("https://channels.weixin.qq.com/platform/private_msg", timeout=30000)
          await asyncio.sleep(3)

          # A redirect to a login URL means the cookies are no longer valid
          current_url = self.page.url
          print(f"[{self.platform_name}] 当前 URL: {current_url}")
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Wait for the page to load, trying a fallback selector as well.
          # `except Exception` (not a bare `except`) so that task
          # cancellation / KeyboardInterrupt are not swallowed here.
          try:
              await self.page.wait_for_selector('.private-msg-list-header', timeout=15000)
          except Exception:
              try:
                  await self.page.wait_for_selector('.weui-desktop-tab__navs__inner', timeout=10000)
                  print(f"[{self.platform_name}] 使用备用选择器加载成功")
              except Exception:
                  # Debug screenshot, named with the wall-clock time so files
                  # from different runs do not collide. Best-effort: a failed
                  # screenshot must not hide the real timeout error below.
                  screenshot_path = f"weixin_private_msg_{int(time.time())}.png"
                  try:
                      await self.page.screenshot(path=screenshot_path)
                      print(f"[{self.platform_name}] 页面加载失败,截图: {screenshot_path}")
                  except Exception as shot_err:
                      print(f"[{self.platform_name}] 截图失败: {shot_err}")
                  raise Exception(f"私信页面加载超时,当前 URL: {current_url}")
          print(f"[{self.platform_name}] 私信页面加载完成")

          # Process both tabs
          total_replied = 0
          for tab_name in ["打招呼消息", "私信"]:
              total_replied += await self._process_tab_sessions(tab_name)

          print(f"[{self.platform_name}] 自动回复完成,共回复 {total_replied} 条消息")
          return {
              'success': True,
              'platform': self.platform_name,
              'replied_count': total_replied,
              'message': f'成功回复 {total_replied} 条私信'
          }
      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e)
          }
  800. async def _process_tab_sessions(self, tab_name: str) -> int:
  801. """处理指定 tab 下的所有会话"""
  802. print(f"\n🔄 正在处理「{tab_name}」中的所有会话...")
  803. if not self.page:
  804. return 0
  805. replied_count = 0
  806. try:
  807. # 点击 tab
  808. if tab_name == "私信":
  809. tab_link = self.page.locator('.weui-desktop-tab__navs__inner li').first.locator('a')
  810. elif tab_name == "打招呼消息":
  811. tab_link = self.page.locator('.weui-desktop-tab__navs__inner li').nth(1).locator('a')
  812. else:
  813. return 0
  814. if await tab_link.is_visible():
  815. await tab_link.click()
  816. print(f" ➤ 已点击「{tab_name}」tab")
  817. else:
  818. print(f" ❌ 「{tab_name}」tab 不可见")
  819. return 0
  820. # 等待会话列表加载
  821. try:
  822. await self.page.wait_for_function("""
  823. () => {
  824. const hasSession = document.querySelectorAll('.session-wrap').length > 0;
  825. const hasEmpty = !!document.querySelector('.empty-text');
  826. return hasSession || hasEmpty;
  827. }
  828. """, timeout=8000)
  829. print(" ✅ 会话列表区域已加载")
  830. except:
  831. print(" ⚠️ 等待会话列表超时,继续尝试读取...")
  832. # 获取会话
  833. session_wraps = self.page.locator('.session-wrap')
  834. session_count = await session_wraps.count()
  835. print(f" 💬 共找到 {session_count} 个会话")
  836. if session_count == 0:
  837. return 0
  838. # 遍历每个会话
  839. for idx in range(session_count):
  840. try:
  841. current_sessions = self.page.locator('.session-wrap')
  842. if idx >= await current_sessions.count():
  843. break
  844. session = current_sessions.nth(idx)
  845. user_name = await session.locator('.name').inner_text()
  846. last_preview = await session.locator('.feed-info').inner_text()
  847. print(f"\n ➤ [{idx+1}/{session_count}] 正在处理: {user_name} | 最后消息: {last_preview}")
  848. await session.click()
  849. await asyncio.sleep(2)
  850. # 提取聊天历史
  851. history = await self._extract_chat_history()
  852. need_reply = (not history) or (not history[-1]["is_author"])
  853. if need_reply:
  854. reply_text = await self._generate_reply_with_ai(history)
  855. if reply_text=="":
  856. reply_text = self._generate_reply(history)
  857. # # 生成回复
  858. # if history and history[-1]["is_author"]:
  859. # reply_text = await self._generate_reply_with_ai(history)
  860. # else:
  861. # reply_text = self._generate_reply(history)
  862. if reply_text:
  863. print(f" 📝 回复内容: {reply_text}")
  864. try:
  865. textarea = self.page.locator('.edit_area').first
  866. send_btn = self.page.locator('button:has-text("发送")').first
  867. if await textarea.is_visible() and await send_btn.is_visible():
  868. await textarea.fill(reply_text)
  869. await asyncio.sleep(0.5)
  870. await send_btn.click()
  871. print(" ✅ 已发送")
  872. replied_count += 1
  873. await asyncio.sleep(1.5)
  874. else:
  875. print(" ❌ 输入框或发送按钮不可见")
  876. except Exception as e:
  877. print(f" ❌ 发送失败: {e}")
  878. else:
  879. print(" ➤ 无需回复")
  880. else:
  881. print(" ➤ 最后一条是我发的,跳过回复")
  882. except Exception as e:
  883. print(f" ❌ 处理会话 {idx+1} 时出错: {e}")
  884. continue
  885. except Exception as e:
  886. print(f"❌ 处理「{tab_name}」失败: {e}")
  887. return replied_count
  888. async def _extract_chat_history(self) -> list:
  889. """精准提取聊天记录,区分作者(自己)和用户"""
  890. if not self.page:
  891. return []
  892. history = []
  893. message_wrappers = self.page.locator('.session-content-wrapper > div:not(.footer) > .text-wrapper')
  894. count = await message_wrappers.count()
  895. for i in range(count):
  896. try:
  897. wrapper = message_wrappers.nth(i)
  898. # 判断方向
  899. is_right = await wrapper.locator('.content-right').count() > 0
  900. is_left = await wrapper.locator('.content-left').count() > 0
  901. if not (is_left or is_right):
  902. continue
  903. # 提取消息文本
  904. pre_el = wrapper.locator('pre.message-plain')
  905. content = ''
  906. if await pre_el.count() > 0:
  907. content = await pre_el.inner_text()
  908. content = content.strip()
  909. if not content:
  910. continue
  911. # 获取头像
  912. avatar_img = wrapper.locator('.avatar').first
  913. avatar_src = ''
  914. if await avatar_img.count() > 0:
  915. avatar_src = await avatar_img.get_attribute("src") or ''
  916. # 右侧 = 作者(自己)
  917. is_author = is_right
  918. # 获取用户名
  919. if is_left:
  920. name_el = wrapper.locator('.profile .name')
  921. author_name = '用户'
  922. if await name_el.count() > 0:
  923. author_name = await name_el.inner_text()
  924. else:
  925. author_name = "我"
  926. history.append({
  927. "author": author_name,
  928. "content": content,
  929. "is_author": is_author,
  930. "avatar": avatar_src
  931. })
  932. except Exception as e:
  933. print(f" ⚠️ 解析第 {i+1} 条消息失败: {e}")
  934. continue
  935. return history
  936. async def _generate_reply_with_ai(self, chat_history: list) -> str:
  937. """使用 AI 生成智能回复"""
  938. import requests
  939. import json
  940. try:
  941. # 获取 AI 配置
  942. ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
  943. ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
  944. ai_model = os.environ.get('AI_MODEL', 'qwen-plus')
  945. if not ai_api_key:
  946. print("⚠️ 未配置 AI API Key,使用规则回复")
  947. return self._generate_reply(chat_history)
  948. # 构建对话上下文
  949. messages = [{"role": "system", "content": "你是一个友好的微信视频号创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。"}]
  950. for msg in chat_history:
  951. role = "assistant" if msg["is_author"] else "user"
  952. messages.append({
  953. "role": role,
  954. "content": msg["content"]
  955. })
  956. # 调用 AI API
  957. headers = {
  958. 'Authorization': f'Bearer {ai_api_key}',
  959. 'Content-Type': 'application/json'
  960. }
  961. payload = {
  962. "model": ai_model,
  963. "messages": messages,
  964. "max_tokens": 150,
  965. "temperature": 0.8
  966. }
  967. print(" 🤖 正在调用 AI 生成回复...")
  968. response = requests.post(
  969. f"{ai_base_url}/chat/completions",
  970. headers=headers,
  971. json=payload,
  972. timeout=30
  973. )
  974. if response.status_code != 200:
  975. print(f" ⚠️ AI API 返回错误 {response.status_code},使用规则回复")
  976. return self._generate_reply(chat_history)
  977. result = response.json()
  978. ai_reply = result.get('choices', [{}])[0].get('message', {}).get('content', '').strip()
  979. if ai_reply:
  980. print(f" ✅ AI 生成回复: {ai_reply}")
  981. return ai_reply
  982. else:
  983. print(" ⚠️ AI 返回空内容,使用规则回复")
  984. return self._generate_reply(chat_history)
  985. except Exception as e:
  986. print(f" ⚠️ AI 回复生成失败: {e},使用规则回复")
  987. return self._generate_reply(chat_history)
  988. def _generate_reply(self, chat_history: list) -> str:
  989. """根据完整聊天历史生成回复(规则回复方式)"""
  990. if not chat_history:
  991. return "你好!感谢联系~"
  992. # 检查最后一条是否是作者发的
  993. if chat_history[-1]["is_author"]:
  994. return "" # 不回复
  995. # 找最后一条用户消息
  996. last_user_msg = chat_history[-1]["content"]
  997. # 简单规则回复
  998. if "谢谢" in last_user_msg or "感谢" in last_user_msg:
  999. return "不客气!欢迎常来交流~"
  1000. elif "你好" in last_user_msg or "在吗" in last_user_msg:
  1001. return "你好!请问有什么可以帮您的?"
  1002. elif "视频" in last_user_msg or "怎么拍" in last_user_msg:
  1003. return "视频是用手机拍摄的,注意光线和稳定哦!"
  1004. else:
  1005. return "收到!我会认真阅读您的留言~"