weixin.py 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167
  1. # -*- coding: utf-8 -*-
  2. """
  3. 微信视频号发布器
  4. 参考: matrix/tencent_uploader/main.py
  5. """
  6. import asyncio
  7. import os
  8. from datetime import datetime
  9. from typing import List
  10. from .base import (
  11. BasePublisher, PublishParams, PublishResult,
  12. WorkItem, WorksResult, CommentItem, CommentsResult
  13. )
# Lets the selector for the "upload video" entry point be supplied manually through an
# environment variable, so it can be adjusted quickly when the page structure changes often.
WEIXIN_UPLOAD_SELECTOR = os.environ.get("WEIXIN_UPLOAD_SELECTOR", "").strip()
  16. def format_short_title(origin_title: str) -> str:
  17. """
  18. 格式化短标题
  19. - 移除特殊字符
  20. - 长度限制在 6-16 字符
  21. """
  22. allowed_special_chars = "《》"":+?%°"
  23. filtered_chars = [
  24. char if char.isalnum() or char in allowed_special_chars
  25. else ' ' if char == ',' else ''
  26. for char in origin_title
  27. ]
  28. formatted_string = ''.join(filtered_chars)
  29. if len(formatted_string) > 16:
  30. formatted_string = formatted_string[:16]
  31. elif len(formatted_string) < 6:
  32. formatted_string += ' ' * (6 - len(formatted_string))
  33. return formatted_string
class WeixinPublisher(BasePublisher):
    """
    Publisher for WeChat Channels (微信视频号).

    Automates the Channels creator center with Playwright.
    Note: the Chrome browser is required, otherwise H264 encoding errors may occur.
    """
    platform_name = "weixin"
    login_url = "https://channels.weixin.qq.com/platform"
    publish_url = "https://channels.weixin.qq.com/platform/post/create"
    # Channels is served from channels.weixin.qq.com; its cookies commonly use domains such as
    # .qq.com / .weixin.qq.com.
    # Default to the broader .qq.com so the domain fallback still takes effect when the
    # cookies are supplied as a plain string.
    cookie_domain = ".qq.com"
  46. async def ai_find_upload_selector(self, frame_html: str, frame_name: str = "main") -> str:
  47. """
  48. 使用 AI 从 HTML 中识别“上传视频/选择文件”相关元素的 CSS 选择器。
  49. 设计思路:
  50. - 仅在常规 DOM 选择器都失败时调用,避免频繁占用 AI 配额;
  51. - 通过 DashScope 文本模型(与验证码识别同一套配置)分析 HTML;
  52. - 返回一个适合用于 frame.locator(selector) 的 CSS 选择器。
  53. """
  54. import json
  55. import re
  56. import requests
  57. import os
  58. # 避免 HTML 过长导致 token 超限,只截取前 N 字符
  59. if not frame_html:
  60. return ""
  61. max_len = 20000
  62. if len(frame_html) > max_len:
  63. frame_html = frame_html[:max_len]
  64. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  65. ai_base_url = os.environ.get("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
  66. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  67. if not ai_api_key:
  68. print(f"[{self.platform_name}] AI上传入口识别: 未配置 AI API Key,跳过")
  69. return ""
  70. prompt = f"""
  71. 你是熟悉微信视频号后台的前端工程师,现在需要在一段 HTML 中找到“上传视频文件”的入口。
  72. 页面说明:
  73. - 平台:微信视频号(channels.weixin.qq.com)
  74. - 目标:用于上传视频文件的按钮或 input(一般会触发文件选择框)
  75. - 你会收到某个 frame 的完整 HTML 片段(不包含截图)。
  76. 请你根据下面的 HTML,推断最适合用于上传视频文件的元素,并输出一个可以被 Playwright 使用的 CSS 选择器。
  77. 要求:
  78. 1. 只考虑“上传/选择视频文件”的入口,不要返回“发布/发表/下一步”等按钮;
  79. 2. 选择器需要尽量稳定,不要使用自动生成的随机类名(例如带很多随机字母/数字的类名可以用前缀匹配);
  80. 3. 选择器必须是 CSS 选择器(不要返回 XPath);
  81. 4. 如果确实找不到合理的上传入口,返回 selector 为空字符串。
  82. 请以 JSON 格式输出,严格遵守以下结构(不要添加任何解释文字):
  83. ```json
  84. {{
  85. "selector": "CSS 选择器字符串,比如:input[type='file'] 或 div.upload-content input[type='file']"
  86. }}
  87. ```
  88. 下面是 frame=\"{frame_name}\" 的 HTML:
  89. ```html
  90. {frame_html}
  91. ```"""
  92. payload = {
  93. "model": ai_text_model,
  94. "messages": [
  95. {
  96. "role": "user",
  97. "content": prompt,
  98. }
  99. ],
  100. "max_tokens": 600,
  101. }
  102. headers = {
  103. "Authorization": f"Bearer {ai_api_key}",
  104. "Content-Type": "application/json",
  105. }
  106. try:
  107. print(f"[{self.platform_name}] AI上传入口识别: 正在分析 frame={frame_name} HTML...")
  108. resp = requests.post(
  109. f"{ai_base_url}/chat/completions",
  110. headers=headers,
  111. json=payload,
  112. timeout=40,
  113. )
  114. if resp.status_code != 200:
  115. print(f"[{self.platform_name}] AI上传入口识别: API 返回错误 {resp.status_code}")
  116. return ""
  117. data = resp.json()
  118. content = data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  119. # 尝试从 ```json``` 代码块中解析
  120. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  121. if json_match:
  122. json_str = json_match.group(1)
  123. else:
  124. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  125. json_str = json_match.group(0) if json_match else "{}"
  126. try:
  127. result = json.loads(json_str)
  128. except Exception:
  129. result = {}
  130. selector = (result.get("selector") or "").strip()
  131. print(f"[{self.platform_name}] AI上传入口识别结果: selector='{selector}'")
  132. return selector
  133. except Exception as e:
  134. print(f"[{self.platform_name}] AI上传入口识别异常: {e}")
  135. return ""
  136. async def ai_pick_selector_from_candidates(self, candidates: list, goal: str, frame_name: str = "main") -> str:
  137. """
  138. 将“候选元素列表(包含 css selector + 文本/属性)”发给 AI,让 AI 直接挑选最符合 goal 的元素。
  139. 适用于:HTML 里看不出上传入口、或页面大量动态渲染时。
  140. """
  141. import json
  142. import re
  143. import requests
  144. import os
  145. if not candidates:
  146. return ""
  147. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  148. ai_base_url = os.environ.get("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
  149. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  150. if not ai_api_key:
  151. print(f"[{self.platform_name}] AI候选选择器: 未配置 AI API Key,跳过")
  152. return ""
  153. # 控制长度,最多取前 120 个候选
  154. candidates = candidates[:120]
  155. prompt = f"""
  156. 你是自动化发布工程师。现在要在微信视频号(channels.weixin.qq.com)发布页面里找到“{goal}”相关的入口元素。
  157. 我会给你一组候选元素,每个候选都包含:
  158. - css: 可直接用于 Playwright 的 CSS 选择器
  159. - tag / type / role / ariaLabel / text / id / className(部分字段可能为空)
  160. 你的任务:
  161. - 从候选中选出最可能用于“{goal}”的元素,返回它的 css 选择器;
  162. - 如果没有任何候选符合,返回空字符串。
  163. 注意:
  164. - 如果 goal 是“上传视频入口”,优先选择 input[type=file] 或看起来会触发选择文件/上传的区域;
  165. - 不要选择“发布/发表/下一步”等按钮(除非 goal 明确是发布按钮)。
  166. 请严格按 JSON 输出(不要解释):
  167. ```json
  168. {{ "selector": "..." }}
  169. ```
  170. 候选列表(frame={frame_name}):
  171. ```json
  172. {json.dumps(candidates, ensure_ascii=False)}
  173. ```"""
  174. payload = {
  175. "model": ai_text_model,
  176. "messages": [{"role": "user", "content": prompt}],
  177. "max_tokens": 400,
  178. }
  179. headers = {
  180. "Authorization": f"Bearer {ai_api_key}",
  181. "Content-Type": "application/json",
  182. }
  183. try:
  184. print(f"[{self.platform_name}] AI候选选择器: 正在分析 frame={frame_name}, goal={goal} ...")
  185. resp = requests.post(
  186. f"{ai_base_url}/chat/completions",
  187. headers=headers,
  188. json=payload,
  189. timeout=40,
  190. )
  191. if resp.status_code != 200:
  192. print(f"[{self.platform_name}] AI候选选择器: API 返回错误 {resp.status_code}")
  193. return ""
  194. data = resp.json()
  195. content = data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  196. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  197. if json_match:
  198. json_str = json_match.group(1)
  199. else:
  200. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  201. json_str = json_match.group(0) if json_match else "{}"
  202. try:
  203. result = json.loads(json_str)
  204. except Exception:
  205. result = {}
  206. selector = (result.get("selector") or "").strip()
  207. print(f"[{self.platform_name}] AI候选选择器结果: selector='{selector}'")
  208. return selector
  209. except Exception as e:
  210. print(f"[{self.platform_name}] AI候选选择器异常: {e}")
  211. return ""
  212. async def _extract_relevant_html_snippets(self, html: str) -> str:
  213. """
  214. 从 HTML 中抽取与上传相关的片段,减少 token,提升 AI 命中率。
  215. - 优先抓取包含 upload/上传/file/input 等关键词的窗口片段
  216. - 若未命中关键词,返回“开头 + 结尾”的拼接
  217. """
  218. import re
  219. if not html:
  220. return ""
  221. patterns = [
  222. r"upload",
  223. r"uploader",
  224. r"file",
  225. r"type\\s*=\\s*['\\\"]file['\\\"]",
  226. r"input",
  227. r"drag",
  228. r"drop",
  229. r"选择",
  230. r"上传",
  231. r"添加",
  232. r"视频",
  233. ]
  234. regex = re.compile("|".join(patterns), re.IGNORECASE)
  235. snippets = []
  236. for m in regex.finditer(html):
  237. start = max(0, m.start() - 350)
  238. end = min(len(html), m.end() + 350)
  239. snippets.append(html[start:end])
  240. if len(snippets) >= 18:
  241. break
  242. if snippets:
  243. # 去重(粗略)
  244. unique = []
  245. seen = set()
  246. for s in snippets:
  247. key = hash(s)
  248. if key not in seen:
  249. seen.add(key)
  250. unique.append(s)
  251. return "\n\n<!-- SNIPPET -->\n\n".join(unique)[:20000]
  252. # fallback: head + tail
  253. head = html[:9000]
  254. tail = html[-9000:] if len(html) > 9000 else ""
  255. return (head + "\n\n<!-- TAIL -->\n\n" + tail)[:20000]
  256. async def init_browser(self, storage_state: str = None):
  257. """初始化浏览器 - 参考 matrix 使用 channel=chrome 避免 H264 编码错误"""
  258. from playwright.async_api import async_playwright
  259. playwright = await async_playwright().start()
  260. # 参考 matrix: 使用系统内的 Chrome 浏览器,避免 H264 编码错误
  261. # 如果没有安装 Chrome,则使用默认 Chromium
  262. try:
  263. self.browser = await playwright.chromium.launch(
  264. headless=self.headless,
  265. channel="chrome" # 使用系统 Chrome
  266. )
  267. print(f"[{self.platform_name}] 使用系统 Chrome 浏览器")
  268. except Exception as e:
  269. print(f"[{self.platform_name}] Chrome 不可用,使用 Chromium: {e}")
  270. self.browser = await playwright.chromium.launch(headless=self.headless)
  271. if storage_state and os.path.exists(storage_state):
  272. self.context = await self.browser.new_context(storage_state=storage_state)
  273. else:
  274. self.context = await self.browser.new_context()
  275. self.page = await self.context.new_page()
  276. return self.page
  277. async def set_schedule_time(self, publish_date: datetime):
  278. """设置定时发布"""
  279. if not self.page:
  280. return
  281. print(f"[{self.platform_name}] 设置定时发布...")
  282. # 点击定时选项
  283. label_element = self.page.locator("label").filter(has_text="定时").nth(1)
  284. await label_element.click()
  285. # 选择日期
  286. await self.page.click('input[placeholder="请选择发表时间"]')
  287. publish_month = f"{publish_date.month:02d}"
  288. current_month = f"{publish_month}月"
  289. # 检查月份
  290. page_month = await self.page.inner_text('span.weui-desktop-picker__panel__label:has-text("月")')
  291. if page_month != current_month:
  292. await self.page.click('button.weui-desktop-btn__icon__right')
  293. # 选择日期
  294. elements = await self.page.query_selector_all('table.weui-desktop-picker__table a')
  295. for element in elements:
  296. class_name = await element.evaluate('el => el.className')
  297. if 'weui-desktop-picker__disabled' in class_name:
  298. continue
  299. text = await element.inner_text()
  300. if text.strip() == str(publish_date.day):
  301. await element.click()
  302. break
  303. # 输入时间
  304. await self.page.click('input[placeholder="请选择时间"]')
  305. await self.page.keyboard.press("Control+KeyA")
  306. await self.page.keyboard.type(str(publish_date.hour))
  307. # 点击其他地方确认
  308. await self.page.locator("div.input-editor").click()
  309. async def handle_upload_error(self, video_path: str):
  310. """处理上传错误"""
  311. if not self.page:
  312. return
  313. print(f"[{self.platform_name}] 视频出错了,重新上传中...")
  314. await self.page.locator('div.media-status-content div.tag-inner:has-text("删除")').click()
  315. await self.page.get_by_role('button', name="删除", exact=True).click()
  316. file_input = self.page.locator('input[type="file"]')
  317. await file_input.set_input_files(video_path)
  318. async def add_title_tags(self, params: PublishParams):
  319. """添加标题和话题"""
  320. if not self.page:
  321. return
  322. await self.page.locator("div.input-editor").click()
  323. await self.page.keyboard.type(params.title)
  324. if params.tags:
  325. await self.page.keyboard.press("Enter")
  326. for tag in params.tags:
  327. await self.page.keyboard.type("#" + tag)
  328. await self.page.keyboard.press("Space")
  329. print(f"[{self.platform_name}] 成功添加标题和 {len(params.tags)} 个话题")
  330. async def add_short_title(self):
  331. """添加短标题"""
  332. if not self.page:
  333. return
  334. try:
  335. short_title_element = self.page.get_by_text("短标题", exact=True).locator("..").locator(
  336. "xpath=following-sibling::div").locator('span input[type="text"]')
  337. if await short_title_element.count():
  338. # 获取已有内容作为短标题
  339. pass
  340. except:
  341. pass
  342. async def upload_cover(self, cover_path: str):
  343. """上传封面图"""
  344. if not self.page or not cover_path or not os.path.exists(cover_path):
  345. return
  346. try:
  347. await asyncio.sleep(2)
  348. preview_btn_info = await self.page.locator(
  349. 'div.finder-tag-wrap.btn:has-text("更换封面")').get_attribute('class')
  350. if "disabled" not in preview_btn_info:
  351. await self.page.locator('div.finder-tag-wrap.btn:has-text("更换封面")').click()
  352. await self.page.locator('div.single-cover-uploader-wrap > div.wrap').hover()
  353. # 删除现有封面
  354. if await self.page.locator(".del-wrap > .svg-icon").count():
  355. await self.page.locator(".del-wrap > .svg-icon").click()
  356. # 上传新封面
  357. preview_div = self.page.locator("div.single-cover-uploader-wrap > div.wrap")
  358. async with self.page.expect_file_chooser() as fc_info:
  359. await preview_div.click()
  360. preview_chooser = await fc_info.value
  361. await preview_chooser.set_files(cover_path)
  362. await asyncio.sleep(2)
  363. await self.page.get_by_role("button", name="确定").click()
  364. await asyncio.sleep(1)
  365. await self.page.get_by_role("button", name="确认").click()
  366. print(f"[{self.platform_name}] 封面上传成功")
  367. except Exception as e:
  368. print(f"[{self.platform_name}] 封面上传失败: {e}")
  369. async def check_captcha(self) -> dict:
  370. """检查页面是否需要验证码"""
  371. if not self.page:
  372. return {'need_captcha': False, 'captcha_type': ''}
  373. try:
  374. # 检查各种验证码
  375. captcha_selectors = [
  376. 'text="请输入验证码"',
  377. 'text="滑动验证"',
  378. '[class*="captcha"]',
  379. '[class*="verify"]',
  380. ]
  381. for selector in captcha_selectors:
  382. try:
  383. if await self.page.locator(selector).count() > 0:
  384. print(f"[{self.platform_name}] 检测到验证码: {selector}")
  385. return {'need_captcha': True, 'captcha_type': 'image'}
  386. except:
  387. pass
  388. # 检查登录弹窗
  389. login_selectors = [
  390. 'text="请登录"',
  391. 'text="扫码登录"',
  392. '[class*="login-dialog"]',
  393. ]
  394. for selector in login_selectors:
  395. try:
  396. if await self.page.locator(selector).count() > 0:
  397. print(f"[{self.platform_name}] 检测到需要登录: {selector}")
  398. return {'need_captcha': True, 'captcha_type': 'login'}
  399. except:
  400. pass
  401. except Exception as e:
  402. print(f"[{self.platform_name}] 验证码检测异常: {e}")
  403. return {'need_captcha': False, 'captcha_type': ''}
  404. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  405. """发布视频到视频号"""
  406. print(f"\n{'='*60}")
  407. print(f"[{self.platform_name}] 开始发布视频")
  408. print(f"[{self.platform_name}] 视频路径: {params.video_path}")
  409. print(f"[{self.platform_name}] 标题: {params.title}")
  410. print(f"[{self.platform_name}] Headless: {self.headless}")
  411. print(f"{'='*60}")
  412. self.report_progress(5, "正在初始化浏览器...")
  413. # 初始化浏览器(使用 Chrome)
  414. await self.init_browser()
  415. print(f"[{self.platform_name}] 浏览器初始化完成")
  416. # 解析并设置 cookies
  417. cookie_list = self.parse_cookies(cookies)
  418. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
  419. await self.set_cookies(cookie_list)
  420. if not self.page:
  421. raise Exception("Page not initialized")
  422. # 检查视频文件
  423. if not os.path.exists(params.video_path):
  424. raise Exception(f"视频文件不存在: {params.video_path}")
  425. print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
  426. self.report_progress(10, "正在打开上传页面...")
  427. # 访问上传页面
  428. await self.page.goto(self.publish_url, wait_until="domcontentloaded", timeout=60000)
  429. await asyncio.sleep(3)
  430. # 检查是否跳转到登录页
  431. current_url = self.page.url
  432. print(f"[{self.platform_name}] 当前页面: {current_url}")
  433. if "login" in current_url:
  434. screenshot_base64 = await self.capture_screenshot()
  435. return PublishResult(
  436. success=False,
  437. platform=self.platform_name,
  438. error="Cookie 已过期,需要重新登录",
  439. need_captcha=True,
  440. captcha_type='login',
  441. screenshot_base64=screenshot_base64,
  442. page_url=current_url,
  443. status='need_captcha'
  444. )
  445. # 使用 AI 检查验证码
  446. ai_captcha = await self.ai_check_captcha()
  447. if ai_captcha['has_captcha']:
  448. print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
  449. screenshot_base64 = await self.capture_screenshot()
  450. return PublishResult(
  451. success=False,
  452. platform=self.platform_name,
  453. error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
  454. need_captcha=True,
  455. captcha_type=ai_captcha['captcha_type'],
  456. screenshot_base64=screenshot_base64,
  457. page_url=current_url,
  458. status='need_captcha'
  459. )
  460. # 传统方式检查验证码
  461. captcha_result = await self.check_captcha()
  462. if captcha_result['need_captcha']:
  463. screenshot_base64 = await self.capture_screenshot()
  464. return PublishResult(
  465. success=False,
  466. platform=self.platform_name,
  467. error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
  468. need_captcha=True,
  469. captcha_type=captcha_result['captcha_type'],
  470. screenshot_base64=screenshot_base64,
  471. page_url=current_url,
  472. status='need_captcha'
  473. )
  474. self.report_progress(15, "正在选择视频文件...")
  475. # 上传视频
  476. # 说明:视频号发布页在不同账号/地区/灰度下 DOM 结构差异较大,且上传组件可能在 iframe 中。
  477. # 因此这里按 matrix 的思路“点击触发 file chooser”,同时增加“遍历全部 frame + 精确挑选 video input”的兜底。
  478. upload_success = False
  479. if not self.page:
  480. raise Exception("Page not initialized")
  481. # 等待页面把上传区域渲染出来(避免过早判断)
  482. try:
  483. await self.page.wait_for_selector("div.upload-content, input[type='file'], iframe", timeout=20000)
  484. except Exception:
  485. pass
  486. async def _try_set_files_in_frame(frame, frame_name: str) -> bool:
  487. """在指定 frame 中尝试触发上传"""
  488. nonlocal upload_success
  489. if upload_success:
  490. return True
  491. # 方法0:如果用户通过环境变量显式配置了选择器,优先尝试这个
  492. if WEIXIN_UPLOAD_SELECTOR:
  493. try:
  494. el = frame.locator(WEIXIN_UPLOAD_SELECTOR).first
  495. if await el.count() > 0 and await el.is_visible():
  496. print(f"[{self.platform_name}] [{frame_name}] 使用环境变量 WEIXIN_UPLOAD_SELECTOR: {WEIXIN_UPLOAD_SELECTOR}")
  497. try:
  498. async with self.page.expect_file_chooser(timeout=5000) as fc_info:
  499. await el.click()
  500. chooser = await fc_info.value
  501. await chooser.set_files(params.video_path)
  502. upload_success = True
  503. print(f"[{self.platform_name}] [{frame_name}] 通过环境变量选择器上传成功")
  504. return True
  505. except Exception as e:
  506. print(f"[{self.platform_name}] [{frame_name}] 环境变量选择器点击失败,尝试直接 set_input_files: {e}")
  507. try:
  508. await el.set_input_files(params.video_path)
  509. upload_success = True
  510. print(f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 成功")
  511. return True
  512. except Exception as e2:
  513. print(f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 仍失败: {e2}")
  514. except Exception as e:
  515. print(f"[{self.platform_name}] [{frame_name}] 使用环境变量选择器定位元素失败: {e}")
  516. # 先尝试点击上传区域触发 chooser(最贴近 matrix)
  517. click_selectors = [
  518. "div.upload-content",
  519. "div[class*='upload-content']",
  520. "div[class*='upload']",
  521. "div.add-wrap",
  522. "[class*='uploader']",
  523. "text=点击上传",
  524. "text=上传视频",
  525. "text=选择视频",
  526. ]
  527. for selector in click_selectors:
  528. try:
  529. el = frame.locator(selector).first
  530. if await el.count() > 0 and await el.is_visible():
  531. print(f"[{self.platform_name}] [{frame_name}] 找到可点击上传区域: {selector}")
  532. try:
  533. async with self.page.expect_file_chooser(timeout=5000) as fc_info:
  534. await el.click()
  535. chooser = await fc_info.value
  536. await chooser.set_files(params.video_path)
  537. upload_success = True
  538. print(f"[{self.platform_name}] [{frame_name}] 通过 file chooser 上传成功")
  539. return True
  540. except Exception as e:
  541. print(f"[{self.platform_name}] [{frame_name}] 点击触发 chooser 失败: {e}")
  542. except Exception:
  543. pass
  544. # 再尝试直接设置 input[type=file](iframe/隐藏 input 常见)
  545. try:
  546. inputs = frame.locator("input[type='file']")
  547. cnt = await inputs.count()
  548. if cnt > 0:
  549. best_idx = 0
  550. best_score = -1
  551. for i in range(cnt):
  552. try:
  553. inp = inputs.nth(i)
  554. accept = (await inp.get_attribute("accept")) or ""
  555. multiple = (await inp.get_attribute("multiple")) or ""
  556. score = 0
  557. if "video" in accept:
  558. score += 10
  559. if "mp4" in accept:
  560. score += 3
  561. if multiple:
  562. score += 1
  563. if score > best_score:
  564. best_score = score
  565. best_idx = i
  566. except Exception:
  567. continue
  568. target = inputs.nth(best_idx)
  569. print(f"[{self.platform_name}] [{frame_name}] 尝试对 input[{best_idx}] set_input_files (score={best_score})")
  570. await target.set_input_files(params.video_path)
  571. upload_success = True
  572. print(f"[{self.platform_name}] [{frame_name}] 通过 file input 上传成功")
  573. return True
  574. except Exception as e:
  575. print(f"[{self.platform_name}] [{frame_name}] file input 上传失败: {e}")
  576. # 不直接返回,让后面的 AI 兜底有机会执行
  577. # 方法4: 兜底使用 AI 分析 HTML,猜测上传入口
  578. try:
  579. frame_url = getattr(frame, "url", "")
  580. html_full = await frame.content()
  581. html_for_ai = await self._extract_relevant_html_snippets(html_full)
  582. print(f"[{self.platform_name}] [{frame_name}] frame_url={frame_url}, html_len={len(html_full)}, html_for_ai_len={len(html_for_ai)}")
  583. ai_selector = await self.ai_find_upload_selector(html_for_ai, frame_name=frame_name)
  584. if ai_selector:
  585. try:
  586. el = frame.locator(ai_selector).first
  587. if await el.count() > 0:
  588. print(f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器点击上传入口: {ai_selector}")
  589. try:
  590. async with self.page.expect_file_chooser(timeout=5000) as fc_info:
  591. await el.click()
  592. chooser = await fc_info.value
  593. await chooser.set_files(params.video_path)
  594. upload_success = True
  595. print(f"[{self.platform_name}] [{frame_name}] 通过 AI 选择器上传成功")
  596. return True
  597. except Exception as e:
  598. print(f"[{self.platform_name}] [{frame_name}] AI 选择器点击失败,改为直接 set_input_files: {e}")
  599. try:
  600. await el.set_input_files(params.video_path)
  601. upload_success = True
  602. print(f"[{self.platform_name}] [{frame_name}] AI 选择器直接 set_input_files 成功")
  603. return True
  604. except Exception as e2:
  605. print(f"[{self.platform_name}] [{frame_name}] AI 选择器 set_input_files 仍失败: {e2}")
  606. except Exception as e:
  607. print(f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器定位元素失败: {e}")
  608. else:
  609. # 如果 AI 无法从 HTML 推断,退一步:构造候选元素列表交给 AI 选择
  610. try:
  611. candidates = await frame.evaluate("""
  612. () => {
  613. function cssEscape(s) {
  614. try { return CSS.escape(s); } catch (e) { return s.replace(/[^a-zA-Z0-9_-]/g, '\\\\$&'); }
  615. }
  616. function buildSelector(el) {
  617. if (!el || el.nodeType !== 1) return '';
  618. if (el.id) return `#${cssEscape(el.id)}`;
  619. let parts = [];
  620. let cur = el;
  621. for (let depth = 0; cur && cur.nodeType === 1 && depth < 5; depth++) {
  622. let part = cur.tagName.toLowerCase();
  623. const role = cur.getAttribute('role');
  624. const type = cur.getAttribute('type');
  625. if (type) part += `[type="${type}"]`;
  626. if (role) part += `[role="${role}"]`;
  627. const cls = (cur.className || '').toString().trim().split(/\\s+/).filter(Boolean);
  628. if (cls.length) part += '.' + cls.slice(0, 2).map(cssEscape).join('.');
  629. // nth-of-type
  630. let idx = 1;
  631. let sib = cur;
  632. while (sib && (sib = sib.previousElementSibling)) {
  633. if (sib.tagName === cur.tagName) idx++;
  634. }
  635. part += `:nth-of-type(${idx})`;
  636. parts.unshift(part);
  637. cur = cur.parentElement;
  638. }
  639. return parts.join(' > ');
  640. }
  641. const nodes = Array.from(document.querySelectorAll('input, button, a, div, span'))
  642. .filter(el => {
  643. const tag = el.tagName.toLowerCase();
  644. const type = (el.getAttribute('type') || '').toLowerCase();
  645. const role = (el.getAttribute('role') || '').toLowerCase();
  646. const aria = (el.getAttribute('aria-label') || '').toLowerCase();
  647. const txt = (el.innerText || '').trim().slice(0, 60);
  648. const cls = (el.className || '').toString().toLowerCase();
  649. const isFile = tag === 'input' && type === 'file';
  650. const looksClickable =
  651. tag === 'button' || tag === 'a' || role === 'button' || el.onclick ||
  652. cls.includes('upload') || cls.includes('uploader') || cls.includes('drag') ||
  653. aria.includes('上传') || aria.includes('选择') || aria.includes('添加') ||
  654. txt.includes('上传') || txt.includes('选择') || txt.includes('添加') || txt.includes('点击上传');
  655. if (!isFile && !looksClickable) return false;
  656. const r = el.getBoundingClientRect();
  657. const visible = r.width > 5 && r.height > 5;
  658. return visible;
  659. });
  660. const limited = nodes.slice(0, 120).map(el => ({
  661. css: buildSelector(el),
  662. tag: el.tagName.toLowerCase(),
  663. type: el.getAttribute('type') || '',
  664. role: el.getAttribute('role') || '',
  665. ariaLabel: el.getAttribute('aria-label') || '',
  666. text: (el.innerText || '').trim().slice(0, 80),
  667. id: el.id || '',
  668. className: (el.className || '').toString().slice(0, 120),
  669. accept: el.getAttribute('accept') || '',
  670. }));
  671. return limited;
  672. }
  673. """)
  674. ai_selector2 = await self.ai_pick_selector_from_candidates(
  675. candidates=candidates,
  676. goal="上传视频入口",
  677. frame_name=frame_name
  678. )
  679. if ai_selector2:
  680. el2 = frame.locator(ai_selector2).first
  681. if await el2.count() > 0:
  682. print(f"[{self.platform_name}] [{frame_name}] 使用 AI 候选选择器点击上传入口: {ai_selector2}")
  683. try:
  684. async with self.page.expect_file_chooser(timeout=5000) as fc_info:
  685. await el2.click()
  686. chooser2 = await fc_info.value
  687. await chooser2.set_files(params.video_path)
  688. upload_success = True
  689. print(f"[{self.platform_name}] [{frame_name}] 通过 AI 候选选择器上传成功")
  690. return True
  691. except Exception as e:
  692. print(f"[{self.platform_name}] [{frame_name}] AI 候选选择器点击失败,尝试 set_input_files: {e}")
  693. try:
  694. await el2.set_input_files(params.video_path)
  695. upload_success = True
  696. print(f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 成功")
  697. return True
  698. except Exception as e2:
  699. print(f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 仍失败: {e2}")
  700. except Exception as e:
  701. print(f"[{self.platform_name}] [{frame_name}] 构造候选并交给 AI 失败: {e}")
  702. except Exception as e:
  703. print(f"[{self.platform_name}] [{frame_name}] AI 上传入口识别整体失败: {e}")
  704. return False
  705. # 先尝试主 frame
  706. try:
  707. await _try_set_files_in_frame(self.page.main_frame, "main")
  708. except Exception as e:
  709. print(f"[{self.platform_name}] main frame 上传尝试异常: {e}")
  710. # 再遍历所有子 frame
  711. if not upload_success:
  712. try:
  713. frames = self.page.frames
  714. print(f"[{self.platform_name}] 发现 frames: {len(frames)}")
  715. for idx, fr in enumerate(frames):
  716. if upload_success:
  717. break
  718. # main_frame 已尝试过
  719. if fr == self.page.main_frame:
  720. continue
  721. name = fr.name or f"frame-{idx}"
  722. await _try_set_files_in_frame(fr, name)
  723. except Exception as e:
  724. print(f"[{self.platform_name}] 遍历 frames 异常: {e}")
  725. if not upload_success:
  726. screenshot_base64 = await self.capture_screenshot()
  727. return PublishResult(
  728. success=False,
  729. platform=self.platform_name,
  730. error="未找到上传入口(可能在 iframe 中或页面结构已变更)",
  731. screenshot_base64=screenshot_base64,
  732. page_url=await self.get_page_url(),
  733. status='failed'
  734. )
  735. self.report_progress(20, "正在填充标题和话题...")
  736. # 添加标题和话题
  737. await self.add_title_tags(params)
  738. self.report_progress(30, "等待视频上传完成...")
  739. # 等待上传完成
  740. for _ in range(120):
  741. try:
  742. button_info = await self.page.get_by_role("button", name="发表").get_attribute('class')
  743. if "weui-desktop-btn_disabled" not in button_info:
  744. print(f"[{self.platform_name}] 视频上传完毕")
  745. # 上传封面
  746. self.report_progress(50, "正在上传封面...")
  747. await self.upload_cover(params.cover_path)
  748. break
  749. else:
  750. # 检查上传错误
  751. if await self.page.locator('div.status-msg.error').count():
  752. if await self.page.locator('div.media-status-content div.tag-inner:has-text("删除")').count():
  753. await self.handle_upload_error(params.video_path)
  754. await asyncio.sleep(3)
  755. except:
  756. await asyncio.sleep(3)
  757. self.report_progress(60, "处理视频设置...")
  758. # 添加短标题
  759. try:
  760. short_title_el = self.page.get_by_text("短标题", exact=True).locator("..").locator(
  761. "xpath=following-sibling::div").locator('span input[type="text"]')
  762. if await short_title_el.count():
  763. short_title = format_short_title(params.title)
  764. await short_title_el.fill(short_title)
  765. except:
  766. pass
  767. # 定时发布
  768. if params.publish_date:
  769. self.report_progress(70, "设置定时发布...")
  770. await self.set_schedule_time(params.publish_date)
  771. self.report_progress(80, "正在发布...")
  772. # 点击发布 - 参考 matrix
  773. for i in range(30):
  774. try:
  775. # 参考 matrix: div.form-btns button:has-text("发表")
  776. publish_btn = self.page.locator('div.form-btns button:has-text("发表")')
  777. if await publish_btn.count():
  778. print(f"[{self.platform_name}] 点击发布按钮...")
  779. await publish_btn.click()
  780. # 等待跳转到作品列表页面 - 参考 matrix
  781. await self.page.wait_for_url(
  782. "https://channels.weixin.qq.com/platform/post/list",
  783. timeout=10000
  784. )
  785. self.report_progress(100, "发布成功")
  786. print(f"[{self.platform_name}] 视频发布成功!")
  787. screenshot_base64 = await self.capture_screenshot()
  788. return PublishResult(
  789. success=True,
  790. platform=self.platform_name,
  791. message="发布成功",
  792. screenshot_base64=screenshot_base64,
  793. page_url=self.page.url,
  794. status='success'
  795. )
  796. except Exception as e:
  797. current_url = self.page.url
  798. if "https://channels.weixin.qq.com/platform/post/list" in current_url:
  799. self.report_progress(100, "发布成功")
  800. print(f"[{self.platform_name}] 视频发布成功!")
  801. screenshot_base64 = await self.capture_screenshot()
  802. return PublishResult(
  803. success=True,
  804. platform=self.platform_name,
  805. message="发布成功",
  806. screenshot_base64=screenshot_base64,
  807. page_url=current_url,
  808. status='success'
  809. )
  810. else:
  811. print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30, URL: {current_url}")
  812. await asyncio.sleep(1)
  813. # 发布超时
  814. screenshot_base64 = await self.capture_screenshot()
  815. page_url = await self.get_page_url()
  816. return PublishResult(
  817. success=False,
  818. platform=self.platform_name,
  819. error="发布超时,请检查发布状态",
  820. screenshot_base64=screenshot_base64,
  821. page_url=page_url,
  822. status='need_action'
  823. )
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Scrape the Channels post-list page and return the works rendered on it.

      Args:
          cookies: Cookie string; parsed with ``self.parse_cookies`` and
              installed with ``self.set_cookies`` before navigating.
          page: Requested page index. It is only logged here; the scrape
              always reads the items currently rendered on the list page.
          page_size: Maximum number of rendered items converted into works.

      Returns:
          A ``WorksResult``. On success it carries the parsed works, ``total``
          (the number of works parsed by this call) and ``has_more`` (whether
          the page rendered more items than ``page_size``). On any failure it
          has ``success=False`` and ``error`` set to the exception text.
      """
      import hashlib
      import re

      # Counter patterns are loop-invariant, so compile them once up front.
      play_re = re.compile(r'播放[\s]*([\d.]+[万]?)')
      like_re = re.compile(r'点赞[\s]*([\d.]+[万]?)')
      comment_re = re.compile(r'评论[\s]*([\d.]+[万]?)')

      def parse_count(match):
          """Turn a matched counter ("100", "1.5", "1.2万") into an int; 0 if unparsable."""
          if not match:
              return 0
          val = match.group(1)
          try:
              if '万' in val:
                  # round() avoids losing a unit to float error (1.15 * 10000).
                  return int(round(float(val.replace('万', '')) * 10000))
              # The pattern allows a decimal point, which int() alone rejects.
              return int(val) if val.isdigit() else int(float(val))
          except (ValueError, OverflowError):
              # Malformed text such as "1.2.3": report 0 rather than dropping
              # the whole work item.
              return 0

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品列表")
      print(f"[{self.platform_name}] page={page}, page_size={page_size}")
      print(f"{'='*60}")

      works: List[WorkItem] = []
      total = 0
      has_more = False

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          # Open the creator-center post list.
          await self.page.goto("https://channels.weixin.qq.com/platform/post/list")
          await asyncio.sleep(5)

          # Being redirected to a login URL means the cookies are no longer valid.
          current_url = self.page.url
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Works are scraped from the DOM; wait for the list container first.
          await self.page.wait_for_selector('div.post-feed-wrap', timeout=10000)

          post_items = self.page.locator('div.post-feed-item')
          item_count = await post_items.count()
          print(f"[{self.platform_name}] 找到 {item_count} 个作品项")

          for i in range(min(item_count, page_size)):
              try:
                  item = post_items.nth(i)

                  # Cover image URL.
                  cover_el = item.locator('div.cover-wrap img').first
                  cover_url = ''
                  if await cover_el.count() > 0:
                      cover_url = await cover_el.get_attribute('src') or ''

                  # Title, trimmed and capped at 50 characters.
                  title_el = item.locator('div.content').first
                  title = ''
                  if await title_el.count() > 0:
                      title = await title_el.text_content() or ''
                      title = title.strip()[:50]

                  # Play / like / comment counters parsed from the stats text.
                  stats_el = item.locator('div.post-data')
                  play_count = 0
                  like_count = 0
                  comment_count = 0
                  if await stats_el.count() > 0:
                      stats_text = await stats_el.text_content() or ''
                      play_count = parse_count(play_re.search(stats_text))
                      like_count = parse_count(like_re.search(stats_text))
                      comment_count = parse_count(comment_re.search(stats_text))

                  # Publish time as displayed on the page.
                  time_el = item.locator('div.time')
                  publish_time = ''
                  if await time_el.count() > 0:
                      publish_time = await time_el.text_content() or ''
                      publish_time = publish_time.strip()

                  # Temporary work_id derived from position and title. A content
                  # digest is used because built-in hash() of a str is salted per
                  # process, which made the id change on every run.
                  title_digest = hashlib.sha256(
                      title.encode('utf-8', errors='replace')
                  ).hexdigest()[:16]
                  work_id = f"weixin_{i}_{title_digest}"

                  works.append(WorkItem(
                      work_id=work_id,
                      title=title or '无标题',
                      cover_url=cover_url,
                      duration=0,
                      status='published',
                      publish_time=publish_time,
                      play_count=play_count,
                      like_count=like_count,
                      comment_count=comment_count,
                  ))
              except Exception as e:
                  # Best effort: one broken item must not abort the whole listing.
                  print(f"[{self.platform_name}] 解析作品 {i} 失败: {e}")

          total = len(works)
          has_more = item_count > page_size
          print(f"[{self.platform_name}] 获取到 {total} 个作品")

      except Exception as e:
          import traceback
          traceback.print_exc()
          return WorksResult(success=False, platform=self.platform_name, error=str(e))

      return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more)
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Scrape the Channels comment-management page and return its comments.

      Args:
          cookies: Cookie string; parsed with ``self.parse_cookies`` and
              installed with ``self.set_cookies`` before navigating.
          work_id: Identifier attached to every returned comment and to the
              result. It is not used to navigate or filter: the method always
              reads whatever the comment-management page renders.
          cursor: Accepted for interface compatibility; not used here.

      Returns:
          A ``CommentsResult``. If the comment list container does not appear
          within the wait timeout, an empty successful result is returned. On
          any other failure the result has ``success=False`` and ``error`` set
          to the exception text. ``has_more`` is always ``False``.
      """
      import hashlib

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品评论")
      print(f"[{self.platform_name}] work_id={work_id}")
      print(f"{'='*60}")

      comments: List[CommentItem] = []
      total = 0
      has_more = False

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          # Open the comment-management page.
          await self.page.goto("https://channels.weixin.qq.com/platform/comment/index")
          await asyncio.sleep(5)

          # Being redirected to a login URL means the cookies are no longer valid.
          current_url = self.page.url
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Wait for the comment list; a missing list is treated as "no comments".
          try:
              await self.page.wait_for_selector('div.comment-list', timeout=10000)
          except Exception:
              # Deliberately not a bare except: task cancellation and
              # KeyboardInterrupt must propagate instead of being reported
              # as an empty successful result.
              print(f"[{self.platform_name}] 未找到评论列表")
              return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)

          comment_items = self.page.locator('div.comment-item')
          item_count = await comment_items.count()
          print(f"[{self.platform_name}] 找到 {item_count} 个评论项")

          for i in range(item_count):
              try:
                  item = comment_items.nth(i)

                  # Author nickname and avatar.
                  author_name = ''
                  author_avatar = ''
                  name_el = item.locator('div.nick-name')
                  if await name_el.count() > 0:
                      author_name = await name_el.text_content() or ''
                      author_name = author_name.strip()

                  avatar_el = item.locator('img.avatar')
                  if await avatar_el.count() > 0:
                      author_avatar = await avatar_el.get_attribute('src') or ''

                  # Comment text.
                  content = ''
                  content_el = item.locator('div.comment-content')
                  if await content_el.count() > 0:
                      content = await content_el.text_content() or ''
                      content = content.strip()

                  # Creation time as displayed on the page.
                  create_time = ''
                  time_el = item.locator('div.time')
                  if await time_el.count() > 0:
                      create_time = await time_el.text_content() or ''
                      create_time = create_time.strip()

                  # Synthetic comment id from position and content. A content
                  # digest is used because built-in hash() of a str is salted per
                  # process, which made the id change on every run.
                  content_digest = hashlib.sha256(
                      content.encode('utf-8', errors='replace')
                  ).hexdigest()[:16]
                  comment_id = f"weixin_comment_{i}_{content_digest}"

                  comments.append(CommentItem(
                      comment_id=comment_id,
                      work_id=work_id,
                      content=content,
                      author_id='',
                      author_name=author_name,
                      author_avatar=author_avatar,
                      like_count=0,
                      reply_count=0,
                      create_time=create_time,
                  ))
              except Exception as e:
                  # Best effort: one broken item must not abort the whole listing.
                  print(f"[{self.platform_name}] 解析评论 {i} 失败: {e}")

          total = len(comments)
          print(f"[{self.platform_name}] 获取到 {total} 条评论")

      except Exception as e:
          import traceback
          traceback.print_exc()
          return CommentsResult(success=False, platform=self.platform_name, work_id=work_id, error=str(e))

      return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=comments, total=total, has_more=has_more)