weixin.py 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681
  1. # -*- coding: utf-8 -*-
  2. """
  3. 微信视频号发布器
  4. 参考: matrix/tencent_uploader/main.py
  5. """
  6. import asyncio
  7. import os
  8. from datetime import datetime
  9. from typing import List
  10. from .base import (
  11. BasePublisher, PublishParams, PublishResult,
  12. WorkItem, WorksResult, CommentItem, CommentsResult
  13. )
  14. import os
  15. import time
# Allows the "upload video" entry selector to be supplied through an environment
# variable, so it can be adjusted quickly when the page structure changes.
# An unset variable yields an empty string.
WEIXIN_UPLOAD_SELECTOR = os.environ.get("WEIXIN_UPLOAD_SELECTOR", "").strip()
  18. def format_short_title(origin_title: str) -> str:
  19. """
  20. 格式化短标题
  21. - 移除特殊字符
  22. - 长度限制在 6-16 字符
  23. """
  24. allowed_special_chars = "《》"":+?%°"
  25. filtered_chars = [
  26. char if char.isalnum() or char in allowed_special_chars
  27. else ' ' if char == ',' else ''
  28. for char in origin_title
  29. ]
  30. formatted_string = ''.join(filtered_chars)
  31. if len(formatted_string) > 16:
  32. formatted_string = formatted_string[:16]
  33. elif len(formatted_string) < 6:
  34. formatted_string += ' ' * (6 - len(formatted_string))
  35. return formatted_string
class WeixinPublisher(BasePublisher):
    """
    Publisher for WeChat Channels (微信视频号).

    Automates the Channels creator center with Playwright.
    Note: the Chrome browser is required, otherwise H264 encoding errors
    may occur.
    """
    platform_name = "weixin"
    login_url = "https://channels.weixin.qq.com/platform"
    publish_url = "https://channels.weixin.qq.com/platform/post/create"
    cookie_domain = ".weixin.qq.com"
  46. def _parse_count(self, count_str: str) -> int:
  47. """解析数字(支持带'万'的格式)"""
  48. try:
  49. count_str = count_str.strip()
  50. if '万' in count_str:
  51. return int(float(count_str.replace('万', '')) * 10000)
  52. return int(count_str)
  53. except:
  54. return 0
  55. async def ai_find_upload_selector(self, frame_html: str, frame_name: str = "main") -> str:
  56. """
  57. 使用 AI 从 HTML 中识别“上传视频/选择文件”相关元素的 CSS 选择器。
  58. 设计思路:
  59. - 仅在常规 DOM 选择器都失败时调用,避免频繁占用 AI 配额;
  60. - 通过 DashScope 文本模型(与验证码识别同一套配置)分析 HTML;
  61. - 返回一个适合用于 frame.locator(selector) 的 CSS 选择器。
  62. """
  63. import json
  64. import re
  65. import requests
  66. import os
  67. # 避免 HTML 过长导致 token 超限,只截取前 N 字符
  68. if not frame_html:
  69. return ""
  70. max_len = 20000
  71. if len(frame_html) > max_len:
  72. frame_html = frame_html[:max_len]
  73. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  74. ai_base_url = os.environ.get("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
  75. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  76. if not ai_api_key:
  77. print(f"[{self.platform_name}] AI上传入口识别: 未配置 AI API Key,跳过")
  78. return ""
  79. prompt = f"""
  80. 你是熟悉微信视频号后台的前端工程师,现在需要在一段 HTML 中找到“上传视频文件”的入口。
  81. 页面说明:
  82. - 平台:微信视频号(channels.weixin.qq.com)
  83. - 目标:用于上传视频文件的按钮或 input(一般会触发文件选择框)
  84. - 你会收到某个 frame 的完整 HTML 片段(不包含截图)。
  85. 请你根据下面的 HTML,推断最适合用于上传视频文件的元素,并输出一个可以被 Playwright 使用的 CSS 选择器。
  86. 要求:
  87. 1. 只考虑“上传/选择视频文件”的入口,不要返回“发布/发表/下一步”等按钮;
  88. 2. 选择器需要尽量稳定,不要使用自动生成的随机类名(例如带很多随机字母/数字的类名可以用前缀匹配);
  89. 3. 选择器必须是 CSS 选择器(不要返回 XPath);
  90. 4. 如果确实找不到合理的上传入口,返回 selector 为空字符串。
  91. 请以 JSON 格式输出,严格遵守以下结构(不要添加任何解释文字):
  92. ```json
  93. {{
  94. "selector": "CSS 选择器字符串,比如:input[type='file'] 或 div.upload-content input[type='file']"
  95. }}
  96. ```
  97. 下面是 frame=\"{frame_name}\" 的 HTML:
  98. ```html
  99. {frame_html}
  100. ```"""
  101. payload = {
  102. "model": ai_text_model,
  103. "messages": [
  104. {
  105. "role": "user",
  106. "content": prompt,
  107. }
  108. ],
  109. "max_tokens": 600,
  110. }
  111. headers = {
  112. "Authorization": f"Bearer {ai_api_key}",
  113. "Content-Type": "application/json",
  114. }
  115. try:
  116. print(f"[{self.platform_name}] AI上传入口识别: 正在分析 frame={frame_name} HTML...")
  117. resp = requests.post(
  118. f"{ai_base_url}/chat/completions",
  119. headers=headers,
  120. json=payload,
  121. timeout=40,
  122. )
  123. if resp.status_code != 200:
  124. print(f"[{self.platform_name}] AI上传入口识别: API 返回错误 {resp.status_code}")
  125. return ""
  126. data = resp.json()
  127. content = data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  128. # 尝试从 ```json``` 代码块中解析
  129. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  130. if json_match:
  131. json_str = json_match.group(1)
  132. else:
  133. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  134. json_str = json_match.group(0) if json_match else "{}"
  135. try:
  136. result = json.loads(json_str)
  137. except Exception:
  138. result = {}
  139. selector = (result.get("selector") or "").strip()
  140. print(f"[{self.platform_name}] AI上传入口识别结果: selector='{selector}'")
  141. return selector
  142. except Exception as e:
  143. print(f"[{self.platform_name}] AI上传入口识别异常: {e}")
  144. return ""
  145. async def ai_pick_selector_from_candidates(self, candidates: list, goal: str, frame_name: str = "main") -> str:
  146. """
  147. 将“候选元素列表(包含 css selector + 文本/属性)”发给 AI,让 AI 直接挑选最符合 goal 的元素。
  148. 适用于:HTML 里看不出上传入口、或页面大量动态渲染时。
  149. """
  150. import json
  151. import re
  152. import requests
  153. import os
  154. if not candidates:
  155. return ""
  156. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  157. ai_base_url = os.environ.get("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
  158. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  159. if not ai_api_key:
  160. print(f"[{self.platform_name}] AI候选选择器: 未配置 AI API Key,跳过")
  161. return ""
  162. # 控制长度,最多取前 120 个候选
  163. candidates = candidates[:120]
  164. prompt = f"""
  165. 你是自动化发布工程师。现在要在微信视频号(channels.weixin.qq.com)发布页面里找到“{goal}”相关的入口元素。
  166. 我会给你一组候选元素,每个候选都包含:
  167. - css: 可直接用于 Playwright 的 CSS 选择器
  168. - tag / type / role / ariaLabel / text / id / className(部分字段可能为空)
  169. 你的任务:
  170. - 从候选中选出最可能用于“{goal}”的元素,返回它的 css 选择器;
  171. - 如果没有任何候选符合,返回空字符串。
  172. 注意:
  173. - 如果 goal 是“上传视频入口”,优先选择 input[type=file] 或看起来会触发选择文件/上传的区域;
  174. - 不要选择“发布/发表/下一步”等按钮(除非 goal 明确是发布按钮)。
  175. 请严格按 JSON 输出(不要解释):
  176. ```json
  177. {{ "selector": "..." }}
  178. ```
  179. 候选列表(frame={frame_name}):
  180. ```json
  181. {json.dumps(candidates, ensure_ascii=False)}
  182. ```"""
  183. payload = {
  184. "model": ai_text_model,
  185. "messages": [{"role": "user", "content": prompt}],
  186. "max_tokens": 400,
  187. }
  188. headers = {
  189. "Authorization": f"Bearer {ai_api_key}",
  190. "Content-Type": "application/json",
  191. }
  192. try:
  193. print(f"[{self.platform_name}] AI候选选择器: 正在分析 frame={frame_name}, goal={goal} ...")
  194. resp = requests.post(
  195. f"{ai_base_url}/chat/completions",
  196. headers=headers,
  197. json=payload,
  198. timeout=40,
  199. )
  200. if resp.status_code != 200:
  201. print(f"[{self.platform_name}] AI候选选择器: API 返回错误 {resp.status_code}")
  202. return ""
  203. data = resp.json()
  204. content = data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  205. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  206. if json_match:
  207. json_str = json_match.group(1)
  208. else:
  209. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  210. json_str = json_match.group(0) if json_match else "{}"
  211. try:
  212. result = json.loads(json_str)
  213. except Exception:
  214. result = {}
  215. selector = (result.get("selector") or "").strip()
  216. print(f"[{self.platform_name}] AI候选选择器结果: selector='{selector}'")
  217. return selector
  218. except Exception as e:
  219. print(f"[{self.platform_name}] AI候选选择器异常: {e}")
  220. return ""
  221. async def _extract_relevant_html_snippets(self, html: str) -> str:
  222. """
  223. 从 HTML 中抽取与上传相关的片段,减少 token,提升 AI 命中率。
  224. - 优先抓取包含 upload/上传/file/input 等关键词的窗口片段
  225. - 若未命中关键词,返回“开头 + 结尾”的拼接
  226. """
  227. import re
  228. if not html:
  229. return ""
  230. patterns = [
  231. r"upload",
  232. r"uploader",
  233. r"file",
  234. r"type\\s*=\\s*['\\\"]file['\\\"]",
  235. r"input",
  236. r"drag",
  237. r"drop",
  238. r"选择",
  239. r"上传",
  240. r"添加",
  241. r"视频",
  242. ]
  243. regex = re.compile("|".join(patterns), re.IGNORECASE)
  244. snippets = []
  245. for m in regex.finditer(html):
  246. start = max(0, m.start() - 350)
  247. end = min(len(html), m.end() + 350)
  248. snippets.append(html[start:end])
  249. if len(snippets) >= 18:
  250. break
  251. if snippets:
  252. # 去重(粗略)
  253. unique = []
  254. seen = set()
  255. for s in snippets:
  256. key = hash(s)
  257. if key not in seen:
  258. seen.add(key)
  259. unique.append(s)
  260. return "\n\n<!-- SNIPPET -->\n\n".join(unique)[:20000]
  261. # fallback: head + tail
  262. head = html[:9000]
  263. tail = html[-9000:] if len(html) > 9000 else ""
  264. return (head + "\n\n<!-- TAIL -->\n\n" + tail)[:20000]
  265. async def init_browser(self, storage_state: str = None):
  266. """初始化浏览器 - 参考 matrix 使用 channel=chrome 避免 H264 编码错误"""
  267. from playwright.async_api import async_playwright
  268. playwright = await async_playwright().start()
  269. # 参考 matrix: 使用系统内的 Chrome 浏览器,避免 H264 编码错误
  270. # 如果没有安装 Chrome,则使用默认 Chromium
  271. try:
  272. self.browser = await playwright.chromium.launch(
  273. headless=self.headless,
  274. channel="chrome" # 使用系统 Chrome
  275. )
  276. print(f"[{self.platform_name}] 使用系统 Chrome 浏览器")
  277. except Exception as e:
  278. print(f"[{self.platform_name}] Chrome 不可用,使用 Chromium: {e}")
  279. self.browser = await playwright.chromium.launch(headless=self.headless)
  280. # 设置 HTTP Headers 防止重定向
  281. headers = {
  282. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  283. "Referer": "https://channels.weixin.qq.com/platform/post/list",
  284. }
  285. self.context = await self.browser.new_context(
  286. extra_http_headers=headers,
  287. ignore_https_errors=True,
  288. viewport={"width": 1920, "height": 1080},
  289. user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
  290. )
  291. self.page = await self.context.new_page()
  292. return self.page
  293. async def set_schedule_time(self, publish_date: datetime):
  294. """设置定时发布"""
  295. if not self.page:
  296. return
  297. print(f"[{self.platform_name}] 设置定时发布...")
  298. # 点击定时选项
  299. label_element = self.page.locator("label").filter(has_text="定时").nth(1)
  300. await label_element.click()
  301. # 选择日期
  302. await self.page.click('input[placeholder="请选择发表时间"]')
  303. publish_month = f"{publish_date.month:02d}"
  304. current_month = f"{publish_month}月"
  305. # 检查月份
  306. page_month = await self.page.inner_text('span.weui-desktop-picker__panel__label:has-text("月")')
  307. if page_month != current_month:
  308. await self.page.click('button.weui-desktop-btn__icon__right')
  309. # 选择日期
  310. elements = await self.page.query_selector_all('table.weui-desktop-picker__table a')
  311. for element in elements:
  312. class_name = await element.evaluate('el => el.className')
  313. if 'weui-desktop-picker__disabled' in class_name:
  314. continue
  315. text = await element.inner_text()
  316. if text.strip() == str(publish_date.day):
  317. await element.click()
  318. break
  319. # 输入时间
  320. await self.page.click('input[placeholder="请选择时间"]')
  321. await self.page.keyboard.press("Control+KeyA")
  322. await self.page.keyboard.type(str(publish_date.hour))
  323. # 点击其他地方确认
  324. await self.page.locator("div.input-editor").click()
  325. async def handle_upload_error(self, video_path: str):
  326. """处理上传错误"""
  327. if not self.page:
  328. return
  329. print(f"[{self.platform_name}] 视频出错了,重新上传中...")
  330. await self.page.locator('div.media-status-content div.tag-inner:has-text("删除")').click()
  331. await self.page.get_by_role('button', name="删除", exact=True).click()
  332. file_input = self.page.locator('input[type="file"]')
  333. await file_input.set_input_files(video_path)
  334. async def add_title_tags(self, params: PublishParams):
  335. """添加标题和话题"""
  336. if not self.page:
  337. return
  338. await self.page.locator("div.input-editor").click()
  339. await self.page.keyboard.type(params.title)
  340. if params.tags:
  341. await self.page.keyboard.press("Enter")
  342. for tag in params.tags:
  343. await self.page.keyboard.type("#" + tag)
  344. await self.page.keyboard.press("Space")
  345. print(f"[{self.platform_name}] 成功添加标题和 {len(params.tags)} 个话题")
  346. async def add_short_title(self):
  347. """添加短标题"""
  348. if not self.page:
  349. return
  350. try:
  351. short_title_element = self.page.get_by_text("短标题", exact=True).locator("..").locator(
  352. "xpath=following-sibling::div").locator('span input[type="text"]')
  353. if await short_title_element.count():
  354. # 获取已有内容作为短标题
  355. pass
  356. except:
  357. pass
  358. async def upload_cover(self, cover_path: str):
  359. """上传封面图"""
  360. if not self.page or not cover_path or not os.path.exists(cover_path):
  361. return
  362. try:
  363. await asyncio.sleep(2)
  364. preview_btn_info = await self.page.locator(
  365. 'div.finder-tag-wrap.btn:has-text("更换封面")').get_attribute('class')
  366. if "disabled" not in preview_btn_info:
  367. await self.page.locator('div.finder-tag-wrap.btn:has-text("更换封面")').click()
  368. await self.page.locator('div.single-cover-uploader-wrap > div.wrap').hover()
  369. # 删除现有封面
  370. if await self.page.locator(".del-wrap > .svg-icon").count():
  371. await self.page.locator(".del-wrap > .svg-icon").click()
  372. # 上传新封面
  373. preview_div = self.page.locator("div.single-cover-uploader-wrap > div.wrap")
  374. async with self.page.expect_file_chooser() as fc_info:
  375. await preview_div.click()
  376. preview_chooser = await fc_info.value
  377. await preview_chooser.set_files(cover_path)
  378. await asyncio.sleep(2)
  379. await self.page.get_by_role("button", name="确定").click()
  380. await asyncio.sleep(1)
  381. await self.page.get_by_role("button", name="确认").click()
  382. print(f"[{self.platform_name}] 封面上传成功")
  383. except Exception as e:
  384. print(f"[{self.platform_name}] 封面上传失败: {e}")
  385. async def check_captcha(self) -> dict:
  386. """检查页面是否需要验证码"""
  387. if not self.page:
  388. return {'need_captcha': False, 'captcha_type': ''}
  389. try:
  390. # 检查各种验证码
  391. captcha_selectors = [
  392. 'text="请输入验证码"',
  393. 'text="滑动验证"',
  394. '[class*="captcha"]',
  395. '[class*="verify"]',
  396. ]
  397. for selector in captcha_selectors:
  398. try:
  399. if await self.page.locator(selector).count() > 0:
  400. print(f"[{self.platform_name}] 检测到验证码: {selector}")
  401. return {'need_captcha': True, 'captcha_type': 'image'}
  402. except:
  403. pass
  404. # 检查登录弹窗
  405. login_selectors = [
  406. 'text="请登录"',
  407. 'text="扫码登录"',
  408. '[class*="login-dialog"]',
  409. ]
  410. for selector in login_selectors:
  411. try:
  412. if await self.page.locator(selector).count() > 0:
  413. print(f"[{self.platform_name}] 检测到需要登录: {selector}")
  414. return {'need_captcha': True, 'captcha_type': 'login'}
  415. except:
  416. pass
  417. except Exception as e:
  418. print(f"[{self.platform_name}] 验证码检测异常: {e}")
  419. return {'need_captcha': False, 'captcha_type': ''}
  420. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  421. """发布视频到视频号"""
  422. print(f"\n{'='*60}")
  423. print(f"[{self.platform_name}] 开始发布视频")
  424. print(f"[{self.platform_name}] 视频路径: {params.video_path}")
  425. print(f"[{self.platform_name}] 标题: {params.title}")
  426. print(f"[{self.platform_name}] Headless: {self.headless}")
  427. print(f"{'='*60}")
  428. self.report_progress(5, "正在初始化浏览器...")
  429. # 初始化浏览器(使用 Chrome)
  430. await self.init_browser()
  431. print(f"[{self.platform_name}] 浏览器初始化完成")
  432. # 解析并设置 cookies
  433. cookie_list = self.parse_cookies(cookies)
  434. print(cookie_list)
  435. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
  436. await self.set_cookies(cookie_list)
  437. if not self.page:
  438. raise Exception("Page not initialized")
  439. # 检查视频文件
  440. if not os.path.exists(params.video_path):
  441. raise Exception(f"视频文件不存在: {params.video_path}")
  442. print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
  443. self.report_progress(10, "正在打开上传页面...")
  444. print(f"[{self.platform_name}] 当前 发布URL: {self.publish_url}")
  445. # 访问上传页面
  446. await self.page.goto(self.publish_url, wait_until="networkidle", timeout=60000)
  447. await asyncio.sleep(10)
  448. # 打印页面HTML调试
  449. print(f"[{self.platform_name}] 当前 URL: {self.page.url}")
  450. html_content = await self.page.content()
  451. print(f"[{self.platform_name}] 页面HTML长度: {len(html_content)}")
  452. # 截图调试
  453. screenshot_path = f"weixin_publish_{int(asyncio.get_event_loop().time())}.png"
  454. await self.page.screenshot(path=screenshot_path)
  455. print(f"[{self.platform_name}] 截图已保存: {screenshot_path}")
  456. # 检查 input[type='file'] 是否存在
  457. file_input = self.page.locator("input[type='file']")
  458. count = await file_input.count()
  459. print(f"[{self.platform_name}] 找到 {count} 个 file input")
  460. if count == 0:
  461. raise Exception("页面中未找到 input[type='file'] 元素")
  462. # 直接设置文件,不触发click
  463. print("上传文件...")
  464. file_path = params.video_path
  465. await file_input.first.set_input_files(file_path)
  466. print(f"[{self.platform_name}] 文件已设置: {file_path}")
  467. # 等待上传进度
  468. await asyncio.sleep(5)
  469. # 等待删除标签弹窗可见(可选,设置超时)
  470. try:
  471. await self.page.wait_for_selector(".weui-desktop-popover__wrp.finder-popover-dialog-wrap .finder-tag-wrap", state="visible", timeout=20000)
  472. print("删除标签弹窗已显示")
  473. except:
  474. print("删除标签弹窗未出现,继续执行")
  475. # 主动关闭系统文件选择窗口(如果还存在)
  476. try:
  477. # 获取所有窗口
  478. context_pages = self.page.context.pages
  479. for p in context_pages:
  480. if p != self.page and "打开" in await p.title():
  481. print(f"关闭系统文件选择窗口: {await p.title()}")
  482. await p.close()
  483. except Exception as e:
  484. print(f"关闭文件选择窗口异常: {e}")
  485. # 填写多个输入框
  486. print("填写输入框...")
  487. # 描述输入框
  488. await self.page.locator("div.input-editor[contenteditable][data-placeholder='添加描述']").fill("智能拍照机来啦")
  489. # 短标题输入框
  490. await self.page.fill("input.weui-desktop-form__input[placeholder*='概括视频主要内容']", "解放双手的智能拍照机")
  491. await self.page.wait_for_timeout(1000)
  492. # 点击最下方的发布按钮
  493. print("点击发布按钮...")
  494. await self.page.click("button.weui-desktop-btn.weui-desktop-btn_primary:has-text('发表')")
  495. # 监控是否出现"直接发表"按钮
  496. try:
  497. direct_publish_btn = self.page.locator("button.weui-desktop-btn.weui-desktop-btn_default:has-text('直接发表')")
  498. await direct_publish_btn.wait_for(state="visible", timeout=3000)
  499. print("检测到'直接发表'按钮,点击...")
  500. await direct_publish_btn.click()
  501. except:
  502. print("未检测到'直接发表'按钮,继续...")
  503. # 等待发布完成
  504. await self.page.wait_for_timeout(3000)
  505. print("发布完成!")
  506. return PublishResult(
  507. success=True,
  508. platform=self.platform_name,
  509. message="发布成功",
  510. screenshot_base64="",
  511. page_url=self.publish_url,
  512. status='success'
  513. )
  514. # 检查是否跳转到登录页
  515. current_url = self.page.url
  516. print(f"[{self.platform_name}] 当前页面: {current_url}")
  517. if "login" in current_url:
  518. screenshot_base64 = await self.capture_screenshot()
  519. return PublishResult(
  520. success=False,
  521. platform=self.platform_name,
  522. error="Cookie 已过期,需要重新登录",
  523. need_captcha=True,
  524. captcha_type='login',
  525. screenshot_base64=screenshot_base64,
  526. page_url=current_url,
  527. status='need_captcha'
  528. )
  529. # 使用 AI 检查验证码
  530. ai_captcha = await self.ai_check_captcha()
  531. if ai_captcha['has_captcha']:
  532. print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
  533. screenshot_base64 = await self.capture_screenshot()
  534. return PublishResult(
  535. success=False,
  536. platform=self.platform_name,
  537. error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
  538. need_captcha=True,
  539. captcha_type=ai_captcha['captcha_type'],
  540. screenshot_base64=screenshot_base64,
  541. page_url=current_url,
  542. status='need_captcha'
  543. )
  544. # 传统方式检查验证码
  545. captcha_result = await self.check_captcha()
  546. if captcha_result['need_captcha']:
  547. screenshot_base64 = await self.capture_screenshot()
  548. return PublishResult(
  549. success=False,
  550. platform=self.platform_name,
  551. error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
  552. need_captcha=True,
  553. captcha_type=captcha_result['captcha_type'],
  554. screenshot_base64=screenshot_base64,
  555. page_url=current_url,
  556. status='need_captcha'
  557. )
  558. self.report_progress(15, "正在选择视频文件...")
  559. # 上传视频
  560. # 说明:视频号发布页在不同账号/地区/灰度下 DOM 结构差异较大,且上传组件可能在 iframe 中。
  561. # 因此这里按 matrix 的思路“点击触发 file chooser”,同时增加“遍历全部 frame + 精确挑选 video input”的兜底。
  562. upload_success = False
  563. if not self.page:
  564. raise Exception("Page not initialized")
  565. # 等待页面把上传区域渲染出来(避免过早判断)
  566. try:
  567. await self.page.wait_for_selector("div.upload-content, input[type='file'], iframe", timeout=20000)
  568. except Exception:
  569. pass
  570. async def _try_set_files_in_frame(frame, frame_name: str) -> bool:
  571. """在指定 frame 中尝试触发上传"""
  572. nonlocal upload_success
  573. if upload_success:
  574. return True
  575. # 方法0:如果用户通过环境变量显式配置了选择器,优先尝试这个
  576. if WEIXIN_UPLOAD_SELECTOR:
  577. try:
  578. el = frame.locator(WEIXIN_UPLOAD_SELECTOR).first
  579. if await el.count() > 0 and await el.is_visible():
  580. print(f"[{self.platform_name}] [{frame_name}] 使用环境变量 WEIXIN_UPLOAD_SELECTOR: {WEIXIN_UPLOAD_SELECTOR}")
  581. try:
  582. async with self.page.expect_file_chooser(timeout=5000) as fc_info:
  583. await el.click()
  584. chooser = await fc_info.value
  585. await chooser.set_files(params.video_path)
  586. upload_success = True
  587. print(f"[{self.platform_name}] [{frame_name}] 通过环境变量选择器上传成功")
  588. return True
  589. except Exception as e:
  590. print(f"[{self.platform_name}] [{frame_name}] 环境变量选择器点击失败,尝试直接 set_input_files: {e}")
  591. try:
  592. await el.set_input_files(params.video_path)
  593. upload_success = True
  594. print(f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 成功")
  595. return True
  596. except Exception as e2:
  597. print(f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 仍失败: {e2}")
  598. except Exception as e:
  599. print(f"[{self.platform_name}] [{frame_name}] 使用环境变量选择器定位元素失败: {e}")
  600. # 先尝试点击上传区域触发 chooser(最贴近 matrix)
  601. click_selectors = [
  602. "div.upload-content",
  603. "div[class*='upload-content']",
  604. "div[class*='upload']",
  605. "div.add-wrap",
  606. "[class*='uploader']",
  607. "text=点击上传",
  608. "text=上传视频",
  609. "text=选择视频",
  610. ]
  611. for selector in click_selectors:
  612. try:
  613. el = frame.locator(selector).first
  614. if await el.count() > 0 and await el.is_visible():
  615. print(f"[{self.platform_name}] [{frame_name}] 找到可点击上传区域: {selector}")
  616. try:
  617. async with self.page.expect_file_chooser(timeout=5000) as fc_info:
  618. await el.click()
  619. chooser = await fc_info.value
  620. await chooser.set_files(params.video_path)
  621. upload_success = True
  622. print(f"[{self.platform_name}] [{frame_name}] 通过 file chooser 上传成功")
  623. return True
  624. except Exception as e:
  625. print(f"[{self.platform_name}] [{frame_name}] 点击触发 chooser 失败: {e}")
  626. except Exception:
  627. pass
  628. # 再尝试直接设置 input[type=file](iframe/隐藏 input 常见)
  629. try:
  630. inputs = frame.locator("input[type='file']")
  631. cnt = await inputs.count()
  632. if cnt > 0:
  633. best_idx = 0
  634. best_score = -1
  635. for i in range(cnt):
  636. try:
  637. inp = inputs.nth(i)
  638. accept = (await inp.get_attribute("accept")) or ""
  639. multiple = (await inp.get_attribute("multiple")) or ""
  640. score = 0
  641. if "video" in accept:
  642. score += 10
  643. if "mp4" in accept:
  644. score += 3
  645. if multiple:
  646. score += 1
  647. if score > best_score:
  648. best_score = score
  649. best_idx = i
  650. except Exception:
  651. continue
  652. target = inputs.nth(best_idx)
  653. print(f"[{self.platform_name}] [{frame_name}] 尝试对 input[{best_idx}] set_input_files (score={best_score})")
  654. await target.set_input_files(params.video_path)
  655. upload_success = True
  656. print(f"[{self.platform_name}] [{frame_name}] 通过 file input 上传成功")
  657. return True
  658. except Exception as e:
  659. print(f"[{self.platform_name}] [{frame_name}] file input 上传失败: {e}")
  660. # 不直接返回,让后面的 AI 兜底有机会执行
  661. # 方法4: 兜底使用 AI 分析 HTML,猜测上传入口
  662. try:
  663. frame_url = getattr(frame, "url", "")
  664. html_full = await frame.content()
  665. html_for_ai = await self._extract_relevant_html_snippets(html_full)
  666. print(f"[{self.platform_name}] [{frame_name}] frame_url={frame_url}, html_len={len(html_full)}, html_for_ai_len={len(html_for_ai)}")
  667. ai_selector = await self.ai_find_upload_selector(html_for_ai, frame_name=frame_name)
  668. if ai_selector:
  669. try:
  670. el = frame.locator(ai_selector).first
  671. if await el.count() > 0:
  672. print(f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器点击上传入口: {ai_selector}")
  673. try:
  674. async with self.page.expect_file_chooser(timeout=5000) as fc_info:
  675. await el.click()
  676. chooser = await fc_info.value
  677. await chooser.set_files(params.video_path)
  678. upload_success = True
  679. print(f"[{self.platform_name}] [{frame_name}] 通过 AI 选择器上传成功")
  680. return True
  681. except Exception as e:
  682. print(f"[{self.platform_name}] [{frame_name}] AI 选择器点击失败,改为直接 set_input_files: {e}")
  683. try:
  684. await el.set_input_files(params.video_path)
  685. upload_success = True
  686. print(f"[{self.platform_name}] [{frame_name}] AI 选择器直接 set_input_files 成功")
  687. return True
  688. except Exception as e2:
  689. print(f"[{self.platform_name}] [{frame_name}] AI 选择器 set_input_files 仍失败: {e2}")
  690. except Exception as e:
  691. print(f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器定位元素失败: {e}")
  692. else:
  693. # 如果 AI 无法从 HTML 推断,退一步:构造候选元素列表交给 AI 选择
  694. try:
  695. candidates = await frame.evaluate("""
  696. () => {
  697. function cssEscape(s) {
  698. try { return CSS.escape(s); } catch (e) { return s.replace(/[^a-zA-Z0-9_-]/g, '\\\\$&'); }
  699. }
  700. function buildSelector(el) {
  701. if (!el || el.nodeType !== 1) return '';
  702. if (el.id) return `#${cssEscape(el.id)}`;
  703. let parts = [];
  704. let cur = el;
  705. for (let depth = 0; cur && cur.nodeType === 1 && depth < 5; depth++) {
  706. let part = cur.tagName.toLowerCase();
  707. const role = cur.getAttribute('role');
  708. const type = cur.getAttribute('type');
  709. if (type) part += `[type="${type}"]`;
  710. if (role) part += `[role="${role}"]`;
  711. const cls = (cur.className || '').toString().trim().split(/\\s+/).filter(Boolean);
  712. if (cls.length) part += '.' + cls.slice(0, 2).map(cssEscape).join('.');
  713. // nth-of-type
  714. let idx = 1;
  715. let sib = cur;
  716. while (sib && (sib = sib.previousElementSibling)) {
  717. if (sib.tagName === cur.tagName) idx++;
  718. }
  719. part += `:nth-of-type(${idx})`;
  720. parts.unshift(part);
  721. cur = cur.parentElement;
  722. }
  723. return parts.join(' > ');
  724. }
  725. const nodes = Array.from(document.querySelectorAll('input, button, a, div, span'))
  726. .filter(el => {
  727. const tag = el.tagName.toLowerCase();
  728. const type = (el.getAttribute('type') || '').toLowerCase();
  729. const role = (el.getAttribute('role') || '').toLowerCase();
  730. const aria = (el.getAttribute('aria-label') || '').toLowerCase();
  731. const txt = (el.innerText || '').trim().slice(0, 60);
  732. const cls = (el.className || '').toString().toLowerCase();
  733. const isFile = tag === 'input' && type === 'file';
  734. const looksClickable =
  735. tag === 'button' || tag === 'a' || role === 'button' || el.onclick ||
  736. cls.includes('upload') || cls.includes('uploader') || cls.includes('drag') ||
  737. aria.includes('上传') || aria.includes('选择') || aria.includes('添加') ||
  738. txt.includes('上传') || txt.includes('选择') || txt.includes('添加') || txt.includes('点击上传');
  739. if (!isFile && !looksClickable) return false;
  740. const r = el.getBoundingClientRect();
  741. const visible = r.width > 5 && r.height > 5;
  742. return visible;
  743. });
  744. const limited = nodes.slice(0, 120).map(el => ({
  745. css: buildSelector(el),
  746. tag: el.tagName.toLowerCase(),
  747. type: el.getAttribute('type') || '',
  748. role: el.getAttribute('role') || '',
  749. ariaLabel: el.getAttribute('aria-label') || '',
  750. text: (el.innerText || '').trim().slice(0, 80),
  751. id: el.id || '',
  752. className: (el.className || '').toString().slice(0, 120),
  753. accept: el.getAttribute('accept') || '',
  754. }));
  755. return limited;
  756. }
  757. """)
  758. ai_selector2 = await self.ai_pick_selector_from_candidates(
  759. candidates=candidates,
  760. goal="上传视频入口",
  761. frame_name=frame_name
  762. )
  763. if ai_selector2:
  764. el2 = frame.locator(ai_selector2).first
  765. if await el2.count() > 0:
  766. print(f"[{self.platform_name}] [{frame_name}] 使用 AI 候选选择器点击上传入口: {ai_selector2}")
  767. try:
  768. async with self.page.expect_file_chooser(timeout=5000) as fc_info:
  769. await el2.click()
  770. chooser2 = await fc_info.value
  771. await chooser2.set_files(params.video_path)
  772. upload_success = True
  773. print(f"[{self.platform_name}] [{frame_name}] 通过 AI 候选选择器上传成功")
  774. return True
  775. except Exception as e:
  776. print(f"[{self.platform_name}] [{frame_name}] AI 候选选择器点击失败,尝试 set_input_files: {e}")
  777. try:
  778. await el2.set_input_files(params.video_path)
  779. upload_success = True
  780. print(f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 成功")
  781. return True
  782. except Exception as e2:
  783. print(f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 仍失败: {e2}")
  784. except Exception as e:
  785. print(f"[{self.platform_name}] [{frame_name}] 构造候选并交给 AI 失败: {e}")
  786. except Exception as e:
  787. print(f"[{self.platform_name}] [{frame_name}] AI 上传入口识别整体失败: {e}")
  788. return False
  789. # 先尝试主 frame
  790. try:
  791. await _try_set_files_in_frame(self.page.main_frame, "main")
  792. except Exception as e:
  793. print(f"[{self.platform_name}] main frame 上传尝试异常: {e}")
  794. # 再遍历所有子 frame
  795. if not upload_success:
  796. try:
  797. frames = self.page.frames
  798. print(f"[{self.platform_name}] 发现 frames: {len(frames)}")
  799. for idx, fr in enumerate(frames):
  800. if upload_success:
  801. break
  802. # main_frame 已尝试过
  803. if fr == self.page.main_frame:
  804. continue
  805. name = fr.name or f"frame-{idx}"
  806. await _try_set_files_in_frame(fr, name)
  807. except Exception as e:
  808. print(f"[{self.platform_name}] 遍历 frames 异常: {e}")
  809. if not upload_success:
  810. screenshot_base64 = await self.capture_screenshot()
  811. return PublishResult(
  812. success=False,
  813. platform=self.platform_name,
  814. error="未找到上传入口(可能在 iframe 中或页面结构已变更)",
  815. screenshot_base64=screenshot_base64,
  816. page_url=await self.get_page_url(),
  817. status='failed'
  818. )
  819. self.report_progress(20, "正在填充标题和话题...")
  820. # 添加标题和话题
  821. await self.add_title_tags(params)
  822. self.report_progress(30, "等待视频上传完成...")
  823. # 等待上传完成
  824. for _ in range(120):
  825. try:
  826. button_info = await self.page.get_by_role("button", name="发表").get_attribute('class')
  827. if "weui-desktop-btn_disabled" not in button_info:
  828. print(f"[{self.platform_name}] 视频上传完毕")
  829. # 上传封面
  830. self.report_progress(50, "正在上传封面...")
  831. await self.upload_cover(params.cover_path)
  832. break
  833. else:
  834. # 检查上传错误
  835. if await self.page.locator('div.status-msg.error').count():
  836. if await self.page.locator('div.media-status-content div.tag-inner:has-text("删除")').count():
  837. await self.handle_upload_error(params.video_path)
  838. await asyncio.sleep(3)
  839. except:
  840. await asyncio.sleep(3)
  841. self.report_progress(60, "处理视频设置...")
  842. # 添加短标题
  843. try:
  844. short_title_el = self.page.get_by_text("短标题", exact=True).locator("..").locator(
  845. "xpath=following-sibling::div").locator('span input[type="text"]')
  846. if await short_title_el.count():
  847. short_title = format_short_title(params.title)
  848. await short_title_el.fill(short_title)
  849. except:
  850. pass
  851. # 定时发布
  852. if params.publish_date:
  853. self.report_progress(70, "设置定时发布...")
  854. await self.set_schedule_time(params.publish_date)
  855. self.report_progress(80, "正在发布...")
  856. # 点击发布 - 参考 matrix
  857. for i in range(30):
  858. try:
  859. # 参考 matrix: div.form-btns button:has-text("发表")
  860. publish_btn = self.page.locator('div.form-btns button:has-text("发表")')
  861. if await publish_btn.count():
  862. print(f"[{self.platform_name}] 点击发布按钮...")
  863. await publish_btn.click()
  864. # 等待跳转到作品列表页面 - 参考 matrix
  865. await self.page.wait_for_url(
  866. "https://channels.weixin.qq.com/platform/post/list",
  867. timeout=10000
  868. )
  869. self.report_progress(100, "发布成功")
  870. print(f"[{self.platform_name}] 视频发布成功!")
  871. screenshot_base64 = await self.capture_screenshot()
  872. return PublishResult(
  873. success=True,
  874. platform=self.platform_name,
  875. message="发布成功",
  876. screenshot_base64=screenshot_base64,
  877. page_url=self.page.url,
  878. status='success'
  879. )
  880. except Exception as e:
  881. current_url = self.page.url
  882. if "https://channels.weixin.qq.com/platform/post/list" in current_url:
  883. self.report_progress(100, "发布成功")
  884. print(f"[{self.platform_name}] 视频发布成功!")
  885. screenshot_base64 = await self.capture_screenshot()
  886. return PublishResult(
  887. success=True,
  888. platform=self.platform_name,
  889. message="发布成功",
  890. screenshot_base64=screenshot_base64,
  891. page_url=current_url,
  892. status='success'
  893. )
  894. else:
  895. print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30, URL: {current_url}")
  896. await asyncio.sleep(1)
  897. # 发布超时
  898. screenshot_base64 = await self.capture_screenshot()
  899. page_url = await self.get_page_url()
  900. return PublishResult(
  901. success=False,
  902. platform=self.platform_name,
  903. error="发布超时,请检查发布状态",
  904. screenshot_base64=screenshot_base64,
  905. page_url=page_url,
  906. status='need_action'
  907. )
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Scrape the Channels post-list page and return the works shown on it.

      Works are read from the rendered DOM (``div.post-feed-item`` nodes),
      so only items currently present on the page can be returned.

      Args:
          cookies: Cookie payload passed to ``self.parse_cookies``.
          page: Requested page index. It is only logged; no pagination is
              performed by this method.
          page_size: Maximum number of list items to parse.

      Returns:
          ``WorksResult`` with ``success=True`` plus the parsed works, or
          ``success=False`` with ``error`` set to the exception text when
          anything outside the per-item parsing fails. An item that fails to
          parse is logged and skipped.
      """
      import hashlib
      import traceback

      def _stable_digest(text: str) -> str:
          # The built-in hash() of a str is salted per process, which made
          # the generated work_id differ on every run. A content digest is
          # stable across runs.
          return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]

      # (icon selector, counter name), checked in this order for each data item.
      stat_icons = (
          ('span.weui-icon-outlined-eyes-on', 'play_count'),
          ('span.weui-icon-outlined-like', 'like_count'),
          ('span.weui-icon-outlined-comment', 'comment_count'),
          ('use[xlink\\:href="#icon-share"]', 'share_count'),
          ('use[xlink\\:href="#icon-thumb"]', 'collect_count'),
      )

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品列表")
      print(f"[{self.platform_name}] page={page}, page_size={page_size}")
      print(f"{'='*60}")

      works: List[WorkItem] = []
      total = 0
      has_more = False

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          # Open the creator-center post list.
          await self.page.goto("https://channels.weixin.qq.com/platform/post/list")
          await asyncio.sleep(5)

          # Being redirected to a login URL means the cookies are no longer valid.
          current_url = self.page.url
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Wait for the list to render; on timeout keep a screenshot for debugging.
          try:
              await self.page.wait_for_selector('div.post-feed-item', timeout=15000)
          except Exception as exc:
              current_url = self.page.url
              print(f"[{self.platform_name}] 等待超时,当前 URL: {current_url}")
              screenshot_path = f"weixin_timeout_{int(asyncio.get_event_loop().time())}.png"
              await self.page.screenshot(path=screenshot_path)
              print(f"[{self.platform_name}] 截图已保存: {screenshot_path}")
              raise Exception(f"页面加载超时,当前 URL: {current_url}") from exc

          # Debug aid: dump the first 5000 characters of the page DOM.
          page_html = await self.page.content()
          print(f"[{self.platform_name}] ========== 页面 DOM 开始 ==========")
          print(page_html[:5000])
          print(f"[{self.platform_name}] ========== 页面 DOM 结束 ==========")

          post_items = self.page.locator('div.post-feed-item')
          item_count = await post_items.count()
          print(f"[{self.platform_name}] 找到 {item_count} 个作品项")

          for i in range(min(item_count, page_size)):
              try:
                  item = post_items.nth(i)

                  # Cover image URL.
                  cover_url = ''
                  cover_el = item.locator('div.media img.thumb').first
                  if await cover_el.count() > 0:
                      cover_url = await cover_el.get_attribute('src') or ''

                  # Title.
                  title = ''
                  title_el = item.locator('div.post-title').first
                  if await title_el.count() > 0:
                      title = (await title_el.text_content() or '').strip()

                  # Publish time, kept as the text shown on the page.
                  publish_time = ''
                  time_el = item.locator('div.post-time span').first
                  if await time_el.count() > 0:
                      publish_time = (await time_el.text_content() or '').strip()

                  # Statistics: each data item is classified by the icon it contains.
                  counters = {name: 0 for _, name in stat_icons}
                  data_items = item.locator('div.post-data div.data-item')
                  data_count = await data_items.count()
                  for j in range(data_count):
                      data_item = data_items.nth(j)
                      count_text = await data_item.locator('span.count').text_content() or '0'
                      count_text = count_text.strip()
                      for selector, name in stat_icons:
                          if await data_item.locator(selector).count() > 0:
                              counters[name] = self._parse_count(count_text)
                              break

                  # Temporary id derived from list position, title and publish time.
                  work_id = f"weixin_{i}_{_stable_digest(title)}_{_stable_digest(publish_time)}"

                  works.append(WorkItem(
                      work_id=work_id,
                      title=title or '无标题',
                      cover_url=cover_url,
                      duration=0,
                      status='published',
                      publish_time=publish_time,
                      play_count=counters['play_count'],
                      like_count=counters['like_count'],
                      comment_count=counters['comment_count'],
                      share_count=counters['share_count'],
                      collect_count=counters['collect_count'],
                  ))
              except Exception as e:
                  print(f"[{self.platform_name}] 解析作品 {i} 失败: {e}")
                  traceback.print_exc()
                  continue

          total = len(works)
          has_more = item_count > page_size
          print(f"[{self.platform_name}] 获取到 {total} 个作品")

      except Exception as e:
          traceback.print_exc()
          return WorksResult(success=False, platform=self.platform_name, error=str(e))

      return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more)
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Scrape the comment-management page for the comments of one work.

      The work is located by comparing ``work_id`` with the titles in the
      left-hand feed list (substring match in either direction). When nothing
      matches, the first work in the list is opened instead.

      Args:
          cookies: Cookie payload passed to ``self.parse_cookies``.
          work_id: Identifier compared against the feed titles.
          cursor: Accepted for interface compatibility; not used here.

      Returns:
          ``CommentsResult`` with ``success=True`` and the comments currently
          loaded on the page (empty when the feed list or the comment list
          does not appear), or ``success=False`` with ``error`` set when an
          unexpected exception occurs.
      """
      import hashlib
      import re
      import traceback

      def _empty_result() -> CommentsResult:
          return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)

      async def _first_text(scope, selector: str) -> str:
          # .first avoids Playwright strict-mode errors when several nodes match.
          el = scope.locator(selector).first
          if await el.count() > 0:
              return (await el.text_content() or '').strip()
          return ''

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品评论")
      print(f"[{self.platform_name}] work_id={work_id}")
      print(f"{'='*60}")

      comments: List[CommentItem] = []
      total = 0
      has_more = False

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          # Open the comment management page.
          await self.page.goto("https://channels.weixin.qq.com/platform/interaction/comment")
          await asyncio.sleep(3)

          # Being redirected to a login URL means the cookies are no longer valid.
          current_url = self.page.url
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Wait for the left-hand list of works.
          try:
              await self.page.wait_for_selector('div.comment-feed-wrap', timeout=15000)
          except Exception:
              print(f"[{self.platform_name}] 未找到作品列表")
              return _empty_result()

          print(f"[{self.platform_name}] 查找 work_id={work_id} 对应的作品")

          feed_items = self.page.locator('div.comment-feed-wrap')
          item_count = await feed_items.count()
          print(f"[{self.platform_name}] 左侧共 {item_count} 个作品")

          clicked = False
          for i in range(item_count):
              feed = feed_items.nth(i)
              title_text = await _first_text(feed, 'div.feed-title')
              # An empty string is a substring of everything, so empty titles
              # and an empty work_id must not count as a match.
              if not title_text or not work_id:
                  continue
              if work_id in title_text or title_text in work_id:
                  print(f"[{self.platform_name}] 找到匹配作品: {title_text}")
                  await feed.click()
                  await asyncio.sleep(2)
                  clicked = True
                  break

          if not clicked:
              # No title matched: fall back to the first work in the list.
              print(f"[{self.platform_name}] 未找到匹配作品,点击第一个")
              if item_count == 0:
                  return _empty_result()
              await feed_items.nth(0).click()
              await asyncio.sleep(2)

          # Wait for the right-hand comment list; a timeout is treated as "no comments".
          try:
              await self.page.wait_for_selector('div.comment-item', timeout=5000)
          except Exception:
              print(f"[{self.platform_name}] 该作品暂无评论")
              return _empty_result()

          # Total comment count: first number found in the tip text.
          total_text = await _first_text(self.page, 'div.comment-count__tips')
          match = re.search(r'(\d+)', total_text)
          if match:
              total = int(match.group(1))
          print(f"[{self.platform_name}] 评论总数: {total}")

          comment_items = self.page.locator('div.comment-item')
          item_count = await comment_items.count()
          print(f"[{self.platform_name}] 当前加载 {item_count} 条评论")

          for i in range(item_count):
              try:
                  item = comment_items.nth(i)

                  author_name = await _first_text(item, 'span.comment-user-name')

                  author_avatar = ''
                  avatar_el = item.locator('img.comment-avatar').first
                  if await avatar_el.count() > 0:
                      author_avatar = await avatar_el.get_attribute('src') or ''

                  content = await _first_text(item, 'span.comment-content')
                  create_time = await _first_text(item, 'span.comment-time')

                  if not content:
                      continue

                  # Built-in hash() of a str is salted per process; use a
                  # content digest so the id is the same on every run.
                  content_digest = hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]
                  comment_id = f"weixin_comment_{i}_{content_digest}"

                  comments.append(CommentItem(
                      comment_id=comment_id,
                      work_id=work_id,
                      content=content,
                      author_id='',
                      author_name=author_name,
                      author_avatar=author_avatar,
                      like_count=0,
                      reply_count=0,
                      create_time=create_time,
                  ))
                  print(f"[{self.platform_name}] 评论 {i+1}: {author_name} - {content[:20]}...")
              except Exception as e:
                  print(f"[{self.platform_name}] 解析评论 {i} 失败: {e}")
                  continue

          print(f"[{self.platform_name}] 成功获取 {len(comments)} 条评论")

      except Exception as e:
          traceback.print_exc()
          return CommentsResult(success=False, platform=self.platform_name, work_id=work_id, error=str(e))

      return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=comments, total=total, has_more=has_more)
  async def auto_reply_private_messages(self, cookies: str) -> dict:
      """Open the private-message page and auto-reply in both message tabs.

      Args:
          cookies: Cookie payload passed to ``self.parse_cookies``.

      Returns:
          On success: ``{'success': True, 'platform', 'replied_count',
          'message'}``. On any exception: ``{'success': False, 'platform',
          'error'}`` where ``error`` is the exception text.
      """
      import traceback

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始自动回复私信")
      print(f"{'='*60}")

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          # Open the private message page.
          await self.page.goto("https://channels.weixin.qq.com/platform/private_msg", timeout=30000)
          await asyncio.sleep(3)

          # Being redirected to a login URL means the cookies are no longer valid.
          current_url = self.page.url
          print(f"[{self.platform_name}] 当前 URL: {current_url}")
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Wait for the page using the primary selector, then a fallback one.
          # `except Exception` (not a bare except) so task cancellation and
          # KeyboardInterrupt are not swallowed while waiting.
          try:
              await self.page.wait_for_selector('.private-msg-list-header', timeout=15000)
          except Exception:
              try:
                  await self.page.wait_for_selector('.weui-desktop-tab__navs__inner', timeout=10000)
                  print(f"[{self.platform_name}] 使用备用选择器加载成功")
              except Exception as exc:
                  # Keep a screenshot for debugging before giving up.
                  screenshot_path = f"weixin_private_msg_{int(asyncio.get_event_loop().time())}.png"
                  await self.page.screenshot(path=screenshot_path)
                  print(f"[{self.platform_name}] 页面加载失败,截图: {screenshot_path}")
                  raise Exception(f"私信页面加载超时,当前 URL: {current_url}") from exc

          print(f"[{self.platform_name}] 私信页面加载完成")

          # Process both tabs, greeting messages first.
          total_replied = 0
          for tab_name in ("打招呼消息", "私信"):
              total_replied += await self._process_tab_sessions(tab_name)

          print(f"[{self.platform_name}] 自动回复完成,共回复 {total_replied} 条消息")

          return {
              'success': True,
              'platform': self.platform_name,
              'replied_count': total_replied,
              'message': f'成功回复 {total_replied} 条私信'
          }

      except Exception as e:
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e)
          }
  1212. async def _process_tab_sessions(self, tab_name: str) -> int:
  1213. """处理指定 tab 下的所有会话"""
  1214. print(f"\n🔄 正在处理「{tab_name}」中的所有会话...")
  1215. if not self.page:
  1216. return 0
  1217. replied_count = 0
  1218. try:
  1219. # 点击 tab
  1220. if tab_name == "私信":
  1221. tab_link = self.page.locator('.weui-desktop-tab__navs__inner li').first.locator('a')
  1222. elif tab_name == "打招呼消息":
  1223. tab_link = self.page.locator('.weui-desktop-tab__navs__inner li').nth(1).locator('a')
  1224. else:
  1225. return 0
  1226. if await tab_link.is_visible():
  1227. await tab_link.click()
  1228. print(f" ➤ 已点击「{tab_name}」tab")
  1229. else:
  1230. print(f" ❌ 「{tab_name}」tab 不可见")
  1231. return 0
  1232. # 等待会话列表加载
  1233. try:
  1234. await self.page.wait_for_function("""
  1235. () => {
  1236. const hasSession = document.querySelectorAll('.session-wrap').length > 0;
  1237. const hasEmpty = !!document.querySelector('.empty-text');
  1238. return hasSession || hasEmpty;
  1239. }
  1240. """, timeout=8000)
  1241. print(" ✅ 会话列表区域已加载")
  1242. except:
  1243. print(" ⚠️ 等待会话列表超时,继续尝试读取...")
  1244. # 获取会话
  1245. session_wraps = self.page.locator('.session-wrap')
  1246. session_count = await session_wraps.count()
  1247. print(f" 💬 共找到 {session_count} 个会话")
  1248. if session_count == 0:
  1249. return 0
  1250. # 遍历每个会话
  1251. for idx in range(session_count):
  1252. try:
  1253. current_sessions = self.page.locator('.session-wrap')
  1254. if idx >= await current_sessions.count():
  1255. break
  1256. session = current_sessions.nth(idx)
  1257. user_name = await session.locator('.name').inner_text()
  1258. last_preview = await session.locator('.feed-info').inner_text()
  1259. print(f"\n ➤ [{idx+1}/{session_count}] 正在处理: {user_name} | 最后消息: {last_preview}")
  1260. await session.click()
  1261. await asyncio.sleep(2)
  1262. # 提取聊天历史
  1263. history = await self._extract_chat_history()
  1264. need_reply = (not history) or (not history[-1]["is_author"])
  1265. if need_reply:
  1266. reply_text = await self._generate_reply_with_ai(history)
  1267. if reply_text=="":
  1268. reply_text = self._generate_reply(history)
  1269. # # 生成回复
  1270. # if history and history[-1]["is_author"]:
  1271. # reply_text = await self._generate_reply_with_ai(history)
  1272. # else:
  1273. # reply_text = self._generate_reply(history)
  1274. if reply_text:
  1275. print(f" 📝 回复内容: {reply_text}")
  1276. try:
  1277. textarea = self.page.locator('.edit_area').first
  1278. send_btn = self.page.locator('button:has-text("发送")').first
  1279. if await textarea.is_visible() and await send_btn.is_visible():
  1280. await textarea.fill(reply_text)
  1281. await asyncio.sleep(0.5)
  1282. await send_btn.click()
  1283. print(" ✅ 已发送")
  1284. replied_count += 1
  1285. await asyncio.sleep(1.5)
  1286. else:
  1287. print(" ❌ 输入框或发送按钮不可见")
  1288. except Exception as e:
  1289. print(f" ❌ 发送失败: {e}")
  1290. else:
  1291. print(" ➤ 无需回复")
  1292. else:
  1293. print(" ➤ 最后一条是我发的,跳过回复")
  1294. except Exception as e:
  1295. print(f" ❌ 处理会话 {idx+1} 时出错: {e}")
  1296. continue
  1297. except Exception as e:
  1298. print(f"❌ 处理「{tab_name}」失败: {e}")
  1299. return replied_count
  1300. async def _extract_chat_history(self) -> list:
  1301. """精准提取聊天记录,区分作者(自己)和用户"""
  1302. if not self.page:
  1303. return []
  1304. history = []
  1305. message_wrappers = self.page.locator('.session-content-wrapper > div:not(.footer) > .text-wrapper')
  1306. count = await message_wrappers.count()
  1307. for i in range(count):
  1308. try:
  1309. wrapper = message_wrappers.nth(i)
  1310. # 判断方向
  1311. is_right = await wrapper.locator('.content-right').count() > 0
  1312. is_left = await wrapper.locator('.content-left').count() > 0
  1313. if not (is_left or is_right):
  1314. continue
  1315. # 提取消息文本
  1316. pre_el = wrapper.locator('pre.message-plain')
  1317. content = ''
  1318. if await pre_el.count() > 0:
  1319. content = await pre_el.inner_text()
  1320. content = content.strip()
  1321. if not content:
  1322. continue
  1323. # 获取头像
  1324. avatar_img = wrapper.locator('.avatar').first
  1325. avatar_src = ''
  1326. if await avatar_img.count() > 0:
  1327. avatar_src = await avatar_img.get_attribute("src") or ''
  1328. # 右侧 = 作者(自己)
  1329. is_author = is_right
  1330. # 获取用户名
  1331. if is_left:
  1332. name_el = wrapper.locator('.profile .name')
  1333. author_name = '用户'
  1334. if await name_el.count() > 0:
  1335. author_name = await name_el.inner_text()
  1336. else:
  1337. author_name = "我"
  1338. history.append({
  1339. "author": author_name,
  1340. "content": content,
  1341. "is_author": is_author,
  1342. "avatar": avatar_src
  1343. })
  1344. except Exception as e:
  1345. print(f" ⚠️ 解析第 {i+1} 条消息失败: {e}")
  1346. continue
  1347. return history
  1348. async def _generate_reply_with_ai(self, chat_history: list) -> str:
  1349. """使用 AI 生成智能回复"""
  1350. import requests
  1351. import json
  1352. try:
  1353. # 获取 AI 配置
  1354. ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
  1355. ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
  1356. ai_model = os.environ.get('AI_MODEL', 'qwen-plus')
  1357. if not ai_api_key:
  1358. print("⚠️ 未配置 AI API Key,使用规则回复")
  1359. return self._generate_reply(chat_history)
  1360. # 构建对话上下文
  1361. messages = [{"role": "system", "content": "你是一个友好的微信视频号创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。"}]
  1362. for msg in chat_history:
  1363. role = "assistant" if msg["is_author"] else "user"
  1364. messages.append({
  1365. "role": role,
  1366. "content": msg["content"]
  1367. })
  1368. # 调用 AI API
  1369. headers = {
  1370. 'Authorization': f'Bearer {ai_api_key}',
  1371. 'Content-Type': 'application/json'
  1372. }
  1373. payload = {
  1374. "model": ai_model,
  1375. "messages": messages,
  1376. "max_tokens": 150,
  1377. "temperature": 0.8
  1378. }
  1379. print(" 🤖 正在调用 AI 生成回复...")
  1380. response = requests.post(
  1381. f"{ai_base_url}/chat/completions",
  1382. headers=headers,
  1383. json=payload,
  1384. timeout=30
  1385. )
  1386. if response.status_code != 200:
  1387. print(f" ⚠️ AI API 返回错误 {response.status_code},使用规则回复")
  1388. return self._generate_reply(chat_history)
  1389. result = response.json()
  1390. ai_reply = result.get('choices', [{}])[0].get('message', {}).get('content', '').strip()
  1391. if ai_reply:
  1392. print(f" ✅ AI 生成回复: {ai_reply}")
  1393. return ai_reply
  1394. else:
  1395. print(" ⚠️ AI 返回空内容,使用规则回复")
  1396. return self._generate_reply(chat_history)
  1397. except Exception as e:
  1398. print(f" ⚠️ AI 回复生成失败: {e},使用规则回复")
  1399. return self._generate_reply(chat_history)
  1400. def _generate_reply(self, chat_history: list) -> str:
  1401. """根据完整聊天历史生成回复(规则回复方式)"""
  1402. if not chat_history:
  1403. return "你好!感谢联系~"
  1404. # 检查最后一条是否是作者发的
  1405. if chat_history[-1]["is_author"]:
  1406. return "" # 不回复
  1407. # 找最后一条用户消息
  1408. last_user_msg = chat_history[-1]["content"]
  1409. # 简单规则回复
  1410. if "谢谢" in last_user_msg or "感谢" in last_user_msg:
  1411. return "不客气!欢迎常来交流~"
  1412. elif "你好" in last_user_msg or "在吗" in last_user_msg:
  1413. return "你好!请问有什么可以帮您的?"
  1414. elif "视频" in last_user_msg or "怎么拍" in last_user_msg:
  1415. return "视频是用手机拍摄的,注意光线和稳定哦!"
  1416. else:
  1417. return "收到!我会认真阅读您的留言~"