weixin.py 132 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209
  1. # -*- coding: utf-8 -*-
  2. """
  3. 微信视频号发布器
  4. 参考: matrix/tencent_uploader/main.py
  5. """
  6. import asyncio
  7. import json
  8. import os
  9. from datetime import datetime
  10. from typing import List
  11. from .base import (
  12. BasePublisher,
  13. PublishParams,
  14. PublishResult,
  15. WorkItem,
  16. WorksResult,
  17. CommentItem,
  18. CommentsResult,
  19. )
  20. import os
  21. import time
  22. # 允许通过环境变量手动指定“上传视频入口”的选择器,便于在页面结构频繁变更时快速调整
  23. WEIXIN_UPLOAD_SELECTOR = os.environ.get("WEIXIN_UPLOAD_SELECTOR", "").strip()
  24. # 代理下视频上传持续失败时,可设 WEIXIN_UPLOAD_BYPASS_PROXY=1
  25. # 仅对上传 CDN 直连,其余页面仍走代理(解决大文件经代理易「网络出错」)
  26. WEIXIN_UPLOAD_BYPASS_PROXY = os.environ.get(
  27. "WEIXIN_UPLOAD_BYPASS_PROXY", "0"
  28. ).strip() in ("1", "true", "yes")
  29. def format_short_title(origin_title: str) -> str:
  30. """
  31. 格式化短标题
  32. - 移除特殊字符
  33. - 长度限制在 6-16 字符
  34. """
  35. allowed_special_chars = "《》:+?%°"
  36. filtered_chars = [
  37. char
  38. if char.isalnum() or char in allowed_special_chars
  39. else " "
  40. if char == ","
  41. else ""
  42. for char in origin_title
  43. ]
  44. formatted_string = "".join(filtered_chars)
  45. if len(formatted_string) > 16:
  46. formatted_string = formatted_string[:16]
  47. elif len(formatted_string) < 6:
  48. formatted_string += " " * (6 - len(formatted_string))
  49. return formatted_string
  50. class WeixinPublisher(BasePublisher):
  51. """
  52. 微信视频号发布器
  53. 使用 Playwright 自动化操作视频号创作者中心
  54. 注意: 需要使用 Chrome 浏览器,否则可能出现 H264 编码错误
  55. """
  56. platform_name = "weixin"
  57. login_url = "https://channels.weixin.qq.com/platform"
  58. publish_url = "https://channels.weixin.qq.com/platform/post/create"
  59. cookie_domain = ".weixin.qq.com"
  60. def _parse_count(self, count_str: str) -> int:
  61. """解析数字(支持带'万'的格式)"""
  62. try:
  63. count_str = count_str.strip()
  64. if "万" in count_str:
  65. return int(float(count_str.replace("万", "")) * 10000)
  66. return int(count_str)
  67. except:
  68. return 0
  69. async def ai_find_upload_selector(
  70. self, frame_html: str, frame_name: str = "main"
  71. ) -> str:
  72. """
  73. 使用 AI 从 HTML 中识别“上传视频/选择文件”相关元素的 CSS 选择器。
  74. 设计思路:
  75. - 仅在常规 DOM 选择器都失败时调用,避免频繁占用 AI 配额;
  76. - 通过 DashScope 文本模型(与验证码识别同一套配置)分析 HTML;
  77. - 返回一个适合用于 frame.locator(selector) 的 CSS 选择器。
  78. """
  79. import json
  80. import re
  81. import requests
  82. import os
  83. # 避免 HTML 过长导致 token 超限,只截取前 N 字符
  84. if not frame_html:
  85. return ""
  86. max_len = 20000
  87. if len(frame_html) > max_len:
  88. frame_html = frame_html[:max_len]
  89. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  90. ai_base_url = os.environ.get(
  91. "DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"
  92. )
  93. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  94. if not ai_api_key:
  95. print(f"[{self.platform_name}] AI上传入口识别: 未配置 AI API Key,跳过")
  96. return ""
  97. prompt = f"""
  98. 你是熟悉微信视频号后台的前端工程师,现在需要在一段 HTML 中找到“上传视频文件”的入口。
  99. 页面说明:
  100. - 平台:微信视频号(channels.weixin.qq.com)
  101. - 目标:用于上传视频文件的按钮或 input(一般会触发文件选择框)
  102. - 你会收到某个 frame 的完整 HTML 片段(不包含截图)。
  103. 请你根据下面的 HTML,推断最适合用于上传视频文件的元素,并输出一个可以被 Playwright 使用的 CSS 选择器。
  104. 要求:
  105. 1. 只考虑“上传/选择视频文件”的入口,不要返回“发布/发表/下一步”等按钮;
  106. 2. 选择器需要尽量稳定,不要使用自动生成的随机类名(例如带很多随机字母/数字的类名可以用前缀匹配);
  107. 3. 选择器必须是 CSS 选择器(不要返回 XPath);
  108. 4. 如果确实找不到合理的上传入口,返回 selector 为空字符串。
  109. 请以 JSON 格式输出,严格遵守以下结构(不要添加任何解释文字):
  110. ```json
  111. {{
  112. "selector": "CSS 选择器字符串,比如:input[type='file'] 或 div.upload-content input[type='file']"
  113. }}
  114. ```
  115. 下面是 frame=\"{frame_name}\" 的 HTML:
  116. ```html
  117. {frame_html}
  118. ```"""
  119. payload = {
  120. "model": ai_text_model,
  121. "messages": [
  122. {
  123. "role": "user",
  124. "content": prompt,
  125. }
  126. ],
  127. "max_tokens": 600,
  128. }
  129. headers = {
  130. "Authorization": f"Bearer {ai_api_key}",
  131. "Content-Type": "application/json",
  132. }
  133. try:
  134. print(
  135. f"[{self.platform_name}] AI上传入口识别: 正在分析 frame={frame_name} HTML..."
  136. )
  137. resp = requests.post(
  138. f"{ai_base_url}/chat/completions",
  139. headers=headers,
  140. json=payload,
  141. timeout=40,
  142. )
  143. if resp.status_code != 200:
  144. print(
  145. f"[{self.platform_name}] AI上传入口识别: API 返回错误 {resp.status_code}"
  146. )
  147. return ""
  148. data = resp.json()
  149. content = (
  150. data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  151. )
  152. # 尝试从 ```json``` 代码块中解析
  153. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  154. if json_match:
  155. json_str = json_match.group(1)
  156. else:
  157. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  158. json_str = json_match.group(0) if json_match else "{}"
  159. try:
  160. result = json.loads(json_str)
  161. except Exception:
  162. result = {}
  163. selector = (result.get("selector") or "").strip()
  164. print(f"[{self.platform_name}] AI上传入口识别结果: selector='{selector}'")
  165. return selector
  166. except Exception as e:
  167. print(f"[{self.platform_name}] AI上传入口识别异常: {e}")
  168. return ""
  169. async def ai_pick_selector_from_candidates(
  170. self, candidates: list, goal: str, frame_name: str = "main"
  171. ) -> str:
  172. """
  173. 将“候选元素列表(包含 css selector + 文本/属性)”发给 AI,让 AI 直接挑选最符合 goal 的元素。
  174. 适用于:HTML 里看不出上传入口、或页面大量动态渲染时。
  175. """
  176. import json
  177. import re
  178. import requests
  179. import os
  180. if not candidates:
  181. return ""
  182. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  183. ai_base_url = os.environ.get(
  184. "DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"
  185. )
  186. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  187. if not ai_api_key:
  188. print(f"[{self.platform_name}] AI候选选择器: 未配置 AI API Key,跳过")
  189. return ""
  190. # 控制长度,最多取前 120 个候选
  191. candidates = candidates[:120]
  192. prompt = f"""
  193. 你是自动化发布工程师。现在要在微信视频号(channels.weixin.qq.com)发布页面里找到“{goal}”相关的入口元素。
  194. 我会给你一组候选元素,每个候选都包含:
  195. - css: 可直接用于 Playwright 的 CSS 选择器
  196. - tag / type / role / ariaLabel / text / id / className(部分字段可能为空)
  197. 你的任务:
  198. - 从候选中选出最可能用于“{goal}”的元素,返回它的 css 选择器;
  199. - 如果没有任何候选符合,返回空字符串。
  200. 注意:
  201. - 如果 goal 是“上传视频入口”,优先选择 input[type=file] 或看起来会触发选择文件/上传的区域;
  202. - 不要选择“发布/发表/下一步”等按钮(除非 goal 明确是发布按钮)。
  203. 请严格按 JSON 输出(不要解释):
  204. ```json
  205. {{ "selector": "..." }}
  206. ```
  207. 候选列表(frame={frame_name}):
  208. ```json
  209. {json.dumps(candidates, ensure_ascii=False)}
  210. ```"""
  211. payload = {
  212. "model": ai_text_model,
  213. "messages": [{"role": "user", "content": prompt}],
  214. "max_tokens": 400,
  215. }
  216. headers = {
  217. "Authorization": f"Bearer {ai_api_key}",
  218. "Content-Type": "application/json",
  219. }
  220. try:
  221. print(
  222. f"[{self.platform_name}] AI候选选择器: 正在分析 frame={frame_name}, goal={goal} ..."
  223. )
  224. resp = requests.post(
  225. f"{ai_base_url}/chat/completions",
  226. headers=headers,
  227. json=payload,
  228. timeout=40,
  229. )
  230. if resp.status_code != 200:
  231. print(
  232. f"[{self.platform_name}] AI候选选择器: API 返回错误 {resp.status_code}"
  233. )
  234. return ""
  235. data = resp.json()
  236. content = (
  237. data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  238. )
  239. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  240. if json_match:
  241. json_str = json_match.group(1)
  242. else:
  243. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  244. json_str = json_match.group(0) if json_match else "{}"
  245. try:
  246. result = json.loads(json_str)
  247. except Exception:
  248. result = {}
  249. selector = (result.get("selector") or "").strip()
  250. print(f"[{self.platform_name}] AI候选选择器结果: selector='{selector}'")
  251. return selector
  252. except Exception as e:
  253. print(f"[{self.platform_name}] AI候选选择器异常: {e}")
  254. return ""
  255. async def _extract_relevant_html_snippets(self, html: str) -> str:
  256. """
  257. 从 HTML 中抽取与上传相关的片段,减少 token,提升 AI 命中率。
  258. - 优先抓取包含 upload/上传/file/input 等关键词的窗口片段
  259. - 若未命中关键词,返回“开头 + 结尾”的拼接
  260. """
  261. import re
  262. if not html:
  263. return ""
  264. patterns = [
  265. r"upload",
  266. r"uploader",
  267. r"file",
  268. r"type\\s*=\\s*['\\\"]file['\\\"]",
  269. r"input",
  270. r"drag",
  271. r"drop",
  272. r"选择",
  273. r"上传",
  274. r"添加",
  275. r"视频",
  276. ]
  277. regex = re.compile("|".join(patterns), re.IGNORECASE)
  278. snippets = []
  279. for m in regex.finditer(html):
  280. start = max(0, m.start() - 350)
  281. end = min(len(html), m.end() + 350)
  282. snippets.append(html[start:end])
  283. if len(snippets) >= 18:
  284. break
  285. if snippets:
  286. # 去重(粗略)
  287. unique = []
  288. seen = set()
  289. for s in snippets:
  290. key = hash(s)
  291. if key not in seen:
  292. seen.add(key)
  293. unique.append(s)
  294. return "\n\n<!-- SNIPPET -->\n\n".join(unique)[:20000]
  295. # fallback: head + tail
  296. head = html[:9000]
  297. tail = html[-9000:] if len(html) > 9000 else ""
  298. return (head + "\n\n<!-- TAIL -->\n\n" + tail)[:20000]
  299. async def init_browser(self, storage_state: str = None):
  300. """
  301. 初始化浏览器 - 参考 matrix 使用 channel=chrome 避免 H264 编码错误
  302. 重要:如果配置了代理,全程都会使用代理(包括页面访问和视频上传)
  303. """
  304. from playwright.async_api import async_playwright
  305. playwright = await async_playwright().start()
  306. proxy = (
  307. self.proxy_config
  308. if isinstance(getattr(self, "proxy_config", None), dict)
  309. else None
  310. )
  311. if proxy and proxy.get("server"):
  312. # 启用上传 bypass 时:仅对上传 CDN 直连,其余仍走代理
  313. if WEIXIN_UPLOAD_BYPASS_PROXY:
  314. bypass = "findeross.weixin.qq.com,upload.weixin.qq.com,*.cos.qq.com,*.myqcloud.com,*.tencentcloudapi.com"
  315. proxy = dict(proxy)
  316. proxy["bypass"] = bypass
  317. print(
  318. f"[{self.platform_name}] 使用代理(上传 CDN 直连): {proxy.get('server')}",
  319. flush=True,
  320. )
  321. print(
  322. f"[{self.platform_name}] 💡 页面走代理,视频上传 CDN 直连,避免大文件经代理失败",
  323. flush=True,
  324. )
  325. else:
  326. print(
  327. f"[{self.platform_name}] 使用代理(全程): {proxy.get('server')}",
  328. flush=True,
  329. )
  330. print(
  331. f"[{self.platform_name}] 💡 页面访问和视频上传都将通过代理",
  332. flush=True,
  333. )
  334. # 参考 matrix: 使用系统内的 Chrome 浏览器,避免 H264 编码错误
  335. launch_opts = {"headless": self.headless}
  336. if not self.headless:
  337. launch_opts["slow_mo"] = 400
  338. print(
  339. f"[{self.platform_name}] 有头模式 + slow_mo=400ms,浏览器将可见",
  340. flush=True,
  341. )
  342. try:
  343. launch_opts["channel"] = "chrome"
  344. if proxy and proxy.get("server"):
  345. launch_opts["proxy"] = proxy
  346. # 代理下大文件上传优化:禁用 QUIC,部分代理对 QUIC 支持不佳易导致连接中断
  347. launch_opts.setdefault("args", []).append("--disable-quic")
  348. self.browser = await playwright.chromium.launch(**launch_opts)
  349. mode = "代理模式" if proxy else "直连模式"
  350. print(
  351. f"[{self.platform_name}] 使用系统 Chrome 浏览器({mode})", flush=True
  352. )
  353. except Exception as e:
  354. print(
  355. f"[{self.platform_name}] Chrome 不可用,使用 Chromium: {e}", flush=True
  356. )
  357. if "channel" in launch_opts:
  358. del launch_opts["channel"]
  359. if proxy and proxy.get("server"):
  360. launch_opts["proxy"] = proxy
  361. if "--disable-quic" not in (launch_opts.get("args") or []):
  362. launch_opts.setdefault("args", []).append("--disable-quic")
  363. self.browser = await playwright.chromium.launch(**launch_opts)
  364. # 设置 HTTP Headers
  365. headers = {
  366. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  367. "Referer": "https://channels.weixin.qq.com/platform/post/list",
  368. }
  369. self.context = await self.browser.new_context(
  370. extra_http_headers=headers,
  371. ignore_https_errors=True,
  372. viewport={"width": 1920, "height": 1080},
  373. user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  374. )
  375. self.page = await self.context.new_page()
  376. # 注入反检测脚本
  377. if hasattr(self, "inject_stealth_if_available"):
  378. await self.inject_stealth_if_available()
  379. return self.page
  380. async def set_schedule_time(self, publish_date: datetime):
  381. """设置定时发布"""
  382. if not self.page:
  383. return
  384. print(f"[{self.platform_name}] 设置定时发布...")
  385. # 点击定时选项
  386. label_element = self.page.locator("label").filter(has_text="定时").nth(1)
  387. await label_element.click()
  388. # 选择日期
  389. await self.page.click('input[placeholder="请选择发表时间"]')
  390. publish_month = f"{publish_date.month:02d}"
  391. current_month = f"{publish_month}月"
  392. # 检查月份
  393. page_month = await self.page.inner_text(
  394. 'span.weui-desktop-picker__panel__label:has-text("月")'
  395. )
  396. if page_month != current_month:
  397. await self.page.click("button.weui-desktop-btn__icon__right")
  398. # 选择日期
  399. elements = await self.page.query_selector_all(
  400. "table.weui-desktop-picker__table a"
  401. )
  402. for element in elements:
  403. class_name = await element.evaluate("el => el.className")
  404. if "weui-desktop-picker__disabled" in class_name:
  405. continue
  406. text = await element.inner_text()
  407. if text.strip() == str(publish_date.day):
  408. await element.click()
  409. break
  410. # 输入时间
  411. await self.page.click('input[placeholder="请选择时间"]')
  412. await self.page.keyboard.press("Control+KeyA")
  413. await self.page.keyboard.type(str(publish_date.hour))
  414. # 点击其他地方确认
  415. await self.page.locator("div.input-editor").click()
  416. async def handle_upload_error(self, video_path: str):
  417. """处理上传错误(含代理下「网络出错」重试优化)"""
  418. if not self.page:
  419. return
  420. using_proxy = isinstance(
  421. getattr(self, "proxy_config", None), dict
  422. ) and self.proxy_config.get("server")
  423. # 代理模式下先等待,给代理/网络恢复时间,避免连续重试加剧失败
  424. if using_proxy:
  425. wait_sec = 25
  426. print(
  427. f"[{self.platform_name}] 代理模式:检测到上传错误,等待 {wait_sec} 秒后重试...",
  428. flush=True,
  429. )
  430. await asyncio.sleep(wait_sec)
  431. print(f"[{self.platform_name}] 视频出错了,重新上传中...")
  432. # 出错时先截一张当前页面的图,方便排查(代理问题、视频格式问题等)
  433. try:
  434. timestamp = int(time.time() * 1000)
  435. screenshot_dir = os.path.join(
  436. os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
  437. "screenshots",
  438. )
  439. os.makedirs(screenshot_dir, exist_ok=True)
  440. screenshot_path = os.path.join(
  441. screenshot_dir, f"weixin_upload_error_{timestamp}.png"
  442. )
  443. await self.page.screenshot(path=screenshot_path, full_page=True)
  444. print(
  445. f"[{self.platform_name}] 上传错误截图已保存: {screenshot_path}",
  446. flush=True,
  447. )
  448. except Exception as e:
  449. print(f"[{self.platform_name}] 保存上传错误截图失败: {e}", flush=True)
  450. # 删除出错的视频重新上传
  451. await self.page.locator(
  452. 'div.media-status-content div.tag-inner:has-text("删除")'
  453. ).click()
  454. await self.page.get_by_role("button", name="删除", exact=True).click()
  455. file_input = self.page.locator('input[type="file"]')
  456. await file_input.set_input_files(video_path)
  457. async def add_title_tags(self, params: PublishParams):
  458. """添加标题和话题"""
  459. if not self.page:
  460. return
  461. print(f"[{self.platform_name}] 开始添加标题: {params.title}", flush=True)
  462. await self.page.locator("div.input-editor").click()
  463. await self.page.keyboard.type(params.title)
  464. if params.tags:
  465. await self.page.keyboard.press("Enter")
  466. for tag in params.tags:
  467. await self.page.keyboard.type("#" + tag)
  468. await self.page.keyboard.press("Space")
  469. print(
  470. f"[{self.platform_name}] ✓ 成功添加标题和 {len(params.tags)} 个话题",
  471. flush=True,
  472. )
  473. # 🔧 设置位置(使用代理地区或默认位置)
  474. print(f"[{self.platform_name}] 准备设置位置: {params.location}", flush=True)
  475. if params.location:
  476. await self.set_location(params.location)
  477. else:
  478. print(f"[{self.platform_name}] ⚠️ 未设置位置,跳过", flush=True)
  479. async def set_location(self, location: str):
  480. """设置发布位置"""
  481. if not self.page or not location:
  482. return
  483. try:
  484. print(f"[{self.platform_name}] 正在设置位置: {location}", flush=True)
  485. # 等待页面稳定
  486. await asyncio.sleep(1)
  487. # 尝试多种方式找到位置设置元素
  488. location_selectors = [
  489. # 位置输入框
  490. 'input[placeholder*="位置"]',
  491. 'input[placeholder*="所在"]',
  492. 'input[placeholder*="地点"]',
  493. # 位置按钮
  494. 'div:has-text("所在位置")',
  495. 'div:has-text("添加位置")',
  496. 'span:has-text("位置")',
  497. ]
  498. location_element = None
  499. for selector in location_selectors:
  500. try:
  501. element = self.page.locator(selector).first
  502. if await element.count() > 0 and await element.is_visible():
  503. location_element = element
  504. print(
  505. f"[{self.platform_name}] 找到位置元素: {selector}",
  506. flush=True,
  507. )
  508. break
  509. except:
  510. continue
  511. if not location_element:
  512. print(f"[{self.platform_name}] 未找到位置设置元素,跳过", flush=True)
  513. return
  514. # 点击位置元素
  515. await location_element.click()
  516. await asyncio.sleep(1)
  517. # 查找位置输入框
  518. input_selectors = [
  519. 'input[placeholder*="搜索"]',
  520. 'input[placeholder*="输入"]',
  521. 'input[type="text"]',
  522. ]
  523. location_input = None
  524. for selector in input_selectors:
  525. try:
  526. element = self.page.locator(selector).first
  527. if await element.count() > 0 and await element.is_visible():
  528. location_input = element
  529. break
  530. except:
  531. continue
  532. if location_input:
  533. # 输入位置
  534. await location_input.fill(location)
  535. await asyncio.sleep(1)
  536. # 查找匹配的位置选项并点击
  537. try:
  538. # 等待位置建议出现
  539. await asyncio.sleep(1)
  540. # 查找包含位置文本的选项
  541. option = self.page.locator(f'text="{location}"').first
  542. if await option.count() > 0:
  543. await option.click()
  544. print(
  545. f"[{self.platform_name}] ✓ 位置设置成功: {location}",
  546. flush=True,
  547. )
  548. else:
  549. # 如果没有精确匹配,选择第一个建议
  550. first_option = self.page.locator(
  551. 'div[class*="location"] li, div[class*="suggest"] div'
  552. ).first
  553. if await first_option.count() > 0:
  554. await first_option.click()
  555. print(
  556. f"[{self.platform_name}] ✓ 位置已设置(自动选择)",
  557. flush=True,
  558. )
  559. except Exception as e:
  560. print(f"[{self.platform_name}] ⚠️ 选择位置失败: {e}", flush=True)
  561. # 按 Escape 关闭位置选择器
  562. await self.page.keyboard.press("Escape")
  563. else:
  564. print(f"[{self.platform_name}] 未找到位置输入框", flush=True)
  565. await self.page.keyboard.press("Escape")
  566. except Exception as e:
  567. print(f"[{self.platform_name}] 设置位置失败: {e}", flush=True)
  568. try:
  569. await self.page.keyboard.press("Escape")
  570. except:
  571. pass
  572. async def add_short_title(self):
  573. """添加短标题"""
  574. if not self.page:
  575. return
  576. try:
  577. short_title_element = (
  578. self.page.get_by_text("短标题", exact=True)
  579. .locator("..")
  580. .locator("xpath=following-sibling::div")
  581. .locator('span input[type="text"]')
  582. )
  583. if await short_title_element.count():
  584. # 获取已有内容作为短标题
  585. pass
  586. except:
  587. pass
  async def upload_cover(self, cover_path: str):
      """Replace the video cover with the image at ``cover_path``.

      Does nothing when there is no page, ``cover_path`` is empty, or the
      file does not exist. Any failure while driving the UI is logged and
      swallowed so that a cover problem never aborts the publish flow.

      Args:
          cover_path: Local path of the cover image.

      Returns:
          None.
      """
      if not self.page or not cover_path or not os.path.exists(cover_path):
          return
      try:
          await asyncio.sleep(2)
          change_cover_btn = self.page.locator(
              'div.finder-tag-wrap.btn:has-text("更换封面")'
          )
          # get_attribute() returns None when the element has no class
          # attribute; treat that as "not disabled" instead of letting the
          # membership test raise TypeError.
          btn_class = await change_cover_btn.get_attribute("class") or ""
          if "disabled" not in btn_class:
              await change_cover_btn.click()
              uploader = self.page.locator(
                  "div.single-cover-uploader-wrap > div.wrap"
              )
              # Hovering reveals the delete icon of an existing cover.
              await uploader.hover()
              # Remove the existing cover, if any.
              delete_icon = self.page.locator(".del-wrap > .svg-icon")
              if await delete_icon.count():
                  await delete_icon.click()
              # Upload the new cover through the native file chooser.
              async with self.page.expect_file_chooser() as fc_info:
                  await uploader.click()
              cover_chooser = await fc_info.value
              await cover_chooser.set_files(cover_path)
              await asyncio.sleep(2)
              await self.page.get_by_role("button", name="确定").click()
              await asyncio.sleep(1)
              await self.page.get_by_role("button", name="确认").click()
              print(f"[{self.platform_name}] 封面上传成功")
          else:
              print(f"[{self.platform_name}] 更换封面按钮不可用,跳过封面上传")
      except Exception as e:
          print(f"[{self.platform_name}] 封面上传失败: {e}")
  async def check_captcha(self) -> dict:
      """Check whether the current page shows a captcha or a login prompt.

      Selectors are probed in order; the first one that matches at least one
      element decides the result. Captcha selectors are checked before login
      selectors. A selector that fails to evaluate is skipped.

      Returns:
          ``{"need_captcha": bool, "captcha_type": str}`` where
          ``captcha_type`` is ``"image"``, ``"login"`` or ``""``.
      """
      if not self.page:
          return {"need_captcha": False, "captcha_type": ""}

      # (captcha_type, log label, selectors), probed in this order.
      probes = (
          (
              "image",
              "检测到验证码",
              (
                  'text="请输入验证码"',
                  'text="滑动验证"',
                  '[class*="captcha"]',
                  '[class*="verify"]',
              ),
          ),
          (
              "login",
              "检测到需要登录",
              (
                  'text="请登录"',
                  'text="扫码登录"',
                  '[class*="login-dialog"]',
              ),
          ),
      )
      try:
          for captcha_type, label, selectors in probes:
              for selector in selectors:
                  try:
                      matched = await self.page.locator(selector).count() > 0
                  except Exception:
                      # Skip selectors that fail; ``Exception`` (not a bare
                      # ``except``) keeps asyncio cancellation working.
                      continue
                  if matched:
                      print(f"[{self.platform_name}] {label}: {selector}")
                      return {"need_captcha": True, "captcha_type": captcha_type}
      except Exception as e:
          print(f"[{self.platform_name}] 验证码检测异常: {e}")
      return {"need_captcha": False, "captcha_type": ""}
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video to WeChat Channels (视频号) through the web UI.

      Steps, in order: start the browser and load ``cookies``; open
      ``self.publish_url``; return early if the page redirected to login or a
      captcha is detected; hand the video file to the upload widget (main
      frame first, then every other frame); fill title/tags; poll until a
      "发表" button is enabled; upload the cover; fill the short title;
      optionally set a schedule; click "发表" and wait for the post-list URL.

      Args:
          cookies: Serialized cookies, parsed with ``self.parse_cookies``.
          params: Publish parameters. This method reads ``video_path``,
              ``title``, ``cover_path`` and ``publish_date``.

      Returns:
          A ``PublishResult`` whose ``status`` is ``"success"``, ``"failed"``,
          ``"need_captcha"`` or ``"need_action"`` (the publish click did not
          reach the post list within the retry budget).

      Raises:
          Exception: If the page was not initialized or the video file does
              not exist.
      """
      print(f"\n{'=' * 60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"{'=' * 60}")
      self.report_progress(5, "正在初始化浏览器...")
      # Initialize the browser.
      await self.init_browser()
      print(f"[{self.platform_name}] 浏览器初始化完成")
      # Parse and install cookies. Only the count is logged: the cookie
      # values are login credentials and must not end up in stdout/logs.
      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)
      if not self.page:
          raise Exception("Page not initialized")
      # Make sure the video file exists before touching the page.
      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(
          f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes"
      )
      self.report_progress(10, "正在打开上传页面...")
      # In proxy mode use longer timeouts so large uploads through the proxy
      # do not time out. Computed once and reused for the whole method.
      using_proxy = isinstance(
          getattr(self, "proxy_config", None), dict
      ) and self.proxy_config.get("server")
      if using_proxy:
          self.page.set_default_timeout(300000)  # 5 minutes
          print(f"[{self.platform_name}] 代理模式:已设置 5 分钟操作超时", flush=True)
      # Open the upload page; "domcontentloaded" instead of "networkidle"
      # avoids timeouts on slow proxies.
      await self.page.goto(
          self.publish_url, wait_until="domcontentloaded", timeout=90000
      )
      # Best-effort wait for the full load event.
      try:
          await self.page.wait_for_load_state("load", timeout=30000)
      except Exception:
          pass
      await asyncio.sleep(3)
      # In proxy mode wait a little longer before uploading so the proxy
      # connection can settle.
      if using_proxy:
          print(
              f"[{self.platform_name}] 代理模式:等待 8 秒后开始上传...", flush=True
          )
          await asyncio.sleep(8)
      # Detect a redirect to the login page.
      current_url = self.page.url
      print(f"[{self.platform_name}] 当前页面: {current_url}")
      if "login" in current_url:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="Cookie 已过期,需要重新登录",
              need_captcha=True,
              captcha_type="login",
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status="need_captcha",
          )
      # AI-based captcha detection.
      ai_captcha = await self.ai_check_captcha()
      if ai_captcha["has_captcha"]:
          print(
              f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}",
              flush=True,
          )
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=ai_captcha["captcha_type"],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status="need_captcha",
          )
      # Selector-based captcha detection.
      captcha_result = await self.check_captcha()
      if captcha_result["need_captcha"]:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=captcha_result["captcha_type"],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status="need_captcha",
          )
      self.report_progress(15, "正在选择视频文件...")
      # Upload the video.
      # The publish page's DOM differs a lot between accounts/regions/rollout
      # groups and the upload widget may live in an iframe, so: click to
      # trigger a file chooser first, then fall back to scanning every frame
      # and picking the most video-like file input.
      upload_success = False
      if not self.page:
          raise Exception("Page not initialized")
      # Wait for the upload area to render so we do not probe too early.
      try:
          await self.page.wait_for_selector(
              "div.upload-content, input[type='file'], iframe", timeout=20000
          )
      except Exception:
          pass

      async def _try_set_files_in_frame(frame, frame_name: str) -> bool:
          """Try every upload strategy inside ``frame``; True on success."""
          nonlocal upload_success
          if upload_success:
              return True
          # Strategy 0: a selector explicitly configured through the
          # WEIXIN_UPLOAD_SELECTOR setting takes priority.
          if WEIXIN_UPLOAD_SELECTOR:
              try:
                  el = frame.locator(WEIXIN_UPLOAD_SELECTOR).first
                  if await el.count() > 0 and await el.is_visible():
                      print(
                          f"[{self.platform_name}] [{frame_name}] 使用环境变量 WEIXIN_UPLOAD_SELECTOR: {WEIXIN_UPLOAD_SELECTOR}"
                      )
                      try:
                          async with self.page.expect_file_chooser(
                              timeout=5000
                          ) as fc_info:
                              await el.click()
                          chooser = await fc_info.value
                          await chooser.set_files(params.video_path)
                          upload_success = True
                          print(
                              f"[{self.platform_name}] [{frame_name}] 通过环境变量选择器上传成功"
                          )
                          return True
                      except Exception as e:
                          print(
                              f"[{self.platform_name}] [{frame_name}] 环境变量选择器点击失败,尝试直接 set_input_files: {e}"
                          )
                          try:
                              await el.set_input_files(params.video_path)
                              upload_success = True
                              print(
                                  f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 成功"
                              )
                              return True
                          except Exception as e2:
                              print(
                                  f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 仍失败: {e2}"
                              )
              except Exception as e:
                  print(
                      f"[{self.platform_name}] [{frame_name}] 使用环境变量选择器定位元素失败: {e}"
                  )
          # Strategy 1: click a likely upload area to trigger a file chooser.
          click_selectors = [
              "div.upload-content",
              "div[class*='upload-content']",
              "div[class*='upload']",
              "div.add-wrap",
              "[class*='uploader']",
              "text=点击上传",
              "text=上传视频",
              "text=选择视频",
          ]
          for selector in click_selectors:
              try:
                  el = frame.locator(selector).first
                  if await el.count() > 0 and await el.is_visible():
                      print(
                          f"[{self.platform_name}] [{frame_name}] 找到可点击上传区域: {selector}"
                      )
                      try:
                          async with self.page.expect_file_chooser(
                              timeout=5000
                          ) as fc_info:
                              await el.click()
                          chooser = await fc_info.value
                          await chooser.set_files(params.video_path)
                          upload_success = True
                          print(
                              f"[{self.platform_name}] [{frame_name}] 通过 file chooser 上传成功"
                          )
                          return True
                      except Exception as e:
                          print(
                              f"[{self.platform_name}] [{frame_name}] 点击触发 chooser 失败: {e}"
                          )
              except Exception:
                  pass
          # Strategy 2: set the file directly on an input[type=file]
          # (common for iframes / hidden inputs). Inputs are scored and the
          # first one with the highest score wins.
          try:
              inputs = frame.locator("input[type='file']")
              cnt = await inputs.count()
              if cnt > 0:
                  best_idx = 0
                  best_score = -1
                  for i in range(cnt):
                      try:
                          inp = inputs.nth(i)
                          accept = (await inp.get_attribute("accept")) or ""
                          # NOTE(review): a bare boolean ``multiple``
                          # attribute is presumably read back as "" and so
                          # never earns the +1 below - confirm intent.
                          multiple = (await inp.get_attribute("multiple")) or ""
                          score = 0
                          if "video" in accept:
                              score += 10
                          if "mp4" in accept:
                              score += 3
                          if multiple:
                              score += 1
                          if score > best_score:
                              best_score = score
                              best_idx = i
                      except Exception:
                          continue
                  target = inputs.nth(best_idx)
                  print(
                      f"[{self.platform_name}] [{frame_name}] 尝试对 input[{best_idx}] set_input_files (score={best_score})"
                  )
                  await target.set_input_files(params.video_path)
                  upload_success = True
                  print(
                      f"[{self.platform_name}] [{frame_name}] 通过 file input 上传成功"
                  )
                  return True
          except Exception as e:
              print(f"[{self.platform_name}] [{frame_name}] file input 上传失败: {e}")
              # Do not return here so the AI fallback below still runs.
          # Strategy 3: last resort - let the AI analyse the HTML and guess
          # the upload entry point.
          try:
              frame_url = getattr(frame, "url", "")
              html_full = await frame.content()
              html_for_ai = await self._extract_relevant_html_snippets(html_full)
              print(
                  f"[{self.platform_name}] [{frame_name}] frame_url={frame_url}, html_len={len(html_full)}, html_for_ai_len={len(html_for_ai)}"
              )
              ai_selector = await self.ai_find_upload_selector(
                  html_for_ai, frame_name=frame_name
              )
              if ai_selector:
                  try:
                      el = frame.locator(ai_selector).first
                      if await el.count() > 0:
                          print(
                              f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器点击上传入口: {ai_selector}"
                          )
                          try:
                              async with self.page.expect_file_chooser(
                                  timeout=5000
                              ) as fc_info:
                                  await el.click()
                              chooser = await fc_info.value
                              await chooser.set_files(params.video_path)
                              upload_success = True
                              print(
                                  f"[{self.platform_name}] [{frame_name}] 通过 AI 选择器上传成功"
                              )
                              return True
                          except Exception as e:
                              print(
                                  f"[{self.platform_name}] [{frame_name}] AI 选择器点击失败,改为直接 set_input_files: {e}"
                              )
                              try:
                                  await el.set_input_files(params.video_path)
                                  upload_success = True
                                  print(
                                      f"[{self.platform_name}] [{frame_name}] AI 选择器直接 set_input_files 成功"
                                  )
                                  return True
                              except Exception as e2:
                                  print(
                                      f"[{self.platform_name}] [{frame_name}] AI 选择器 set_input_files 仍失败: {e2}"
                                  )
                  except Exception as e:
                      print(
                          f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器定位元素失败: {e}"
                      )
              else:
                  # The AI could not infer a selector from the HTML: build a
                  # list of candidate elements and let the AI pick one.
                  try:
                      candidates = await frame.evaluate("""
                      () => {
                        function cssEscape(s) {
                          try { return CSS.escape(s); } catch (e) { return s.replace(/[^a-zA-Z0-9_-]/g, '\\\\$&'); }
                        }
                        function buildSelector(el) {
                          if (!el || el.nodeType !== 1) return '';
                          if (el.id) return `#${cssEscape(el.id)}`;
                          let parts = [];
                          let cur = el;
                          for (let depth = 0; cur && cur.nodeType === 1 && depth < 5; depth++) {
                            let part = cur.tagName.toLowerCase();
                            const role = cur.getAttribute('role');
                            const type = cur.getAttribute('type');
                            if (type) part += `[type="${type}"]`;
                            if (role) part += `[role="${role}"]`;
                            const cls = (cur.className || '').toString().trim().split(/\\s+/).filter(Boolean);
                            if (cls.length) part += '.' + cls.slice(0, 2).map(cssEscape).join('.');
                            // nth-of-type
                            let idx = 1;
                            let sib = cur;
                            while (sib && (sib = sib.previousElementSibling)) {
                              if (sib.tagName === cur.tagName) idx++;
                            }
                            part += `:nth-of-type(${idx})`;
                            parts.unshift(part);
                            cur = cur.parentElement;
                          }
                          return parts.join(' > ');
                        }
                        const nodes = Array.from(document.querySelectorAll('input, button, a, div, span'))
                          .filter(el => {
                            const tag = el.tagName.toLowerCase();
                            const type = (el.getAttribute('type') || '').toLowerCase();
                            const role = (el.getAttribute('role') || '').toLowerCase();
                            const aria = (el.getAttribute('aria-label') || '').toLowerCase();
                            const txt = (el.innerText || '').trim().slice(0, 60);
                            const cls = (el.className || '').toString().toLowerCase();
                            const isFile = tag === 'input' && type === 'file';
                            const looksClickable =
                              tag === 'button' || tag === 'a' || role === 'button' || el.onclick ||
                              cls.includes('upload') || cls.includes('uploader') || cls.includes('drag') ||
                              aria.includes('上传') || aria.includes('选择') || aria.includes('添加') ||
                              txt.includes('上传') || txt.includes('选择') || txt.includes('添加') || txt.includes('点击上传');
                            if (!isFile && !looksClickable) return false;
                            const r = el.getBoundingClientRect();
                            const visible = r.width > 5 && r.height > 5;
                            return visible;
                          });
                        const limited = nodes.slice(0, 120).map(el => ({
                          css: buildSelector(el),
                          tag: el.tagName.toLowerCase(),
                          type: el.getAttribute('type') || '',
                          role: el.getAttribute('role') || '',
                          ariaLabel: el.getAttribute('aria-label') || '',
                          text: (el.innerText || '').trim().slice(0, 80),
                          id: el.id || '',
                          className: (el.className || '').toString().slice(0, 120),
                          accept: el.getAttribute('accept') || '',
                        }));
                        return limited;
                      }
                      """)
                      ai_selector2 = await self.ai_pick_selector_from_candidates(
                          candidates=candidates,
                          goal="上传视频入口",
                          frame_name=frame_name,
                      )
                      if ai_selector2:
                          el2 = frame.locator(ai_selector2).first
                          if await el2.count() > 0:
                              print(
                                  f"[{self.platform_name}] [{frame_name}] 使用 AI 候选选择器点击上传入口: {ai_selector2}"
                              )
                              try:
                                  async with self.page.expect_file_chooser(
                                      timeout=5000
                                  ) as fc_info:
                                      await el2.click()
                                  chooser2 = await fc_info.value
                                  await chooser2.set_files(params.video_path)
                                  upload_success = True
                                  print(
                                      f"[{self.platform_name}] [{frame_name}] 通过 AI 候选选择器上传成功"
                                  )
                                  return True
                              except Exception as e:
                                  print(
                                      f"[{self.platform_name}] [{frame_name}] AI 候选选择器点击失败,尝试 set_input_files: {e}"
                                  )
                                  try:
                                      await el2.set_input_files(params.video_path)
                                      upload_success = True
                                      print(
                                          f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 成功"
                                      )
                                      return True
                                  except Exception as e2:
                                      print(
                                          f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 仍失败: {e2}"
                                      )
                  except Exception as e:
                      print(
                          f"[{self.platform_name}] [{frame_name}] 构造候选并交给 AI 失败: {e}"
                      )
          except Exception as e:
              print(
                  f"[{self.platform_name}] [{frame_name}] AI 上传入口识别整体失败: {e}"
              )
          return False

      # Try the main frame first.
      try:
          await _try_set_files_in_frame(self.page.main_frame, "main")
      except Exception as e:
          print(f"[{self.platform_name}] main frame 上传尝试异常: {e}")
      # Then walk every other frame.
      if not upload_success:
          try:
              frames = self.page.frames
              print(f"[{self.platform_name}] 发现 frames: {len(frames)}")
              for idx, fr in enumerate(frames):
                  if upload_success:
                      break
                  # The main frame was already tried above.
                  if fr == self.page.main_frame:
                      continue
                  name = fr.name or f"frame-{idx}"
                  await _try_set_files_in_frame(fr, name)
          except Exception as e:
              print(f"[{self.platform_name}] 遍历 frames 异常: {e}")
      if not upload_success:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到上传入口(可能在 iframe 中或页面结构已变更)",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status="failed",
          )
      self.report_progress(20, "正在填充标题和话题...")
      # Fill in title and topics.
      await self.add_title_tags(params)
      self.report_progress(30, "等待视频上传完成...")
      # In proxy mode allow more retries and a longer total wait to cope
      # with flaky "network error" conditions.
      max_upload_error_retries = 20 if using_proxy else 5
      loop_count = 300 if using_proxy else 200  # proxy mode: ~15 minutes
      if using_proxy:
          print(
              f"[{self.platform_name}] 代理模式:上传重试上限 {max_upload_error_retries} 次,总等待约 15 分钟",
              flush=True,
          )
      upload_completed = False
      upload_error_retry_count = 0
      for i in range(loop_count):
          try:
              # Log progress every 30 seconds (10 iterations x 3 s) so the
              # process does not look stuck.
              if i > 0 and i % 10 == 0:
                  print(
                      f"[{self.platform_name}] 仍在等待上传完成... ({i * 3}s)",
                      flush=True,
                  )
              # Locate the "发表" button with several selectors because the
              # page structure may change.
              publish_btn = None
              for sel in [
                  'div.form-btns button:has-text("发表")',
                  'button:has-text("发表")',
                  'button:has-text("立即发表")',
                  '[role="button"]:has-text("发表")',
              ]:
                  try:
                      el = self.page.locator(sel).first
                      if await el.count() > 0 and await el.is_visible():
                          publish_btn = el
                          break
                  except Exception:
                      continue
              if publish_btn:
                  btn_class = await publish_btn.get_attribute("class") or ""
                  if (
                      "weui-desktop-btn_disabled" not in btn_class
                      and "disabled" not in btn_class.lower()
                  ):
                      # An enabled publish button means the upload finished.
                      print(f"[{self.platform_name}] 视频上传完毕")
                      # Upload the cover.
                      self.report_progress(50, "正在上传封面...")
                      await self.upload_cover(params.cover_path)
                      upload_completed = True
                      break
              # Detect an upload error (div.status-msg.error, e.g. the
              # "network error, please retry later" banner).
              has_error = await self.page.locator("div.status-msg.error").count() > 0
              has_delete_btn = (
                  await self.page.locator(
                      'div.media-status-content div.tag-inner:has-text("删除")'
                  ).count()
                  > 0
              )
              if has_error and has_delete_btn:
                  upload_error_retry_count += 1
                  print(
                      f"[{self.platform_name}] 检测到上传错误,第 {upload_error_retry_count} 次重试",
                      flush=True,
                  )
                  if upload_error_retry_count >= max_upload_error_retries:
                      print(
                          f"[{self.platform_name}] 上传错误重试已达 {max_upload_error_retries} 次,放弃",
                          flush=True,
                      )
                      break
                  # In proxy mode, on the 6th consecutive failure reload the
                  # whole page to rebuild the proxy connection.
                  if using_proxy and upload_error_retry_count == 6:
                      print(
                          f"[{self.platform_name}] 代理模式:尝试整页刷新以重建连接...",
                          flush=True,
                      )
                      try:
                          await self.page.reload(
                              wait_until="domcontentloaded", timeout=60000
                          )
                          await asyncio.sleep(8)
                          await self.page.wait_for_selector(
                              "div.upload-content, input[type='file']", timeout=20000
                          )
                          upload_el = self.page.locator("div.upload-content").first
                          if (
                              await upload_el.count() > 0
                              and await upload_el.is_visible()
                          ):
                              async with self.page.expect_file_chooser(
                                  timeout=10000
                              ) as fc:
                                  await upload_el.click()
                              chooser = await fc.value
                              await chooser.set_files(params.video_path)
                              print(
                                  f"[{self.platform_name}] 刷新后重新上传成功",
                                  flush=True,
                              )
                          else:
                              file_input = self.page.locator(
                                  'input[type="file"]'
                              ).first
                              if await file_input.count() > 0:
                                  await file_input.set_input_files(params.video_path)
                          await asyncio.sleep(2)
                          # The reload cleared the form; fill it again.
                          await self.add_title_tags(params)
                          upload_error_retry_count = 0
                      except Exception as e:
                          print(
                              f"[{self.platform_name}] 整页刷新重传失败: {e}",
                              flush=True,
                          )
                          await self.handle_upload_error(params.video_path)
                  else:
                      await self.handle_upload_error(params.video_path)
              else:
                  upload_error_retry_count = 0  # reset when no error is shown
              await asyncio.sleep(3)
          except Exception as e:
              print(f"[{self.platform_name}] 等待上传时异常: {e}", flush=True)
              await asyncio.sleep(3)
      # The publish button never became enabled: treat the upload as failed
      # and return a failure result with a screenshot attached.
      if not upload_completed:
          try:
              screenshot_base64 = await self.capture_screenshot()
          except Exception as e:
              print(f"[{self.platform_name}] 截图失败: {e}", flush=True)
              screenshot_base64 = ""
          try:
              # Also save a full-page screenshot to <parent dir>/screenshots.
              ts = int(time.time() * 1000)
              screenshot_dir = os.path.join(
                  os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                  "screenshots",
              )
              os.makedirs(screenshot_dir, exist_ok=True)
              err_path = os.path.join(
                  screenshot_dir, f"weixin_upload_timeout_{ts}.png"
              )
              await self.page.screenshot(path=err_path, full_page=True)
              print(
                  f"[{self.platform_name}] 超时/失败截图已保存: {err_path}",
                  flush=True,
              )
          except Exception as e:
              print(f"[{self.platform_name}] 保存失败截图到文件失败: {e}", flush=True)
          page_url = await self.get_page_url()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="视频上传失败,请查看截图",
              screenshot_base64=screenshot_base64,
              page_url=page_url,
              status="failed",
          )
      self.report_progress(60, "处理视频设置...")
      # Fill the short title (best-effort).
      try:
          short_title_el = (
              self.page.get_by_text("短标题", exact=True)
              .locator("..")
              .locator("xpath=following-sibling::div")
              .locator('span input[type="text"]')
          )
          if await short_title_el.count():
              short_title = format_short_title(params.title)
              await short_title_el.fill(short_title)
      except Exception as e:
          # ``Exception`` (not a bare ``except``) so cancellation propagates.
          print(f"[{self.platform_name}] 设置短标题失败: {e}", flush=True)
      # Scheduled publishing.
      if params.publish_date:
          self.report_progress(70, "设置定时发布...")
          await self.set_schedule_time(params.publish_date)
      self.report_progress(80, "正在发布...")
      # Click publish and wait for the redirect to the post list; retried up
      # to 30 times.
      for i in range(30):
          try:
              publish_btn = self.page.locator('div.form-btns button:has-text("发表")')
              if await publish_btn.count():
                  print(f"[{self.platform_name}] 点击发布按钮...")
                  await publish_btn.click()
              # Success is signalled by navigation to the post-list page.
              await self.page.wait_for_url(
                  "https://channels.weixin.qq.com/platform/post/list", timeout=10000
              )
              self.report_progress(100, "发布成功")
              print(f"[{self.platform_name}] 视频发布成功!")
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=True,
                  platform=self.platform_name,
                  message="发布成功",
                  screenshot_base64=screenshot_base64,
                  page_url=self.page.url,
                  status="success",
              )
          except Exception:
              # The wait may fail even though the navigation happened, so
              # check the URL again before retrying.
              current_url = self.page.url
              if "https://channels.weixin.qq.com/platform/post/list" in current_url:
                  self.report_progress(100, "发布成功")
                  print(f"[{self.platform_name}] 视频发布成功!")
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=True,
                      platform=self.platform_name,
                      message="发布成功",
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status="success",
                  )
              else:
                  print(
                      f"[{self.platform_name}] 视频正在发布中... {i + 1}/30, URL: {current_url}"
                  )
                  await asyncio.sleep(1)
      # Publishing timed out.
      screenshot_base64 = await self.capture_screenshot()
      page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="发布超时,请检查发布状态",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status="need_action",
      )
  async def _get_works_fallback_dom(self, page_size: int) -> tuple:
      """Scrape the works list from the current page's DOM.

      Used when the list API fails (e.g. new accounts or a different entry
      page). At most ``page_size`` items are parsed; an item that fails to
      parse is logged and skipped.

      Args:
          page_size: Maximum number of works to return.

      Returns:
          ``(works, total, has_more, next_page)`` where ``total`` is the
          number of parsed works, ``has_more`` is True when the page holds
          more than ``page_size`` items, and ``next_page`` is always ``""``.
      """
      import hashlib  # local import: only needed to derive stable IDs

      def _digest(text: str) -> str:
          # Built-in hash() on str is salted per process, so IDs built from
          # it change on every run; a SHA-256 prefix is stable.
          return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]

      # Icon selector -> counter name, checked in this order; the first
      # matching icon decides which counter a data item feeds.
      icon_fields = (
          ("span.weui-icon-outlined-eyes-on", "play_count"),
          ("span.weui-icon-outlined-like", "like_count"),
          ("span.weui-icon-outlined-comment", "comment_count"),
          ("use[xlink\\:href='#icon-share']", "share_count"),
          ("use[xlink\\:href='#icon-thumb']", "collect_count"),
      )

      works: List[WorkItem] = []
      total = 0
      has_more = False
      try:
          # Wait for the first selector that appears; give up silently if
          # none does and let the counts below decide.
          for selector in [
              "div.post-feed-item",
              "[class*='post-feed']",
              "[class*='feed-item']",
              "div[class*='post']",
          ]:
              try:
                  await self.page.wait_for_selector(selector, timeout=8000)
                  break
              except Exception:
                  continue
          post_items = self.page.locator("div.post-feed-item")
          item_count = await post_items.count()
          if item_count == 0:
              post_items = self.page.locator("[class*='post-feed']")
              item_count = await post_items.count()
          for i in range(min(item_count, page_size)):
              try:
                  item = post_items.nth(i)
                  # Cover: prefer the dedicated thumbnail, else any <img>.
                  cover_url = ""
                  for cover_selector in ("div.media img.thumb", "img"):
                      cover_el = item.locator(cover_selector).first
                      if await cover_el.count() > 0:
                          cover_url = await cover_el.get_attribute("src") or ""
                      if cover_url:
                          break
                  title_el = item.locator("div.post-title").first
                  title = (
                      (await title_el.text_content() or "").strip()
                      if await title_el.count() > 0
                      else ""
                  )
                  time_el = item.locator("div.post-time span").first
                  publish_time = (
                      (await time_el.text_content() or "").strip()
                      if await time_el.count() > 0
                      else ""
                  )
                  counts = {field: 0 for _, field in icon_fields}
                  data_items = item.locator("div.post-data div.data-item")
                  for j in range(await data_items.count()):
                      data_item = data_items.nth(j)
                      count_text = (
                          await data_item.locator("span.count").text_content() or "0"
                      ).strip()
                      for icon_selector, field in icon_fields:
                          if await data_item.locator(icon_selector).count() > 0:
                              counts[field] = self._parse_count(count_text)
                              break
                  work_id = f"weixin_{i}_{_digest(title)}_{_digest(publish_time)}"
                  works.append(
                      WorkItem(
                          work_id=work_id,
                          title=title or "无标题",
                          cover_url=cover_url,
                          duration=0,
                          status="published",
                          publish_time=publish_time,
                          play_count=counts["play_count"],
                          like_count=counts["like_count"],
                          comment_count=counts["comment_count"],
                          share_count=counts["share_count"],
                          collect_count=counts["collect_count"],
                      )
                  )
              except Exception as e:
                  print(
                      f"[{self.platform_name}] DOM 解析作品 {i} 失败: {e}", flush=True
                  )
                  continue
          total = len(works)
          has_more = item_count > page_size
          print(f"[{self.platform_name}] DOM 回退获取 {len(works)} 条", flush=True)
      except Exception as e:
          print(f"[{self.platform_name}] DOM 回退失败: {e}", flush=True)
      return (works, total, has_more, "")
  1426. async def get_works(
  1427. self, cookies: str, page: int = 0, page_size: int = 20
  1428. ) -> WorksResult:
  1429. """获取视频号作品列表(调用 post_list 接口)
  1430. page: 页码从 0 开始,或上一页返回的 rawKeyBuff/lastBuff 字符串
  1431. """
  1432. # 分页:首页 currentPage=1/rawKeyBuff=null,下一页用 currentPage 递增或 rawKeyBuff
  1433. if page is None or page == "" or (isinstance(page, int) and page == 0):
  1434. current_page = 1
  1435. raw_key_buff = None
  1436. elif isinstance(page, int):
  1437. current_page = page + 1
  1438. raw_key_buff = None
  1439. else:
  1440. current_page = 1
  1441. raw_key_buff = str(page)
  1442. ts_ms = str(int(time.time() * 1000))
  1443. print(f"\n{'=' * 60}")
  1444. print(
  1445. f"[{self.platform_name}] 获取作品列表 currentPage={current_page}, pageSize={page_size}, rawKeyBuff={raw_key_buff[:40] if raw_key_buff else 'null'}..."
  1446. )
  1447. print(f"{'=' * 60}")
  1448. works: List[WorkItem] = []
  1449. total = 0
  1450. has_more = False
  1451. next_page = ""
  1452. try:
  1453. await self.init_browser()
  1454. cookie_list = self.parse_cookies(cookies)
  1455. await self.set_cookies(cookie_list)
  1456. if not self.page:
  1457. raise Exception("Page not initialized")
  1458. await self.page.goto(
  1459. "https://channels.weixin.qq.com/platform/post/list", timeout=30000
  1460. )
  1461. await asyncio.sleep(3)
  1462. current_url = self.page.url
  1463. if "login" in current_url:
  1464. raise Exception("Cookie 已过期,请重新登录")
  1465. api_url = "https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_list"
  1466. req_body = {
  1467. "pageSize": page_size,
  1468. "currentPage": current_page,
  1469. "userpageType": 11,
  1470. "stickyOrder": True,
  1471. "timestamp": ts_ms,
  1472. "_log_finder_uin": "",
  1473. "_log_finder_id": "",
  1474. "rawKeyBuff": raw_key_buff,
  1475. "pluginSessionId": None,
  1476. "scene": 7,
  1477. "reqScene": 7,
  1478. }
  1479. body_str = json.dumps(req_body)
  1480. response = await self.page.evaluate(
  1481. """
  1482. async ([url, bodyStr]) => {
  1483. try {
  1484. const resp = await fetch(url, {
  1485. method: 'POST',
  1486. credentials: 'include',
  1487. headers: {
  1488. 'Content-Type': 'application/json',
  1489. 'Accept': '*/*',
  1490. 'Referer': 'https://channels.weixin.qq.com/platform/post/list'
  1491. },
  1492. body: bodyStr
  1493. });
  1494. return await resp.json();
  1495. } catch (e) {
  1496. return { error: e.toString() };
  1497. }
  1498. }
  1499. """,
  1500. [api_url, body_str],
  1501. )
  1502. is_first_page = current_page == 1 and raw_key_buff is None
  1503. if response.get("error"):
  1504. print(
  1505. f"[{self.platform_name}] API 请求失败: {response.get('error')}",
  1506. flush=True,
  1507. )
  1508. if is_first_page:
  1509. (
  1510. works,
  1511. total,
  1512. has_more,
  1513. next_page,
  1514. ) = await self._get_works_fallback_dom(page_size)
  1515. if works:
  1516. return WorksResult(
  1517. success=True,
  1518. platform=self.platform_name,
  1519. works=works,
  1520. total=total,
  1521. has_more=has_more,
  1522. next_page=next_page,
  1523. )
  1524. return WorksResult(
  1525. success=False,
  1526. platform=self.platform_name,
  1527. error=response.get("error", "API 请求失败"),
  1528. )
  1529. err_code = response.get("errCode", -1)
  1530. if err_code != 0:
  1531. err_msg = response.get("errMsg", "unknown")
  1532. print(
  1533. f"[{self.platform_name}] API errCode={err_code}, errMsg={err_msg}, 完整响应(前800字): {json.dumps(response, ensure_ascii=False)[:800]}",
  1534. flush=True,
  1535. )
  1536. if is_first_page:
  1537. (
  1538. works,
  1539. total,
  1540. has_more,
  1541. next_page,
  1542. ) = await self._get_works_fallback_dom(page_size)
  1543. if works:
  1544. return WorksResult(
  1545. success=True,
  1546. platform=self.platform_name,
  1547. works=works,
  1548. total=total,
  1549. has_more=has_more,
  1550. next_page=next_page,
  1551. )
  1552. return WorksResult(
  1553. success=False,
  1554. platform=self.platform_name,
  1555. error=f"errCode={err_code}, errMsg={err_msg}",
  1556. )
  1557. data = response.get("data") or {}
  1558. raw_list = data.get("list") or []
  1559. total = int(data.get("totalCount") or 0)
  1560. has_more = bool(data.get("continueFlag", False))
  1561. next_page = (data.get("lastBuff") or "").strip()
  1562. print(
  1563. f"[{self.platform_name}] API 响应: list_len={len(raw_list)}, totalCount={total}, continueFlag={has_more}, lastBuff={next_page[:50] if next_page else ''}..."
  1564. )
  1565. if is_first_page and len(raw_list) == 0:
  1566. works_fb, total_fb, has_more_fb, _ = await self._get_works_fallback_dom(
  1567. page_size
  1568. )
  1569. if works_fb:
  1570. return WorksResult(
  1571. success=True,
  1572. platform=self.platform_name,
  1573. works=works_fb,
  1574. total=total_fb,
  1575. has_more=has_more_fb,
  1576. next_page="",
  1577. )
  1578. for item in raw_list:
  1579. try:
  1580. # 存 works.platform_video_id 统一用 post_list 接口回参中的 exportId(如 export/xxx)
  1581. work_id = str(
  1582. item.get("exportId")
  1583. or item.get("objectId")
  1584. or item.get("id")
  1585. or ""
  1586. ).strip()
  1587. if not work_id:
  1588. work_id = f"weixin_{hash(item.get('createTime', 0))}_{hash(item.get('desc', {}).get('description', ''))}"
  1589. desc = item.get("desc") or {}
  1590. title = (desc.get("description") or "").strip() or "无标题"
  1591. cover_url = ""
  1592. duration = 0
  1593. media_list = desc.get("media") or []
  1594. if media_list and isinstance(media_list[0], dict):
  1595. m = media_list[0]
  1596. cover_url = (
  1597. m.get("coverUrl") or m.get("thumbUrl") or ""
  1598. ).strip()
  1599. duration = int(m.get("videoPlayLen") or 0)
  1600. create_ts = item.get("createTime") or 0
  1601. if isinstance(create_ts, (int, float)) and create_ts:
  1602. publish_time = datetime.fromtimestamp(create_ts).strftime(
  1603. "%Y-%m-%d %H:%M:%S"
  1604. )
  1605. else:
  1606. publish_time = str(create_ts) if create_ts else ""
  1607. # likeCount=推荐, favCount=点赞
  1608. read_count = int(item.get("readCount") or 0)
  1609. like_count = int(item.get("favCount") or 0)
  1610. comment_count = int(item.get("commentCount") or 0)
  1611. forward_count = int(item.get("forwardCount") or 0)
  1612. works.append(
  1613. WorkItem(
  1614. work_id=work_id,
  1615. title=title,
  1616. cover_url=cover_url,
  1617. duration=duration,
  1618. status="published",
  1619. publish_time=publish_time,
  1620. play_count=read_count,
  1621. like_count=like_count,
  1622. comment_count=comment_count,
  1623. share_count=forward_count,
  1624. collect_count=0,
  1625. )
  1626. )
  1627. except Exception as e:
  1628. print(f"[{self.platform_name}] 解析作品项失败: {e}", flush=True)
  1629. continue
  1630. if total == 0 and works:
  1631. total = len(works)
  1632. print(
  1633. f"[{self.platform_name}] 本页获取 {len(works)} 条,totalCount={total}, next_page={bool(next_page)}"
  1634. )
  1635. except Exception as e:
  1636. import traceback
  1637. traceback.print_exc()
  1638. return WorksResult(success=False, platform=self.platform_name, error=str(e))
  1639. return WorksResult(
  1640. success=True,
  1641. platform=self.platform_name,
  1642. works=works,
  1643. total=total,
  1644. has_more=has_more,
  1645. next_page=next_page,
  1646. )
  1647. async def sync_work_daily_stats_via_browser(
  1648. self, cookies: str, work_id: int, platform_video_id: str
  1649. ) -> dict:
  1650. """
  1651. 通过浏览器自动化同步单个作品的每日数据到 work_day_statistics。
  1652. 流程:
  1653. 1. 打开 statistic/post 页,点击单篇视频 tab,点击近30天
  1654. 2. 监听 post_list 接口,根据 exportId 匹配 platform_video_id 得到 objectId
  1655. 3. 找到 data-row-key=objectId 的行,点击「查看」
  1656. 4. 进入详情页,点击数据详情的近30天,点击下载表格
  1657. 5. 解析 CSV 并返回 statistics 列表(供 Node 保存)
  1658. """
  1659. import csv
  1660. import tempfile
  1661. from pathlib import Path
  1662. result = {
  1663. "success": False,
  1664. "error": "",
  1665. "statistics": [],
  1666. "inserted": 0,
  1667. "updated": 0,
  1668. }
  1669. post_list_data = {"list": []}
  1670. async def handle_response(response):
  1671. try:
  1672. if (
  1673. "statistic/post_list" in response.url
  1674. and response.request.method == "POST"
  1675. ):
  1676. try:
  1677. body = await response.json()
  1678. if body.get("errCode") == 0 and body.get("data"):
  1679. post_list_data["list"] = body.get("data", {}).get(
  1680. "list", []
  1681. )
  1682. except Exception:
  1683. pass
  1684. except Exception:
  1685. pass
  1686. try:
  1687. await self.init_browser()
  1688. cookie_list = self.parse_cookies(cookies)
  1689. await self.set_cookies(cookie_list)
  1690. if not self.page:
  1691. raise Exception("Page not initialized")
  1692. self.page.on("response", handle_response)
  1693. # 1. 打开数据分析-作品数据页
  1694. print(f"[{self.platform_name}] 打开数据分析页...", flush=True)
  1695. await self.page.goto(
  1696. "https://channels.weixin.qq.com/platform/statistic/post", timeout=30000
  1697. )
  1698. if not self.headless:
  1699. print(
  1700. f"[{self.platform_name}] 浏览器已打开,请将窗口置于前台观看操作(等待 5 秒)...",
  1701. flush=True,
  1702. )
  1703. await asyncio.sleep(5)
  1704. else:
  1705. await asyncio.sleep(3)
  1706. if "login" in self.page.url:
  1707. raise Exception("Cookie 已过期,请重新登录")
  1708. # 2. 点击「单篇视频」tab
  1709. tab_sel = "div.weui-desktop-tab__navs ul li:nth-child(2) a"
  1710. try:
  1711. await self.page.wait_for_selector(tab_sel, timeout=8000)
  1712. await self.page.click(tab_sel)
  1713. except Exception:
  1714. tab_sel = "a:has-text('单篇视频')"
  1715. await self.page.click(tab_sel)
  1716. await asyncio.sleep(2)
  1717. # 3. 点击「近30天」(单篇视频页的日期范围筛选)
  1718. # 选择器优先级:精确匹配单篇视频区域内的日期范围 radio 组
  1719. radio_selectors = [
  1720. "div.post-single-wrap div.weui-desktop-radio-group.radio-group label:has-text('近30天')",
  1721. "div.post-single-wrap div.filter-wrap div.weui-desktop-radio-group label:nth-child(2)",
  1722. "div.post-single-wrap div.card-body div.filter-wrap div:nth-child(2) label:nth-child(2)",
  1723. "div.post-single-wrap label:has-text('近30天')",
  1724. "div.weui-desktop-radio-group label:has-text('近30天')",
  1725. "label:has-text('近30天')",
  1726. ]
  1727. clicked = False
  1728. for sel in radio_selectors:
  1729. try:
  1730. el = self.page.locator(sel).first
  1731. if await el.count() > 0:
  1732. await el.click()
  1733. clicked = True
  1734. print(
  1735. f"[{self.platform_name}] 已点击近30天按钮 (selector: {sel[:50]}...)",
  1736. flush=True,
  1737. )
  1738. break
  1739. except Exception as e:
  1740. continue
  1741. if not clicked:
  1742. print(
  1743. f"[{self.platform_name}] 警告: 未找到近30天按钮,继续尝试...",
  1744. flush=True,
  1745. )
  1746. await asyncio.sleep(3)
  1747. # 4. 从 post_list 响应中找 exportId -> objectId
  1748. export_id_to_object = {}
  1749. for item in post_list_data["list"]:
  1750. eid = (item.get("exportId") or "").strip()
  1751. oid = (item.get("objectId") or "").strip()
  1752. if eid and oid:
  1753. export_id_to_object[eid] = oid
  1754. object_id = export_id_to_object.get(
  1755. platform_video_id
  1756. ) or export_id_to_object.get(platform_video_id.strip())
  1757. if not object_id:
  1758. # 尝试宽松匹配(platform_video_id 可能带前缀)
  1759. for eid, oid in export_id_to_object.items():
  1760. if platform_video_id in eid or eid in platform_video_id:
  1761. object_id = oid
  1762. break
  1763. if not object_id:
  1764. result["error"] = (
  1765. f"未在 post_list 中匹配到 exportId={platform_video_id}"
  1766. )
  1767. print(f"[{self.platform_name}] {result['error']}", flush=True)
  1768. return result
  1769. # 5. 找到 data-row-key=objectId 的行,点击「查看」
  1770. view_btn = self.page.locator(
  1771. f'tr[data-row-key="{object_id}"] a.detail-wrap, tr[data-row-key="{object_id}"] a:has-text("查看")'
  1772. )
  1773. try:
  1774. await view_btn.first.wait_for(timeout=5000)
  1775. await view_btn.first.click()
  1776. except Exception as e:
  1777. view_btn = self.page.locator(f'tr[data-row-key="{object_id}"] a')
  1778. if await view_btn.count() > 0:
  1779. await view_btn.first.click()
  1780. else:
  1781. raise Exception(f"未找到 objectId={object_id} 的查看按钮: {e}")
  1782. await asyncio.sleep(3)
  1783. # 6. 详情页:点击数据详情的「近30天」,再点击「下载表格」
  1784. detail_radio = (
  1785. "div.post-statistic-common div.filter-wrap label:nth-child(2)"
  1786. )
  1787. for sel in [detail_radio, "div.main-body label:has-text('近30天')"]:
  1788. try:
  1789. el = self.page.locator(sel).first
  1790. if await el.count() > 0:
  1791. await el.click()
  1792. break
  1793. except Exception:
  1794. continue
  1795. await asyncio.sleep(2)
  1796. # 保存到 server/tmp 目录
  1797. download_dir = Path(__file__).resolve().parent.parent.parent / "tmp"
  1798. download_dir.mkdir(parents=True, exist_ok=True)
  1799. async with self.page.expect_download(timeout=15000) as download_info:
  1800. download_btn = self.page.locator(
  1801. "div.post-statistic-common div.filter-extra a, a:has-text('下载表格')"
  1802. )
  1803. if await download_btn.count() == 0:
  1804. raise Exception("未找到「下载表格」按钮")
  1805. await download_btn.first.click()
  1806. download = await download_info.value
  1807. save_path = download_dir / f"work_{work_id}_{int(time.time())}.csv"
  1808. await download.save_as(save_path)
  1809. # 7. 解析 CSV -> statistics
  1810. stats_list = []
  1811. with open(save_path, "r", encoding="utf-8-sig", errors="replace") as f:
  1812. reader = csv.DictReader(f)
  1813. rows = list(reader)
  1814. for row in rows:
  1815. date_val = (
  1816. row.get("日期")
  1817. or row.get("date")
  1818. or row.get("时间")
  1819. or row.get("时间周期", "")
  1820. ).strip()
  1821. if not date_val:
  1822. continue
  1823. dt = None
  1824. norm = (
  1825. date_val[:10]
  1826. .replace("年", "-")
  1827. .replace("月", "-")
  1828. .replace("日", "-")
  1829. .replace("/", "-")
  1830. )
  1831. if len(norm) >= 8 and norm.count("-") >= 2:
  1832. parts = norm.split("-")
  1833. if len(parts) == 3:
  1834. try:
  1835. y, m, d = int(parts[0]), int(parts[1]), int(parts[2])
  1836. if 2000 <= y <= 2100 and 1 <= m <= 12 and 1 <= d <= 31:
  1837. dt = datetime(y, m, d)
  1838. except (ValueError, IndexError):
  1839. pass
  1840. if not dt:
  1841. for fmt in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"]:
  1842. try:
  1843. dt = datetime.strptime(
  1844. (date_val.split()[0] if date_val else "")[:10], fmt
  1845. )
  1846. break
  1847. except (ValueError, IndexError):
  1848. dt = None
  1849. if not dt:
  1850. continue
  1851. rec_date = dt.strftime("%Y-%m-%d")
  1852. play = self._parse_count(
  1853. row.get("播放", "")
  1854. or row.get("播放量", "")
  1855. or row.get("play_count", "0")
  1856. )
  1857. like = self._parse_count(
  1858. row.get("点赞", "") or row.get("like_count", "0")
  1859. )
  1860. comment = self._parse_count(
  1861. row.get("评论", "") or row.get("comment_count", "0")
  1862. )
  1863. share = self._parse_count(
  1864. row.get("分享", "") or row.get("share_count", "0")
  1865. )
  1866. collect = self._parse_count(
  1867. row.get("收藏", "") or row.get("collect_count", "0")
  1868. )
  1869. comp_rate = (
  1870. row.get("完播率", "") or row.get("completion_rate", "0")
  1871. ).strip().rstrip("%") or "0"
  1872. avg_dur = (
  1873. row.get("平均播放时长", "") or row.get("avg_watch_duration", "0")
  1874. ).strip()
  1875. stats_list.append(
  1876. {
  1877. "work_id": work_id,
  1878. "record_date": rec_date,
  1879. "play_count": play,
  1880. "like_count": like,
  1881. "comment_count": comment,
  1882. "share_count": share,
  1883. "collect_count": collect,
  1884. "completion_rate": comp_rate,
  1885. "avg_watch_duration": avg_dur,
  1886. }
  1887. )
  1888. result["statistics"] = stats_list
  1889. result["success"] = True
  1890. try:
  1891. os.remove(save_path)
  1892. except Exception:
  1893. pass
  1894. except Exception as e:
  1895. import traceback
  1896. traceback.print_exc()
  1897. result["error"] = str(e)
  1898. finally:
  1899. try:
  1900. await self.close_browser()
  1901. except Exception:
  1902. pass
  1903. return result
  1904. async def sync_account_works_daily_stats_via_browser(
  1905. self,
  1906. cookies: str,
  1907. works: List[dict],
  1908. save_fn=None,
  1909. update_works_fn=None,
  1910. headless: bool = True,
  1911. ) -> dict:
  1912. """
  1913. 纯浏览器批量同步账号下所有作品(在库的)的每日数据到 work_day_statistics。
  1914. 流程:
  1915. 1. 打开 statistic/post → 点击单篇视频 → 点击近30天
  1916. 2. 【首次】监听 post_list 接口 → 解析响应更新 works 表 yesterday_* 字段
  1917. 3. 监听 post_list 获取 exportId->objectId 映射
  1918. 4. 遍历 post_list 的每一条:
  1919. - 若 exportId 在 works 的 platform_video_id 中无匹配 → 跳过
  1920. - 若匹配 → 找到 data-row-key=objectId 的行,点击「查看」
  1921. - 详情页:默认近7天,直接监听 feed_aggreagate_data_by_tab_type 接口
  1922. - 从「全部」tab 解析 browse/like/comment/forward/fav/follow,日期从昨天往前推
  1923. - 通过 save_fn 存入 work_day_statistics
  1924. - 返回列表页,继续下一条
  1925. works: [{"work_id": int, "platform_video_id": str}, ...]
  1926. save_fn: (stats_list: List[dict]) -> {inserted, updated},由调用方传入,用于调用 Node batch-dates
  1927. update_works_fn: (updates: List[dict]) -> {updated},由调用方传入,用于将 post_list 解析数据更新到 works 表(仅首次调用)
  1928. """
  1929. from pathlib import Path
  1930. from datetime import timedelta
  1931. result = {
  1932. "success": True,
  1933. "error": "",
  1934. "total_processed": 0,
  1935. "total_skipped": 0,
  1936. "inserted": 0,
  1937. "updated": 0,
  1938. "works_updated": 0,
  1939. }
  1940. # platform_video_id(exportId) -> work_id
  1941. export_id_to_work = {}
  1942. for w in works:
  1943. pvid = (
  1944. w.get("platform_video_id") or w.get("platformVideoId") or ""
  1945. ).strip()
  1946. wid = w.get("work_id") or w.get("workId")
  1947. if pvid and wid is not None:
  1948. export_id_to_work[pvid] = int(wid)
  1949. # 兼容可能带/不带前缀(如 export/xxx vs xxx)
  1950. if "/" in pvid:
  1951. export_id_to_work[pvid.split("/")[-1]] = int(wid)
  1952. post_list_data = {"list": []}
  1953. feed_aggreagate_data = {"body": None}
  1954. async def handle_response(response):
  1955. try:
  1956. url = response.url
  1957. if "statistic/post_list" in url:
  1958. try:
  1959. body = await response.json()
  1960. if body.get("errCode") == 0 and body.get("data"):
  1961. post_list_data["list"] = body.get("data", {}).get(
  1962. "list", []
  1963. )
  1964. except Exception:
  1965. pass
  1966. elif "feed_aggreagate_data_by_tab_type" in url:
  1967. try:
  1968. body = await response.json()
  1969. if body.get("errCode") == 0 and body.get("data"):
  1970. feed_aggreagate_data["body"] = body
  1971. except Exception:
  1972. pass
  1973. except Exception:
  1974. pass
  1975. try:
  1976. await self.init_browser()
  1977. cookie_list = self.parse_cookies(cookies)
  1978. await self.set_cookies(cookie_list)
  1979. if not self.page:
  1980. raise Exception("Page not initialized")
  1981. self.page.on("response", handle_response)
  1982. # 1. 打开数据分析-作品数据页
  1983. print(f"[{self.platform_name}] 打开数据分析页...", flush=True)
  1984. await self.page.goto(
  1985. "https://channels.weixin.qq.com/platform/statistic/post", timeout=30000
  1986. )
  1987. if not headless:
  1988. print(
  1989. f"[{self.platform_name}] 浏览器已打开,请将窗口置于前台观看操作(等待 5 秒)...",
  1990. flush=True,
  1991. )
  1992. await asyncio.sleep(5)
  1993. else:
  1994. await asyncio.sleep(3)
  1995. if "login" in self.page.url:
  1996. raise Exception("Cookie 已过期,请重新登录")
  1997. # 2. 点击「单篇视频」tab
  1998. tab_sel = "div.weui-desktop-tab__navs ul li:nth-child(2) a"
  1999. try:
  2000. await self.page.wait_for_selector(tab_sel, timeout=8000)
  2001. await self.page.click(tab_sel)
  2002. except Exception:
  2003. tab_sel = "a:has-text('单篇视频')"
  2004. await self.page.click(tab_sel)
  2005. await asyncio.sleep(2)
  2006. # 3. 点击「近30天」前清空 list,点击后等待 handler 捕获带 fullPlayRate 的 post_list
  2007. post_list_data["list"] = []
  2008. radio_selectors = [
  2009. "div.post-single-wrap div.weui-desktop-radio-group.radio-group label:has-text('近30天')",
  2010. "div.post-single-wrap div.filter-wrap div.weui-desktop-radio-group label:nth-child(2)",
  2011. "div.post-single-wrap label:has-text('近30天')",
  2012. "div.weui-desktop-radio-group label:has-text('近30天')",
  2013. "label:has-text('近30天')",
  2014. ]
  2015. clicked = False
  2016. for sel in radio_selectors:
  2017. try:
  2018. el = self.page.locator(sel).first
  2019. if await el.count() > 0:
  2020. await el.click()
  2021. clicked = True
  2022. print(
  2023. f"[{self.platform_name}] 已点击近30天 (selector: {sel[:40]}...)",
  2024. flush=True,
  2025. )
  2026. break
  2027. except Exception:
  2028. continue
  2029. if not clicked:
  2030. print(f"[{self.platform_name}] 警告: 未找到近30天按钮", flush=True)
  2031. await asyncio.sleep(5)
  2032. # 4. 从 post_list 获取列表
  2033. items = post_list_data["list"]
  2034. if not items:
  2035. result["error"] = "未监听到 post_list 或列表为空"
  2036. print(f"[{self.platform_name}] {result['error']}", flush=True)
  2037. return result
  2038. # 4.5 【仅首次】从 post_list 接口响应解析数据 → 更新 works 表(不再下载 CSV)
  2039. # post_list 返回字段映射: readCount->播放量, likeCount->点赞, commentCount->评论, forwardCount->分享,
  2040. # fullPlayRate->完播率(0-1小数), avgPlayTimeSec->平均播放时长(秒), exportId->匹配 work_id
  2041. if update_works_fn and items:
  2042. try:
  2043. updates = []
  2044. for it in items:
  2045. eid = (it.get("exportId") or "").strip()
  2046. if not eid:
  2047. continue
  2048. work_id = export_id_to_work.get(eid)
  2049. if work_id is None:
  2050. for k, v in export_id_to_work.items():
  2051. if eid in k or k in eid:
  2052. work_id = v
  2053. break
  2054. if work_id is None:
  2055. continue
  2056. # likeCount=推荐, favCount=点赞
  2057. read_count = int(it.get("readCount") or 0)
  2058. recommend_count = int(it.get("likeCount") or 0)
  2059. like_count = int(it.get("favCount") or 0)
  2060. comment_count = int(it.get("commentCount") or 0)
  2061. forward_count = int(it.get("forwardCount") or 0)
  2062. follow_count = int(it.get("followCount") or 0)
  2063. full_play_rate = it.get("fullPlayRate")
  2064. if full_play_rate is not None:
  2065. comp_rate = f"{float(full_play_rate) * 100:.2f}%"
  2066. else:
  2067. comp_rate = "0"
  2068. avg_sec = it.get("avgPlayTimeSec")
  2069. if avg_sec is not None:
  2070. avg_dur = f"{float(avg_sec):.2f}秒"
  2071. else:
  2072. avg_dur = "0"
  2073. updates.append(
  2074. {
  2075. "work_id": work_id,
  2076. "yesterday_play_count": read_count,
  2077. "yesterday_like_count": like_count,
  2078. "yesterday_recommend_count": recommend_count,
  2079. "yesterday_comment_count": comment_count,
  2080. "yesterday_share_count": forward_count,
  2081. "yesterday_follow_count": follow_count,
  2082. "yesterday_completion_rate": comp_rate,
  2083. "yesterday_avg_watch_duration": avg_dur,
  2084. }
  2085. )
  2086. if updates:
  2087. try:
  2088. save_result = update_works_fn(updates)
  2089. result["works_updated"] = save_result.get("updated", 0)
  2090. except Exception as api_err:
  2091. import traceback
  2092. traceback.print_exc()
  2093. except Exception as e:
  2094. import traceback
  2095. traceback.print_exc()
  2096. print(
  2097. f"[{self.platform_name}] 解析 post_list 更新 works 失败: {e}",
  2098. flush=True,
  2099. )
  2100. # 辅助:点击单篇视频 + 近30天,恢复列表视图(go_back 后会回到全部视频页)
  2101. async def ensure_single_video_near30():
  2102. tab_sel = "div.weui-desktop-tab__navs ul li:nth-child(2) a"
  2103. try:
  2104. await self.page.wait_for_selector(tab_sel, timeout=8000)
  2105. await self.page.click(tab_sel)
  2106. except Exception:
  2107. await self.page.click("a:has-text('单篇视频')")
  2108. await asyncio.sleep(2)
  2109. for sel in [
  2110. "div.post-single-wrap div.weui-desktop-radio-group.radio-group label:has-text('近30天')",
  2111. "div.post-single-wrap label:has-text('近30天')",
  2112. "div.weui-desktop-radio-group label:has-text('近30天')",
  2113. "label:has-text('近30天')",
  2114. ]:
  2115. try:
  2116. el = self.page.locator(sel).first
  2117. if await el.count() > 0:
  2118. await el.click()
  2119. break
  2120. except Exception:
  2121. continue
  2122. await asyncio.sleep(3)
  2123. # 5. 遍历每一条,按 exportId 匹配作品
  2124. processed_export_ids = set()
  2125. for idx, item in enumerate(items):
  2126. eid = (item.get("exportId") or "").strip()
  2127. oid = (item.get("objectId") or "").strip()
  2128. if not oid:
  2129. continue
  2130. # 已处理过的跳过(理论上循环顺序即处理顺序,此处做双重保险)
  2131. if eid in processed_export_ids:
  2132. print(
  2133. f"[{self.platform_name}] 跳过 [{idx + 1}] exportId={eid} (已处理)",
  2134. flush=True,
  2135. )
  2136. continue
  2137. # go_back 后回到全部视频页,需重新点击单篇视频+近30天
  2138. if idx > 0:
  2139. await ensure_single_video_near30()
  2140. # 匹配 work_id
  2141. work_id = export_id_to_work.get(eid)
  2142. if work_id is None:
  2143. for k, v in export_id_to_work.items():
  2144. if eid in k or k in eid:
  2145. work_id = v
  2146. break
  2147. if work_id is None:
  2148. result["total_skipped"] += 1
  2149. print(
  2150. f"[{self.platform_name}] 跳过 [{idx + 1}] exportId={eid} (库中无对应作品)",
  2151. flush=True,
  2152. )
  2153. continue
  2154. # 点击「查看」:Ant Design 表格 tr[data-row-key] > td > div.slot-wrap > a.detail-wrap
  2155. # 操作列可能在 ant-table-fixed-right 内,优先尝试
  2156. view_selectors = [
  2157. f'div.ant-table-fixed-right tr[data-row-key="{oid}"] a.detail-wrap',
  2158. f'tr[data-row-key="{oid}"] a.detail-wrap',
  2159. f'tr[data-row-key="{oid}"] td a.detail-wrap',
  2160. f'tr[data-row-key="{oid}"] a:has-text("查看")',
  2161. f'tr[data-row-key="{oid}"] a',
  2162. ]
  2163. clicked = False
  2164. for sel in view_selectors:
  2165. view_btn = self.page.locator(sel)
  2166. if await view_btn.count() > 0:
  2167. try:
  2168. await view_btn.first.wait_for(timeout=3000)
  2169. await view_btn.first.click()
  2170. clicked = True
  2171. print(
  2172. f"[{self.platform_name}] 已点击查看 (selector: {sel[:40]}...)",
  2173. flush=True,
  2174. )
  2175. break
  2176. except Exception as e:
  2177. continue
  2178. if not clicked:
  2179. print(
  2180. f"[{self.platform_name}] 未找到 objectId={oid} 的查看按钮",
  2181. flush=True,
  2182. )
  2183. result["total_skipped"] += 1
  2184. continue
  2185. await asyncio.sleep(3)
  2186. # 详情页:默认展示近7天,页面加载时自动请求 feed_aggreagate,不清空 body 避免覆盖已监听到的响应
  2187. await asyncio.sleep(4)
  2188. # 从 feed_aggreagate 响应解析「全部」数据
  2189. # 数据结构: data.dataByFanstype[].dataByTabtype[] 中 tabTypeName="全部" 或 tabType=999
  2190. # 日期:从昨天往前推 N 天(含昨天),数组从最早到最晚排列
  2191. body = feed_aggreagate_data.get("body")
  2192. if not body or not body.get("data"):
  2193. print(
  2194. f"[{self.platform_name}] work_id={work_id} 未监听到 feed_aggreagate 有效响应",
  2195. flush=True,
  2196. )
  2197. await self.page.go_back()
  2198. await asyncio.sleep(2)
  2199. continue
  2200. tab_all = None
  2201. for fan_item in body.get("data", {}).get("dataByFanstype", []):
  2202. for tab_item in fan_item.get("dataByTabtype", []):
  2203. if (
  2204. tab_item.get("tabTypeName") == "全部"
  2205. or tab_item.get("tabType") == 999
  2206. ):
  2207. tab_all = tab_item.get("data")
  2208. break
  2209. if tab_all is not None:
  2210. break
  2211. if not tab_all:
  2212. tab_all = (
  2213. body.get("data", {}).get("feedData", [{}])[0].get("totalData")
  2214. )
  2215. if not tab_all:
  2216. print(
  2217. f"[{self.platform_name}] work_id={work_id} 未找到「全部」数据",
  2218. flush=True,
  2219. )
  2220. await self.page.go_back()
  2221. await asyncio.sleep(2)
  2222. continue
  2223. browse = tab_all.get("browse", [])
  2224. n = len(browse)
  2225. if n == 0:
  2226. print(
  2227. f"[{self.platform_name}] work_id={work_id} browse 为空",
  2228. flush=True,
  2229. )
  2230. await self.page.go_back()
  2231. await asyncio.sleep(2)
  2232. continue
  2233. # 日期:昨天往前推 n 天,index 0 = 最早日
  2234. today = datetime.now().replace(
  2235. hour=0, minute=0, second=0, microsecond=0
  2236. )
  2237. yesterday = today - timedelta(days=1)
  2238. start_date = yesterday - timedelta(days=n - 1)
  2239. # like=推荐, fav=点赞
  2240. like_arr = tab_all.get("like", [])
  2241. comment_arr = tab_all.get("comment", [])
  2242. forward_arr = tab_all.get("forward", [])
  2243. fav_arr = tab_all.get("fav", [])
  2244. follow_arr = tab_all.get("follow", [])
  2245. stats_list = []
  2246. for i in range(n):
  2247. rec_dt = start_date + timedelta(days=i)
  2248. rec_date = rec_dt.strftime("%Y-%m-%d")
  2249. play = self._parse_count(browse[i] if i < len(browse) else "0")
  2250. recommend = self._parse_count(
  2251. like_arr[i] if i < len(like_arr) else "0"
  2252. )
  2253. like = self._parse_count(fav_arr[i] if i < len(fav_arr) else "0")
  2254. comment = self._parse_count(
  2255. comment_arr[i] if i < len(comment_arr) else "0"
  2256. )
  2257. share = self._parse_count(
  2258. forward_arr[i] if i < len(forward_arr) else "0"
  2259. )
  2260. follow = self._parse_count(
  2261. follow_arr[i] if i < len(follow_arr) else "0"
  2262. )
  2263. stats_list.append(
  2264. {
  2265. "work_id": work_id,
  2266. "record_date": rec_date,
  2267. "play_count": play,
  2268. "like_count": like,
  2269. "recommend_count": recommend,
  2270. "comment_count": comment,
  2271. "share_count": share,
  2272. "collect_count": 0,
  2273. "follow_count": follow,
  2274. "completion_rate": "0",
  2275. "avg_watch_duration": "0",
  2276. }
  2277. )
  2278. print(
  2279. f"[{self.platform_name}] work_id={work_id} 从 feed_aggreagate 解析得到 {len(stats_list)} 条日统计",
  2280. flush=True,
  2281. )
  2282. # 存入 work_day_statistics(通过 save_fn 调用 Node)
  2283. if save_fn and stats_list:
  2284. try:
  2285. save_result = save_fn(stats_list)
  2286. result["inserted"] += save_result.get("inserted", 0)
  2287. result["updated"] += save_result.get("updated", 0)
  2288. except Exception as e:
  2289. print(
  2290. f"[{self.platform_name}] work_id={work_id} 保存失败: {e}",
  2291. flush=True,
  2292. )
  2293. result["total_processed"] += 1
  2294. processed_export_ids.add(eid)
  2295. # 返回列表页,继续下一条(会回到全部视频页,下次循环会重新点击单篇视频+近30天)
  2296. await self.page.go_back()
  2297. await asyncio.sleep(2)
  2298. print(
  2299. f"[{self.platform_name}] 批量同步完成: 处理 {result['total_processed']} 个作品, 跳过 {result['total_skipped']} 个",
  2300. flush=True,
  2301. )
  2302. except Exception as e:
  2303. import traceback
  2304. traceback.print_exc()
  2305. result["success"] = False
  2306. result["error"] = str(e)
  2307. finally:
  2308. try:
  2309. await self.close_browser()
  2310. except Exception:
  2311. pass
  2312. return result
  2313. async def get_comments(
  2314. self, cookies: str, work_id: str, cursor: str = ""
  2315. ) -> CommentsResult:
  2316. """
  2317. 获取视频号作品评论(完全参考 get_weixin_work_comments.py 的接口监听逻辑)
  2318. 支持递归提取二级评论,正确处理 parent_comment_id
  2319. """
  2320. print(f"\n{'=' * 60}")
  2321. print(f"[{self.platform_name}] 获取作品评论")
  2322. print(f"[{self.platform_name}] work_id={work_id}")
  2323. print(f"{'=' * 60}")
  2324. comments: List[CommentItem] = []
  2325. total = 0
  2326. has_more = False
  2327. try:
  2328. await self.init_browser()
  2329. cookie_list = self.parse_cookies(cookies)
  2330. await self.set_cookies(cookie_list)
  2331. if not self.page:
  2332. raise Exception("Page not initialized")
  2333. # 访问评论管理页面
  2334. print(f"[{self.platform_name}] 正在打开评论页面...")
  2335. await self.page.goto(
  2336. "https://channels.weixin.qq.com/platform/interaction/comment",
  2337. timeout=30000,
  2338. )
  2339. await asyncio.sleep(2)
  2340. # 检查登录状态
  2341. current_url = self.page.url
  2342. if "login" in current_url:
  2343. raise Exception("Cookie 已过期,请重新登录")
  2344. # === 步骤1: 监听 post_list 接口获取作品列表 ===
  2345. posts = []
  2346. try:
  2347. async with self.page.expect_response(
  2348. lambda res: "/post/post_list" in res.url, timeout=20000
  2349. ) as post_resp_info:
  2350. await self.page.wait_for_selector(
  2351. ".scroll-list .comment-feed-wrap", timeout=15000
  2352. )
  2353. post_resp = await post_resp_info.value
  2354. post_data = await post_resp.json()
  2355. if post_data.get("errCode") == 0:
  2356. posts = post_data.get("data", {}).get("list", [])
  2357. print(f"[{self.platform_name}] ✅ 获取 {len(posts)} 个作品")
  2358. else:
  2359. err_msg = post_data.get("errMsg", "未知错误")
  2360. print(f"[{self.platform_name}] ❌ post_list 业务错误: {err_msg}")
  2361. return CommentsResult(
  2362. success=False,
  2363. platform=self.platform_name,
  2364. work_id=work_id,
  2365. error=f"post_list 业务错误: {err_msg}",
  2366. )
  2367. except Exception as e:
  2368. print(f"[{self.platform_name}] ❌ 获取 post_list 失败: {e}")
  2369. return CommentsResult(
  2370. success=False,
  2371. platform=self.platform_name,
  2372. work_id=work_id,
  2373. error=f"获取 post_list 失败: {e}",
  2374. )
  2375. # === 步骤2: 在 DOM 中查找目标作品 ===
  2376. feed_wraps = await self.page.query_selector_all(
  2377. ".scroll-list .comment-feed-wrap"
  2378. )
  2379. target_feed = None
  2380. target_post = None
  2381. target_index = -1
  2382. for i, feed in enumerate(feed_wraps):
  2383. if i >= len(posts):
  2384. break
  2385. post = posts[i]
  2386. object_nonce = post.get("objectNonce", "")
  2387. post_work_id = post.get("objectId", "") or object_nonce
  2388. # 匹配 work_id(支持 objectId 或 objectNonce 匹配)
  2389. if (
  2390. work_id in [post_work_id, object_nonce]
  2391. or post_work_id in work_id
  2392. or object_nonce in work_id
  2393. ):
  2394. target_feed = feed
  2395. target_post = post
  2396. target_index = i
  2397. work_title = post.get("desc", {}).get("description", "无标题")
  2398. print(f"[{self.platform_name}] ✅ 找到目标作品: {work_title}")
  2399. continue
  2400. if not target_feed or not target_post:
  2401. print(f"[{self.platform_name}] ❌ 未找到 work_id={work_id} 对应的作品")
  2402. return CommentsResult(
  2403. success=True,
  2404. platform=self.platform_name,
  2405. work_id=work_id,
  2406. comments=[],
  2407. total=0,
  2408. has_more=False,
  2409. )
  2410. # 准备作品信息(用于递归函数)
  2411. object_nonce = target_post.get("objectNonce", f"nonce_{target_index}")
  2412. work_title = target_post.get("desc", {}).get(
  2413. "description", f"作品{target_index + 1}"
  2414. )
  2415. work_info = {"work_id": object_nonce, "work_title": work_title}
  2416. # === 步骤3: 点击作品触发 comment_list 接口 ===
  2417. content_wrap = (
  2418. await target_feed.query_selector(".feed-content") or target_feed
  2419. )
  2420. try:
  2421. async with self.page.expect_response(
  2422. lambda res: "/comment/comment_list" in res.url, timeout=15000
  2423. ) as comment_resp_info:
  2424. await content_wrap.click()
  2425. await asyncio.sleep(0.8)
  2426. comment_resp = await comment_resp_info.value
  2427. comment_data = await comment_resp.json()
  2428. if comment_data.get("errCode") != 0:
  2429. err_msg = comment_data.get("errMsg", "未知错误")
  2430. print(f"[{self.platform_name}] ❌ 评论接口错误: {err_msg}")
  2431. return CommentsResult(
  2432. success=False,
  2433. platform=self.platform_name,
  2434. work_id=work_id,
  2435. error=f"评论接口错误: {err_msg}",
  2436. )
  2437. raw_comments = comment_data.get("data", {}).get("comment", [])
  2438. total = comment_data.get("data", {}).get(
  2439. "totalCount", len(raw_comments)
  2440. )
  2441. print(
  2442. f"[{self.platform_name}] 📊 原始评论数: {len(raw_comments)}, 总数: {total}"
  2443. )
  2444. # === 步骤4: 递归提取所有评论(含子评论)===
  2445. extracted = self._extract_comments(
  2446. raw_comments, parent_id="", work_info=work_info
  2447. )
  2448. # === 步骤5: 转换为 CommentItem 列表(保留 weixin.py 的数据结构)===
  2449. for c in extracted:
  2450. # 使用接口返回的 comment_id
  2451. comment_id = c.get("comment_id", "")
  2452. parent_comment_id = c.get("parent_comment_id", "")
  2453. # 构建 CommentItem(保留原有数据结构用于数据库入库)
  2454. comment_item = CommentItem(
  2455. comment_id=comment_id,
  2456. parent_comment_id=parent_comment_id,
  2457. work_id=work_id,
  2458. content=c.get("content", ""),
  2459. author_id=c.get("username", ""), # 使用 username 作为 author_id
  2460. author_name=c.get("nickname", ""),
  2461. author_avatar=c.get("avatar", ""),
  2462. like_count=c.get("like_count", 0),
  2463. reply_count=0,
  2464. create_time=c.get("create_time", ""),
  2465. )
  2466. # 添加扩展字段(用于数据库存储和后续处理)
  2467. # comment_item.parent_comment_id = c.get("parent_comment_id", "")
  2468. comment_item.is_author = c.get("is_author", False)
  2469. comment_item.create_time_unix = c.get("create_time_unix", 0)
  2470. comment_item.work_title = c.get("work_title", "")
  2471. print(comment_item)
  2472. comments.append(comment_item)
  2473. # 打印日志
  2474. author_tag = " 👤(作者)" if c.get("is_author") else ""
  2475. parent_tag = (
  2476. f" [回复: {c.get('parent_comment_id', '')}]"
  2477. if c.get("parent_comment_id")
  2478. else ""
  2479. )
  2480. print(
  2481. f"[{self.platform_name}] - [{c.get('nickname', '')}] {c.get('content', '')[:30]}... "
  2482. f"({c.get('create_time', '')}){author_tag}{parent_tag}"
  2483. )
  2484. # 判断是否还有更多(优先使用接口返回的 continueFlag,否则根据数量判断)
  2485. has_more = (
  2486. comment_data.get("data", {}).get("continueFlag", False)
  2487. or len(extracted) < total
  2488. )
  2489. print(
  2490. f"[{self.platform_name}] ✅ 共提取 {len(comments)} 条评论(含子评论)"
  2491. )
  2492. except Exception as e:
  2493. print(f"[{self.platform_name}] ❌ 获取评论失败: {e}")
  2494. import traceback
  2495. traceback.print_exc()
  2496. return CommentsResult(
  2497. success=False,
  2498. platform=self.platform_name,
  2499. work_id=work_id,
  2500. error=f"获取评论失败: {e}",
  2501. )
  2502. except Exception as e:
  2503. import traceback
  2504. traceback.print_exc()
  2505. return CommentsResult(
  2506. success=False,
  2507. platform=self.platform_name,
  2508. work_id=work_id,
  2509. error=str(e),
  2510. )
  2511. return CommentsResult(
  2512. success=True,
  2513. platform=self.platform_name,
  2514. work_id=work_id,
  2515. comments=comments,
  2516. total=total,
  2517. has_more=has_more,
  2518. )
  2519. def _extract_comments(
  2520. self, comment_list: list, parent_id: str = "", work_info: dict = None
  2521. ) -> list:
  2522. """
  2523. 递归提取一级和二级评论(完全参考 get_weixin_work_comments.py 的 extract_comments 函数)
  2524. Args:
  2525. comment_list: 评论列表(原始接口数据)
  2526. parent_id: 父评论ID(一级评论为空字符串"",二级评论为父级评论ID)
  2527. work_info: 作品信息字典
  2528. Returns:
  2529. list: 扁平化的评论列表,包含一级和二级评论
  2530. """
  2531. result = []
  2532. # 获取当前用户 username(用于判断是否为作者)
  2533. # 优先从环境变量获取,也可通过其他方式配置
  2534. my_username = getattr(self, "my_username", "") or os.environ.get(
  2535. "WEIXIN_MY_USERNAME", ""
  2536. )
  2537. for cmt in comment_list:
  2538. # 处理时间戳
  2539. create_ts = int(cmt.get("commentCreatetime", 0) or 0)
  2540. readable_time = (
  2541. datetime.fromtimestamp(create_ts).strftime("%Y-%m-%d %H:%M:%S")
  2542. if create_ts > 0
  2543. else ""
  2544. )
  2545. # 判断是否作者(如果配置了 my_username)
  2546. username = cmt.get("username", "") or ""
  2547. is_author = (my_username != "") and (username == my_username)
  2548. # 构建评论条目 - 完全参考 get_weixin_work_comments.py 的字段
  2549. entry = {
  2550. "work_id": work_info.get("work_id", "") if work_info else "",
  2551. "work_title": work_info.get("work_title", "") if work_info else "",
  2552. "comment_id": cmt.get("commentId"),
  2553. "parent_comment_id": parent_id, # 关键:一级评论为空字符串"",二级评论为父评论ID
  2554. "username": username,
  2555. "nickname": cmt.get("commentNickname", ""),
  2556. "avatar": cmt.get("commentHeadurl", ""),
  2557. "content": cmt.get("commentContent", ""),
  2558. "create_time_unix": create_ts,
  2559. "create_time": readable_time,
  2560. "is_author": is_author,
  2561. "like_count": cmt.get("commentLikeCount", 0) or 0,
  2562. }
  2563. result.append(entry)
  2564. # 递归处理二级评论(levelTwoComment)
  2565. # 关键:二级评论的 parent_id 应该是当前这条评论的 comment_id
  2566. level_two = cmt.get("levelTwoComment", []) or []
  2567. if level_two and isinstance(level_two, list) and len(level_two) > 0:
  2568. # 当前评论的 ID 作为其子评论的 parent_id
  2569. current_comment_id = cmt.get("commentId", "")
  2570. result.extend(
  2571. self._extract_comments(
  2572. level_two, parent_id=current_comment_id, work_info=work_info
  2573. )
  2574. )
  2575. return result
  2576. async def auto_reply_private_messages(self, cookies: str) -> dict:
  2577. """自动回复私信 - 集成自 pw3.py"""
  2578. print(f"\n{'=' * 60}")
  2579. print(f"[{self.platform_name}] 开始自动回复私信")
  2580. print(f"{'=' * 60}")
  2581. try:
  2582. await self.init_browser()
  2583. cookie_list = self.parse_cookies(cookies)
  2584. await self.set_cookies(cookie_list)
  2585. if not self.page:
  2586. raise Exception("Page not initialized")
  2587. # 访问私信页面
  2588. await self.page.goto(
  2589. "https://channels.weixin.qq.com/platform/private_msg", timeout=30000
  2590. )
  2591. await asyncio.sleep(3)
  2592. # 检查登录状态
  2593. current_url = self.page.url
  2594. print(f"[{self.platform_name}] 当前 URL: {current_url}")
  2595. if "login" in current_url:
  2596. raise Exception("Cookie 已过期,请重新登录")
  2597. # 等待私信页面加载(使用多个选择器容错)
  2598. try:
  2599. await self.page.wait_for_selector(
  2600. ".private-msg-list-header", timeout=15000
  2601. )
  2602. except:
  2603. # 尝试其他选择器
  2604. try:
  2605. await self.page.wait_for_selector(
  2606. ".weui-desktop-tab__navs__inner", timeout=10000
  2607. )
  2608. print(f"[{self.platform_name}] 使用备用选择器加载成功")
  2609. except:
  2610. # 截图调试
  2611. screenshot_dir = os.path.join(
  2612. os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
  2613. "screenshots",
  2614. )
  2615. os.makedirs(screenshot_dir, exist_ok=True)
  2616. screenshot_path = os.path.join(
  2617. screenshot_dir,
  2618. f"weixin_private_msg_{int(asyncio.get_event_loop().time())}.png",
  2619. )
  2620. await self.page.screenshot(path=screenshot_path)
  2621. print(
  2622. f"[{self.platform_name}] 页面加载失败,截图: {screenshot_path}"
  2623. )
  2624. raise Exception(f"私信页面加载超时,当前 URL: {current_url}")
  2625. print(f"[{self.platform_name}] 私信页面加载完成")
  2626. # 处理两个 tab
  2627. total_replied = 0
  2628. for tab_name in ["打招呼消息", "私信"]:
  2629. replied_count = await self._process_tab_sessions(tab_name)
  2630. total_replied += replied_count
  2631. print(f"[{self.platform_name}] 自动回复完成,共回复 {total_replied} 条消息")
  2632. return {
  2633. "success": True,
  2634. "platform": self.platform_name,
  2635. "replied_count": total_replied,
  2636. "message": f"成功回复 {total_replied} 条私信",
  2637. }
  2638. except Exception as e:
  2639. import traceback
  2640. traceback.print_exc()
  2641. return {"success": False, "platform": self.platform_name, "error": str(e)}
  2642. async def _process_tab_sessions(self, tab_name: str) -> int:
  2643. """处理指定 tab 下的所有会话"""
  2644. print(f"\n🔄 正在处理「{tab_name}」中的所有会话...")
  2645. if not self.page:
  2646. return 0
  2647. replied_count = 0
  2648. try:
  2649. # 点击 tab
  2650. if tab_name == "私信":
  2651. tab_link = self.page.locator(
  2652. ".weui-desktop-tab__navs__inner li"
  2653. ).first.locator("a")
  2654. elif tab_name == "打招呼消息":
  2655. tab_link = (
  2656. self.page.locator(".weui-desktop-tab__navs__inner li")
  2657. .nth(1)
  2658. .locator("a")
  2659. )
  2660. else:
  2661. return 0
  2662. if await tab_link.is_visible():
  2663. await tab_link.click()
  2664. print(f" ➤ 已点击「{tab_name}」tab")
  2665. else:
  2666. print(f" ❌ 「{tab_name}」tab 不可见")
  2667. return 0
  2668. # 等待会话列表加载
  2669. try:
  2670. await self.page.wait_for_function(
  2671. """
  2672. () => {
  2673. const hasSession = document.querySelectorAll('.session-wrap').length > 0;
  2674. const hasEmpty = !!document.querySelector('.empty-text');
  2675. return hasSession || hasEmpty;
  2676. }
  2677. """,
  2678. timeout=8000,
  2679. )
  2680. print(" ✅ 会话列表区域已加载")
  2681. except:
  2682. print(" ⚠️ 等待会话列表超时,继续尝试读取...")
  2683. # 获取会话
  2684. session_wraps = self.page.locator(".session-wrap")
  2685. session_count = await session_wraps.count()
  2686. print(f" 💬 共找到 {session_count} 个会话")
  2687. if session_count == 0:
  2688. return 0
  2689. # 遍历每个会话
  2690. for idx in range(session_count):
  2691. try:
  2692. current_sessions = self.page.locator(".session-wrap")
  2693. if idx >= await current_sessions.count():
  2694. break
  2695. session = current_sessions.nth(idx)
  2696. user_name = await session.locator(".name").inner_text()
  2697. last_preview = await session.locator(".feed-info").inner_text()
  2698. print(
  2699. f"\n ➤ [{idx + 1}/{session_count}] 正在处理: {user_name} | 最后消息: {last_preview}"
  2700. )
  2701. await session.click()
  2702. await asyncio.sleep(2)
  2703. # 提取聊天历史
  2704. history = await self._extract_chat_history()
  2705. need_reply = (not history) or (not history[-1]["is_author"])
  2706. if need_reply:
  2707. reply_text = await self._generate_reply_with_ai(history)
  2708. if reply_text == "":
  2709. reply_text = self._generate_reply(history)
  2710. # # 生成回复
  2711. # if history and history[-1]["is_author"]:
  2712. # reply_text = await self._generate_reply_with_ai(history)
  2713. # else:
  2714. # reply_text = self._generate_reply(history)
  2715. if reply_text:
  2716. print(f" 📝 回复内容: {reply_text}")
  2717. try:
  2718. textarea = self.page.locator(".edit_area").first
  2719. send_btn = self.page.locator(
  2720. 'button:has-text("发送")'
  2721. ).first
  2722. if (
  2723. await textarea.is_visible()
  2724. and await send_btn.is_visible()
  2725. ):
  2726. await textarea.fill(reply_text)
  2727. await asyncio.sleep(0.5)
  2728. await send_btn.click()
  2729. print(" ✅ 已发送")
  2730. replied_count += 1
  2731. await asyncio.sleep(1.5)
  2732. else:
  2733. print(" ❌ 输入框或发送按钮不可见")
  2734. except Exception as e:
  2735. print(f" ❌ 发送失败: {e}")
  2736. else:
  2737. print(" ➤ 无需回复")
  2738. else:
  2739. print(" ➤ 最后一条是我发的,跳过回复")
  2740. except Exception as e:
  2741. print(f" ❌ 处理会话 {idx + 1} 时出错: {e}")
  2742. continue
  2743. except Exception as e:
  2744. print(f"❌ 处理「{tab_name}」失败: {e}")
  2745. return replied_count
  2746. async def _extract_chat_history(self) -> list:
  2747. """精准提取聊天记录,区分作者(自己)和用户"""
  2748. if not self.page:
  2749. return []
  2750. history = []
  2751. message_wrappers = self.page.locator(
  2752. ".session-content-wrapper > div:not(.footer) > .text-wrapper"
  2753. )
  2754. count = await message_wrappers.count()
  2755. for i in range(count):
  2756. try:
  2757. wrapper = message_wrappers.nth(i)
  2758. # 判断方向
  2759. is_right = await wrapper.locator(".content-right").count() > 0
  2760. is_left = await wrapper.locator(".content-left").count() > 0
  2761. if not (is_left or is_right):
  2762. continue
  2763. # 提取消息文本
  2764. pre_el = wrapper.locator("pre.message-plain")
  2765. content = ""
  2766. if await pre_el.count() > 0:
  2767. content = await pre_el.inner_text()
  2768. content = content.strip()
  2769. if not content:
  2770. continue
  2771. # 获取头像
  2772. avatar_img = wrapper.locator(".avatar").first
  2773. avatar_src = ""
  2774. if await avatar_img.count() > 0:
  2775. avatar_src = await avatar_img.get_attribute("src") or ""
  2776. # 右侧 = 作者(自己)
  2777. is_author = is_right
  2778. # 获取用户名
  2779. if is_left:
  2780. name_el = wrapper.locator(".profile .name")
  2781. author_name = "用户"
  2782. if await name_el.count() > 0:
  2783. author_name = await name_el.inner_text()
  2784. else:
  2785. author_name = "我"
  2786. history.append(
  2787. {
  2788. "author": author_name,
  2789. "content": content,
  2790. "is_author": is_author,
  2791. "avatar": avatar_src,
  2792. }
  2793. )
  2794. except Exception as e:
  2795. print(f" ⚠️ 解析第 {i + 1} 条消息失败: {e}")
  2796. continue
  2797. return history
  2798. async def _generate_reply_with_ai(self, chat_history: list) -> str:
  2799. """使用 AI 生成智能回复"""
  2800. import requests
  2801. import json
  2802. try:
  2803. # 获取 AI 配置
  2804. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  2805. ai_base_url = os.environ.get(
  2806. "DASHSCOPE_BASE_URL",
  2807. "https://dashscope.aliyuncs.com/compatible-mode/v1",
  2808. )
  2809. ai_model = os.environ.get("AI_MODEL", "qwen-plus")
  2810. if not ai_api_key:
  2811. print("⚠️ 未配置 AI API Key,使用规则回复")
  2812. return self._generate_reply(chat_history)
  2813. # 构建对话上下文
  2814. messages = [
  2815. {
  2816. "role": "system",
  2817. "content": "你是一个友好的微信视频号创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。",
  2818. }
  2819. ]
  2820. for msg in chat_history:
  2821. role = "assistant" if msg["is_author"] else "user"
  2822. messages.append({"role": role, "content": msg["content"]})
  2823. # 调用 AI API
  2824. headers = {
  2825. "Authorization": f"Bearer {ai_api_key}",
  2826. "Content-Type": "application/json",
  2827. }
  2828. payload = {
  2829. "model": ai_model,
  2830. "messages": messages,
  2831. "max_tokens": 150,
  2832. "temperature": 0.8,
  2833. }
  2834. print(" 🤖 正在调用 AI 生成回复...")
  2835. response = requests.post(
  2836. f"{ai_base_url}/chat/completions",
  2837. headers=headers,
  2838. json=payload,
  2839. timeout=30,
  2840. )
  2841. if response.status_code != 200:
  2842. print(f" ⚠️ AI API 返回错误 {response.status_code},使用规则回复")
  2843. return self._generate_reply(chat_history)
  2844. result = response.json()
  2845. ai_reply = (
  2846. result.get("choices", [{}])[0]
  2847. .get("message", {})
  2848. .get("content", "")
  2849. .strip()
  2850. )
  2851. if ai_reply:
  2852. print(f" ✅ AI 生成回复: {ai_reply}")
  2853. return ai_reply
  2854. else:
  2855. print(" ⚠️ AI 返回空内容,使用规则回复")
  2856. return self._generate_reply(chat_history)
  2857. except Exception as e:
  2858. print(f" ⚠️ AI 回复生成失败: {e},使用规则回复")
  2859. return self._generate_reply(chat_history)
  2860. def _generate_reply(self, chat_history: list) -> str:
  2861. """根据完整聊天历史生成回复(规则回复方式)"""
  2862. if not chat_history:
  2863. return "你好!感谢联系~"
  2864. # 检查最后一条是否是作者发的
  2865. if chat_history[-1]["is_author"]:
  2866. return "" # 不回复
  2867. # 找最后一条用户消息
  2868. last_user_msg = chat_history[-1]["content"]
  2869. # 简单规则回复
  2870. if "谢谢" in last_user_msg or "感谢" in last_user_msg:
  2871. return "不客气!欢迎常来交流~"
  2872. elif "你好" in last_user_msg or "在吗" in last_user_msg:
  2873. return "你好!请问有什么可以帮您的?"
  2874. elif "视频" in last_user_msg or "怎么拍" in last_user_msg:
  2875. return "视频是用手机拍摄的,注意光线和稳定哦!"
  2876. else:
  2877. return "收到!我会认真阅读您的留言~"