weixin.py 135 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273
  1. # -*- coding: utf-8 -*-
  2. """
  3. 微信视频号发布器
  4. 参考: matrix/tencent_uploader/main.py
  5. """
  6. import asyncio
  7. import json
  8. import os
  9. from datetime import datetime
  10. from typing import List
  11. from .base import (
  12. BasePublisher,
  13. PublishParams,
  14. PublishResult,
  15. WorkItem,
  16. WorksResult,
  17. CommentItem,
  18. CommentsResult,
  19. )
  20. import os
  21. import time
  22. # 允许通过环境变量手动指定“上传视频入口”的选择器,便于在页面结构频繁变更时快速调整
  23. WEIXIN_UPLOAD_SELECTOR = os.environ.get("WEIXIN_UPLOAD_SELECTOR", "").strip()
  24. # 代理下视频上传持续失败时,可设 WEIXIN_UPLOAD_BYPASS_PROXY=1
  25. # 仅对上传 CDN 直连,其余页面仍走代理(解决大文件经代理易「网络出错」)
  26. WEIXIN_UPLOAD_BYPASS_PROXY = os.environ.get(
  27. "WEIXIN_UPLOAD_BYPASS_PROXY", "0"
  28. ).strip() in ("1", "true", "yes")
  29. def format_short_title(origin_title: str) -> str:
  30. """
  31. 格式化短标题
  32. - 移除特殊字符
  33. - 长度限制在 6-16 字符
  34. """
  35. allowed_special_chars = "《》:+?%°"
  36. filtered_chars = [
  37. char
  38. if char.isalnum() or char in allowed_special_chars
  39. else " "
  40. if char == ","
  41. else ""
  42. for char in origin_title
  43. ]
  44. formatted_string = "".join(filtered_chars)
  45. if len(formatted_string) > 16:
  46. formatted_string = formatted_string[:16]
  47. elif len(formatted_string) < 6:
  48. formatted_string += " " * (6 - len(formatted_string))
  49. return formatted_string
  50. class WeixinPublisher(BasePublisher):
  51. """
  52. 微信视频号发布器
  53. 使用 Playwright 自动化操作视频号创作者中心
  54. 注意: 需要使用 Chrome 浏览器,否则可能出现 H264 编码错误
  55. """
  56. platform_name = "weixin"
  57. login_url = "https://channels.weixin.qq.com/platform"
  58. publish_url = "https://channels.weixin.qq.com/platform/post/create"
  59. cookie_domain = ".weixin.qq.com"
  60. def _parse_count(self, count_str: str) -> int:
  61. """解析数字(支持带'万'的格式)"""
  62. try:
  63. count_str = count_str.strip()
  64. if "万" in count_str:
  65. return int(float(count_str.replace("万", "")) * 10000)
  66. return int(count_str)
  67. except:
  68. return 0
  69. async def ai_find_upload_selector(
  70. self, frame_html: str, frame_name: str = "main"
  71. ) -> str:
  72. """
  73. 使用 AI 从 HTML 中识别“上传视频/选择文件”相关元素的 CSS 选择器。
  74. 设计思路:
  75. - 仅在常规 DOM 选择器都失败时调用,避免频繁占用 AI 配额;
  76. - 通过 DashScope 文本模型(与验证码识别同一套配置)分析 HTML;
  77. - 返回一个适合用于 frame.locator(selector) 的 CSS 选择器。
  78. """
  79. import json
  80. import re
  81. import requests
  82. import os
  83. # 避免 HTML 过长导致 token 超限,只截取前 N 字符
  84. if not frame_html:
  85. return ""
  86. max_len = 20000
  87. if len(frame_html) > max_len:
  88. frame_html = frame_html[:max_len]
  89. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  90. ai_base_url = os.environ.get(
  91. "DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"
  92. )
  93. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  94. if not ai_api_key:
  95. print(f"[{self.platform_name}] AI上传入口识别: 未配置 AI API Key,跳过")
  96. return ""
  97. prompt = f"""
  98. 你是熟悉微信视频号后台的前端工程师,现在需要在一段 HTML 中找到“上传视频文件”的入口。
  99. 页面说明:
  100. - 平台:微信视频号(channels.weixin.qq.com)
  101. - 目标:用于上传视频文件的按钮或 input(一般会触发文件选择框)
  102. - 你会收到某个 frame 的完整 HTML 片段(不包含截图)。
  103. 请你根据下面的 HTML,推断最适合用于上传视频文件的元素,并输出一个可以被 Playwright 使用的 CSS 选择器。
  104. 要求:
  105. 1. 只考虑“上传/选择视频文件”的入口,不要返回“发布/发表/下一步”等按钮;
  106. 2. 选择器需要尽量稳定,不要使用自动生成的随机类名(例如带很多随机字母/数字的类名可以用前缀匹配);
  107. 3. 选择器必须是 CSS 选择器(不要返回 XPath);
  108. 4. 如果确实找不到合理的上传入口,返回 selector 为空字符串。
  109. 请以 JSON 格式输出,严格遵守以下结构(不要添加任何解释文字):
  110. ```json
  111. {{
  112. "selector": "CSS 选择器字符串,比如:input[type='file'] 或 div.upload-content input[type='file']"
  113. }}
  114. ```
  115. 下面是 frame=\"{frame_name}\" 的 HTML:
  116. ```html
  117. {frame_html}
  118. ```"""
  119. payload = {
  120. "model": ai_text_model,
  121. "messages": [
  122. {
  123. "role": "user",
  124. "content": prompt,
  125. }
  126. ],
  127. "max_tokens": 600,
  128. }
  129. headers = {
  130. "Authorization": f"Bearer {ai_api_key}",
  131. "Content-Type": "application/json",
  132. }
  133. try:
  134. print(
  135. f"[{self.platform_name}] AI上传入口识别: 正在分析 frame={frame_name} HTML..."
  136. )
  137. resp = requests.post(
  138. f"{ai_base_url}/chat/completions",
  139. headers=headers,
  140. json=payload,
  141. timeout=40,
  142. )
  143. if resp.status_code != 200:
  144. print(
  145. f"[{self.platform_name}] AI上传入口识别: API 返回错误 {resp.status_code}"
  146. )
  147. return ""
  148. data = resp.json()
  149. content = (
  150. data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  151. )
  152. # 尝试从 ```json``` 代码块中解析
  153. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  154. if json_match:
  155. json_str = json_match.group(1)
  156. else:
  157. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  158. json_str = json_match.group(0) if json_match else "{}"
  159. try:
  160. result = json.loads(json_str)
  161. except Exception:
  162. result = {}
  163. selector = (result.get("selector") or "").strip()
  164. print(f"[{self.platform_name}] AI上传入口识别结果: selector='{selector}'")
  165. return selector
  166. except Exception as e:
  167. print(f"[{self.platform_name}] AI上传入口识别异常: {e}")
  168. return ""
  169. async def ai_pick_selector_from_candidates(
  170. self, candidates: list, goal: str, frame_name: str = "main"
  171. ) -> str:
  172. """
  173. 将“候选元素列表(包含 css selector + 文本/属性)”发给 AI,让 AI 直接挑选最符合 goal 的元素。
  174. 适用于:HTML 里看不出上传入口、或页面大量动态渲染时。
  175. """
  176. import json
  177. import re
  178. import requests
  179. import os
  180. if not candidates:
  181. return ""
  182. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  183. ai_base_url = os.environ.get(
  184. "DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"
  185. )
  186. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  187. if not ai_api_key:
  188. print(f"[{self.platform_name}] AI候选选择器: 未配置 AI API Key,跳过")
  189. return ""
  190. # 控制长度,最多取前 120 个候选
  191. candidates = candidates[:120]
  192. prompt = f"""
  193. 你是自动化发布工程师。现在要在微信视频号(channels.weixin.qq.com)发布页面里找到“{goal}”相关的入口元素。
  194. 我会给你一组候选元素,每个候选都包含:
  195. - css: 可直接用于 Playwright 的 CSS 选择器
  196. - tag / type / role / ariaLabel / text / id / className(部分字段可能为空)
  197. 你的任务:
  198. - 从候选中选出最可能用于“{goal}”的元素,返回它的 css 选择器;
  199. - 如果没有任何候选符合,返回空字符串。
  200. 注意:
  201. - 如果 goal 是“上传视频入口”,优先选择 input[type=file] 或看起来会触发选择文件/上传的区域;
  202. - 不要选择“发布/发表/下一步”等按钮(除非 goal 明确是发布按钮)。
  203. 请严格按 JSON 输出(不要解释):
  204. ```json
  205. {{ "selector": "..." }}
  206. ```
  207. 候选列表(frame={frame_name}):
  208. ```json
  209. {json.dumps(candidates, ensure_ascii=False)}
  210. ```"""
  211. payload = {
  212. "model": ai_text_model,
  213. "messages": [{"role": "user", "content": prompt}],
  214. "max_tokens": 400,
  215. }
  216. headers = {
  217. "Authorization": f"Bearer {ai_api_key}",
  218. "Content-Type": "application/json",
  219. }
  220. try:
  221. print(
  222. f"[{self.platform_name}] AI候选选择器: 正在分析 frame={frame_name}, goal={goal} ..."
  223. )
  224. resp = requests.post(
  225. f"{ai_base_url}/chat/completions",
  226. headers=headers,
  227. json=payload,
  228. timeout=40,
  229. )
  230. if resp.status_code != 200:
  231. print(
  232. f"[{self.platform_name}] AI候选选择器: API 返回错误 {resp.status_code}"
  233. )
  234. return ""
  235. data = resp.json()
  236. content = (
  237. data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  238. )
  239. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  240. if json_match:
  241. json_str = json_match.group(1)
  242. else:
  243. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  244. json_str = json_match.group(0) if json_match else "{}"
  245. try:
  246. result = json.loads(json_str)
  247. except Exception:
  248. result = {}
  249. selector = (result.get("selector") or "").strip()
  250. print(f"[{self.platform_name}] AI候选选择器结果: selector='{selector}'")
  251. return selector
  252. except Exception as e:
  253. print(f"[{self.platform_name}] AI候选选择器异常: {e}")
  254. return ""
  255. async def _extract_relevant_html_snippets(self, html: str) -> str:
  256. """
  257. 从 HTML 中抽取与上传相关的片段,减少 token,提升 AI 命中率。
  258. - 优先抓取包含 upload/上传/file/input 等关键词的窗口片段
  259. - 若未命中关键词,返回“开头 + 结尾”的拼接
  260. """
  261. import re
  262. if not html:
  263. return ""
  264. patterns = [
  265. r"upload",
  266. r"uploader",
  267. r"file",
  268. r"type\\s*=\\s*['\\\"]file['\\\"]",
  269. r"input",
  270. r"drag",
  271. r"drop",
  272. r"选择",
  273. r"上传",
  274. r"添加",
  275. r"视频",
  276. ]
  277. regex = re.compile("|".join(patterns), re.IGNORECASE)
  278. snippets = []
  279. for m in regex.finditer(html):
  280. start = max(0, m.start() - 350)
  281. end = min(len(html), m.end() + 350)
  282. snippets.append(html[start:end])
  283. if len(snippets) >= 18:
  284. break
  285. if snippets:
  286. # 去重(粗略)
  287. unique = []
  288. seen = set()
  289. for s in snippets:
  290. key = hash(s)
  291. if key not in seen:
  292. seen.add(key)
  293. unique.append(s)
  294. return "\n\n<!-- SNIPPET -->\n\n".join(unique)[:20000]
  295. # fallback: head + tail
  296. head = html[:9000]
  297. tail = html[-9000:] if len(html) > 9000 else ""
  298. return (head + "\n\n<!-- TAIL -->\n\n" + tail)[:20000]
  299. async def init_browser(self, storage_state: str = None):
  300. """
  301. 初始化浏览器 - 参考 matrix 使用 channel=chrome 避免 H264 编码错误
  302. 重要:如果配置了代理,全程都会使用代理(包括页面访问和视频上传)
  303. """
  304. from playwright.async_api import async_playwright
  305. playwright = await async_playwright().start()
  306. proxy = (
  307. self.proxy_config
  308. if isinstance(getattr(self, "proxy_config", None), dict)
  309. else None
  310. )
  311. if proxy and proxy.get("server"):
  312. # 启用上传 bypass 时:仅对上传 CDN 直连,其余仍走代理
  313. if WEIXIN_UPLOAD_BYPASS_PROXY:
  314. bypass = ",".join([
  315. "findeross.weixin.qq.com",
  316. "upload.weixin.qq.com",
  317. "finder.video.qq.com",
  318. "szextshort.weixin.qq.com",
  319. "mp.weixin.qq.com",
  320. "*.cos.qq.com",
  321. "*.cos.ap-*.myqcloud.com",
  322. "*.myqcloud.com",
  323. "*.tencentcloudapi.com",
  324. "*.video.qq.com",
  325. "*.cdn-go.cn",
  326. ])
  327. proxy = dict(proxy)
  328. proxy["bypass"] = bypass
  329. print(
  330. f"[{self.platform_name}] 使用代理(上传 CDN 直连): {proxy.get('server')}",
  331. flush=True,
  332. )
  333. print(
  334. f"[{self.platform_name}] 💡 页面走代理,视频上传 CDN 直连,避免大文件经代理失败",
  335. flush=True,
  336. )
  337. else:
  338. print(
  339. f"[{self.platform_name}] 使用代理(全程): {proxy.get('server')}",
  340. flush=True,
  341. )
  342. print(
  343. f"[{self.platform_name}] 💡 页面访问和视频上传都将通过代理",
  344. flush=True,
  345. )
  346. # 参考 matrix: 使用系统内的 Chrome 浏览器,避免 H264 编码错误
  347. launch_opts = {"headless": self.headless}
  348. if not self.headless:
  349. launch_opts["slow_mo"] = 400
  350. print(
  351. f"[{self.platform_name}] 有头模式 + slow_mo=400ms,浏览器将可见",
  352. flush=True,
  353. )
  354. try:
  355. launch_opts["channel"] = "chrome"
  356. if proxy and proxy.get("server"):
  357. launch_opts["proxy"] = proxy
  358. # 代理下大文件上传优化:禁用 QUIC,部分代理对 QUIC 支持不佳易导致连接中断
  359. launch_opts.setdefault("args", []).append("--disable-quic")
  360. self.browser = await playwright.chromium.launch(**launch_opts)
  361. mode = "代理模式" if proxy else "直连模式"
  362. print(
  363. f"[{self.platform_name}] 使用系统 Chrome 浏览器({mode})", flush=True
  364. )
  365. except Exception as e:
  366. print(
  367. f"[{self.platform_name}] Chrome 不可用,使用 Chromium: {e}", flush=True
  368. )
  369. if "channel" in launch_opts:
  370. del launch_opts["channel"]
  371. if proxy and proxy.get("server"):
  372. launch_opts["proxy"] = proxy
  373. if "--disable-quic" not in (launch_opts.get("args") or []):
  374. launch_opts.setdefault("args", []).append("--disable-quic")
  375. self.browser = await playwright.chromium.launch(**launch_opts)
  376. # 设置 HTTP Headers
  377. headers = {
  378. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  379. "Referer": "https://channels.weixin.qq.com/platform/post/list",
  380. }
  381. self.context = await self.browser.new_context(
  382. extra_http_headers=headers,
  383. ignore_https_errors=True,
  384. viewport={"width": 1920, "height": 1080},
  385. user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  386. )
  387. self.page = await self.context.new_page()
  388. # 注入反检测脚本
  389. if hasattr(self, "inject_stealth_if_available"):
  390. await self.inject_stealth_if_available()
  391. return self.page
  392. async def set_schedule_time(self, publish_date: datetime):
  393. """设置定时发布"""
  394. if not self.page:
  395. return
  396. print(f"[{self.platform_name}] 设置定时发布...")
  397. # 点击定时选项
  398. label_element = self.page.locator("label").filter(has_text="定时").nth(1)
  399. await label_element.click()
  400. # 选择日期
  401. await self.page.click('input[placeholder="请选择发表时间"]')
  402. publish_month = f"{publish_date.month:02d}"
  403. current_month = f"{publish_month}月"
  404. # 检查月份
  405. page_month = await self.page.inner_text(
  406. 'span.weui-desktop-picker__panel__label:has-text("月")'
  407. )
  408. if page_month != current_month:
  409. await self.page.click("button.weui-desktop-btn__icon__right")
  410. # 选择日期
  411. elements = await self.page.query_selector_all(
  412. "table.weui-desktop-picker__table a"
  413. )
  414. for element in elements:
  415. class_name = await element.evaluate("el => el.className")
  416. if "weui-desktop-picker__disabled" in class_name:
  417. continue
  418. text = await element.inner_text()
  419. if text.strip() == str(publish_date.day):
  420. await element.click()
  421. break
  422. # 输入时间
  423. await self.page.click('input[placeholder="请选择时间"]')
  424. await self.page.keyboard.press("Control+KeyA")
  425. await self.page.keyboard.type(str(publish_date.hour))
  426. # 点击其他地方确认
  427. await self.page.locator("div.input-editor").click()
  428. async def handle_upload_error(self, video_path: str):
  429. """处理上传错误(含代理下「网络出错」重试优化)"""
  430. if not self.page:
  431. return
  432. using_proxy = isinstance(
  433. getattr(self, "proxy_config", None), dict
  434. ) and self.proxy_config.get("server")
  435. # 代理模式下先等待,给代理/网络恢复时间,避免连续重试加剧失败
  436. if using_proxy:
  437. wait_sec = 25
  438. print(
  439. f"[{self.platform_name}] 代理模式:检测到上传错误,等待 {wait_sec} 秒后重试...",
  440. flush=True,
  441. )
  442. await asyncio.sleep(wait_sec)
  443. print(f"[{self.platform_name}] 视频出错了,重新上传中...")
  444. # 出错时先截一张当前页面的图,方便排查(代理问题、视频格式问题等)
  445. try:
  446. timestamp = int(time.time() * 1000)
  447. screenshot_dir = os.path.join(
  448. os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
  449. "screenshots",
  450. )
  451. os.makedirs(screenshot_dir, exist_ok=True)
  452. screenshot_path = os.path.join(
  453. screenshot_dir, f"weixin_upload_error_{timestamp}.png"
  454. )
  455. await self.page.screenshot(path=screenshot_path, full_page=True)
  456. print(
  457. f"[{self.platform_name}] 上传错误截图已保存: {screenshot_path}",
  458. flush=True,
  459. )
  460. except Exception as e:
  461. print(f"[{self.platform_name}] 保存上传错误截图失败: {e}", flush=True)
  462. # 删除出错的视频重新上传
  463. await self.page.locator(
  464. 'div.media-status-content div.tag-inner:has-text("删除")'
  465. ).click()
  466. await self.page.get_by_role("button", name="删除", exact=True).click()
  467. file_input = self.page.locator('input[type="file"]')
  468. await file_input.set_input_files(video_path)
  469. async def add_title_tags(self, params: PublishParams):
  470. """添加标题和话题"""
  471. if not self.page:
  472. return
  473. print(f"[{self.platform_name}] 开始添加标题: {params.title}", flush=True)
  474. await self.page.locator("div.input-editor").click()
  475. await self.page.keyboard.type(params.title)
  476. # 添加描述(换行后在标题下方)
  477. if params.description:
  478. await self.page.keyboard.press("Enter")
  479. await self.page.keyboard.type(params.description)
  480. if params.tags:
  481. await self.page.keyboard.press("Enter")
  482. for tag in params.tags:
  483. await self.page.keyboard.type("#" + tag)
  484. await self.page.keyboard.press("Space")
  485. print(
  486. f"[{self.platform_name}] ✓ 成功添加标题和 {len(params.tags)} 个话题",
  487. flush=True,
  488. )
  489. # 🔧 设置位置(使用代理地区或默认位置)
  490. print(f"[{self.platform_name}] 准备设置位置: {params.location}", flush=True)
  491. if params.location:
  492. await self.set_location(params.location)
  493. else:
  494. print(f"[{self.platform_name}] ⚠️ 未设置位置,跳过", flush=True)
  495. async def set_location(self, location: str):
  496. """设置发布位置"""
  497. if not self.page or not location:
  498. return
  499. try:
  500. print(f"[{self.platform_name}] 正在设置位置: {location}", flush=True)
  501. # 等待页面稳定
  502. await asyncio.sleep(1)
  503. # 尝试多种方式找到位置设置元素
  504. location_selectors = [
  505. # 位置输入框
  506. 'input[placeholder*="位置"]',
  507. 'input[placeholder*="所在"]',
  508. 'input[placeholder*="地点"]',
  509. # 位置按钮
  510. 'div:has-text("所在位置")',
  511. 'div:has-text("添加位置")',
  512. 'span:has-text("位置")',
  513. ]
  514. location_element = None
  515. for selector in location_selectors:
  516. try:
  517. element = self.page.locator(selector).first
  518. if await element.count() > 0 and await element.is_visible():
  519. location_element = element
  520. print(
  521. f"[{self.platform_name}] 找到位置元素: {selector}",
  522. flush=True,
  523. )
  524. break
  525. except:
  526. continue
  527. if not location_element:
  528. print(f"[{self.platform_name}] 未找到位置设置元素,跳过", flush=True)
  529. return
  530. # 点击位置元素
  531. await location_element.click()
  532. await asyncio.sleep(1)
  533. # 查找位置输入框
  534. input_selectors = [
  535. 'input[placeholder*="搜索"]',
  536. 'input[placeholder*="输入"]',
  537. 'input[type="text"]',
  538. ]
  539. location_input = None
  540. for selector in input_selectors:
  541. try:
  542. element = self.page.locator(selector).first
  543. if await element.count() > 0 and await element.is_visible():
  544. location_input = element
  545. break
  546. except:
  547. continue
  548. if location_input:
  549. # 输入位置
  550. await location_input.fill(location)
  551. await asyncio.sleep(1)
  552. # 查找匹配的位置选项并点击
  553. try:
  554. # 等待位置建议出现
  555. await asyncio.sleep(1)
  556. # 查找包含位置文本的选项
  557. option = self.page.locator(f'text="{location}"').first
  558. if await option.count() > 0:
  559. await option.click()
  560. print(
  561. f"[{self.platform_name}] ✓ 位置设置成功: {location}",
  562. flush=True,
  563. )
  564. else:
  565. # 如果没有精确匹配,选择第一个建议
  566. first_option = self.page.locator(
  567. 'div[class*="location"] li, div[class*="suggest"] div'
  568. ).first
  569. if await first_option.count() > 0:
  570. await first_option.click()
  571. print(
  572. f"[{self.platform_name}] ✓ 位置已设置(自动选择)",
  573. flush=True,
  574. )
  575. except Exception as e:
  576. print(f"[{self.platform_name}] ⚠️ 选择位置失败: {e}", flush=True)
  577. # 按 Escape 关闭位置选择器
  578. await self.page.keyboard.press("Escape")
  579. else:
  580. print(f"[{self.platform_name}] 未找到位置输入框", flush=True)
  581. await self.page.keyboard.press("Escape")
  582. except Exception as e:
  583. print(f"[{self.platform_name}] 设置位置失败: {e}", flush=True)
  584. try:
  585. await self.page.keyboard.press("Escape")
  586. except:
  587. pass
  588. async def add_short_title(self):
  589. """添加短标题"""
  590. if not self.page:
  591. return
  592. try:
  593. short_title_element = (
  594. self.page.get_by_text("短标题", exact=True)
  595. .locator("..")
  596. .locator("xpath=following-sibling::div")
  597. .locator('span input[type="text"]')
  598. )
  599. if await short_title_element.count():
  600. # 获取已有内容作为短标题
  601. pass
  602. except:
  603. pass
  async def upload_cover(self, cover_path: str):
      """Replace the video cover image through the "更换封面" dialog.

      Best-effort: any failure is logged and swallowed so that a cover
      problem never aborts the surrounding publish flow.

      Args:
          cover_path: Local path of the cover image. The method returns
              immediately when it is empty, does not exist on disk, or when
              no page is open.
      """
      if not self.page or not cover_path or not os.path.exists(cover_path):
          return
      try:
          await asyncio.sleep(2)
          change_cover_btn = self.page.locator(
              'div.finder-tag-wrap.btn:has-text("更换封面")'
          )
          # get_attribute() returns None when the element has no ``class``
          # attribute; coerce to "" so the membership test cannot raise.
          btn_class = await change_cover_btn.get_attribute("class") or ""
          if "disabled" in btn_class:
              print(f"[{self.platform_name}] 更换封面按钮不可用,跳过封面上传")
              return

          await change_cover_btn.click()
          cover_slot = self.page.locator(
              "div.single-cover-uploader-wrap > div.wrap"
          )
          await cover_slot.hover()

          # Remove the existing cover, if there is one.
          delete_icon = self.page.locator(".del-wrap > .svg-icon")
          if await delete_icon.count():
              await delete_icon.click()

          # Clicking the cover slot opens the native file chooser.
          async with self.page.expect_file_chooser() as fc_info:
              await cover_slot.click()
          cover_chooser = await fc_info.value
          await cover_chooser.set_files(cover_path)

          # Two confirmation buttons are clicked in sequence.
          await asyncio.sleep(2)
          await self.page.get_by_role("button", name="确定").click()
          await asyncio.sleep(1)
          await self.page.get_by_role("button", name="确认").click()
          print(f"[{self.platform_name}] 封面上传成功")
      except Exception as e:
          print(f"[{self.platform_name}] 封面上传失败: {e}")
  async def check_captcha(self) -> dict:
      """Check the current page for captcha or login prompts using fixed selectors.

      Returns:
          A dict with two keys:
          ``need_captcha`` (bool) and ``captcha_type`` (``"image"`` when a
          captcha selector matched, ``"login"`` when a login-dialog selector
          matched, ``""`` when nothing matched or no page is open).
          Captcha selectors are probed before login selectors and the first
          match wins.
      """
      if not self.page:
          return {"need_captcha": False, "captcha_type": ""}

      # (captcha_type, log label, selectors) - probed in this order.
      probes = (
          (
              "image",
              "检测到验证码",
              (
                  'text="请输入验证码"',
                  'text="滑动验证"',
                  '[class*="captcha"]',
                  '[class*="verify"]',
              ),
          ),
          (
              "login",
              "检测到需要登录",
              (
                  'text="请登录"',
                  'text="扫码登录"',
                  '[class*="login-dialog"]',
              ),
          ),
      )

      try:
          for captcha_type, label, selectors in probes:
              for selector in selectors:
                  try:
                      if await self.page.locator(selector).count() > 0:
                          print(f"[{self.platform_name}] {label}: {selector}")
                          return {"need_captcha": True, "captcha_type": captcha_type}
                  except Exception:
                      # One failing selector must not stop the remaining
                      # probes; cancellation is not an Exception and still
                      # propagates.
                      continue
      except Exception as e:
          print(f"[{self.platform_name}] 验证码检测异常: {e}")
      return {"need_captcha": False, "captcha_type": ""}
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video to WeChat Channels (视频号).

      Flow: start the browser and load cookies, open the publish page, stop
      early if a login page or captcha is detected, hand the video file to
      the page's upload widget, fill in title/tags, wait until the publish
      button becomes enabled, then apply short title / schedule and click
      the publish button.

      Args:
          cookies: Serialized cookies, parsed with ``self.parse_cookies``.
          params: Publish parameters; this method reads ``video_path``,
              ``title``, ``cover_path`` and ``publish_date``.

      Returns:
          A ``PublishResult``. ``status`` is ``"success"``, ``"failed"``,
          ``"need_captcha"`` (login/captcha required) or ``"need_action"``
          (publish click was not confirmed in time).

      Raises:
          Exception: If the page was not initialized or the video file does
              not exist. Errors from browser start-up and from the initial
              navigation are not caught here.
      """
      print(f"\n{'=' * 60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"{'=' * 60}")
      self.report_progress(5, "正在初始化浏览器...")

      # Start the browser.
      await self.init_browser()
      print(f"[{self.platform_name}] 浏览器初始化完成")

      # Parse and install cookies. Only the count is logged: the cookies are
      # session credentials and must not be written to the log.
      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)
      if not self.page:
          raise Exception("Page not initialized")

      # Validate the video file.
      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(
          f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes"
      )
      self.report_progress(10, "正在打开上传页面...")

      # Behind a proxy, lengthen timeouts so large uploads do not time out.
      using_proxy = bool(
          isinstance(getattr(self, "proxy_config", None), dict)
          and self.proxy_config.get("server")
      )
      if using_proxy:
          self.page.set_default_timeout(300000)  # 5 minutes
          print(f"[{self.platform_name}] 代理模式:已设置 5 分钟操作超时", flush=True)

      # Use domcontentloaded rather than networkidle so a slow proxy does not
      # cause a navigation timeout.
      await self.page.goto(
          self.publish_url, wait_until="domcontentloaded", timeout=90000
      )
      try:
          await self.page.wait_for_load_state("load", timeout=30000)
      except Exception:
          # Best-effort: continue even if the "load" state is never reached.
          pass
      await asyncio.sleep(3)

      # Behind a proxy, wait a little longer before uploading.
      if using_proxy:
          print(
              f"[{self.platform_name}] 代理模式:等待 8 秒后开始上传...", flush=True
          )
          await asyncio.sleep(8)

      # Redirected to the login page => cookies are no longer valid.
      current_url = self.page.url
      print(f"[{self.platform_name}] 当前页面: {current_url}")
      if "login" in current_url:
          return await self._publish_captcha_result(
              "Cookie 已过期,需要重新登录", "login", current_url
          )

      # AI-based captcha detection.
      ai_captcha = await self.ai_check_captcha()
      if ai_captcha["has_captcha"]:
          print(
              f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}",
              flush=True,
          )
          return await self._publish_captcha_result(
              f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
              ai_captcha["captcha_type"],
              current_url,
          )

      # Selector-based captcha detection.
      captcha_result = await self.check_captcha()
      if captcha_result["need_captcha"]:
          return await self._publish_captcha_result(
              f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
              captcha_result["captcha_type"],
              current_url,
          )

      self.report_progress(15, "正在选择视频文件...")
      if not await self._publish_select_video_file(params.video_path):
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到上传入口(可能在 iframe 中或页面结构已变更)",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status="failed",
          )

      self.report_progress(20, "正在填充标题和话题...")
      await self.add_title_tags(params)
      self.report_progress(30, "等待视频上传完成...")

      listeners = self._publish_attach_upload_monitor()
      try:
          if not await self._publish_wait_for_upload(params, using_proxy):
              # The publish button never became enabled: report a failure.
              return await self._publish_upload_failed_result()

          self.report_progress(60, "处理视频设置...")
          # Fill in the short title (best-effort).
          try:
              short_title_el = (
                  self.page.get_by_text("短标题", exact=True)
                  .locator("..")
                  .locator("xpath=following-sibling::div")
                  .locator('span input[type="text"]')
              )
              if await short_title_el.count():
                  short_title = format_short_title(params.title)
                  await short_title_el.fill(short_title)
          except Exception as e:
              print(f"[{self.platform_name}] 填写短标题失败: {e}", flush=True)

          # Scheduled publishing.
          if params.publish_date:
              self.report_progress(70, "设置定时发布...")
              await self.set_schedule_time(params.publish_date)

          self.report_progress(80, "正在发布...")
          return await self._publish_click_and_confirm()
      finally:
          # Detach the network monitors so they do not accumulate on the
          # page across calls.
          for event, handler in listeners:
              try:
                  self.page.remove_listener(event, handler)
              except Exception:
                  # Cleanup is best-effort and must not mask the result.
                  pass

  async def _publish_captcha_result(
      self, error: str, captcha_type: str, page_url: str
  ) -> PublishResult:
      """Build the "manual login/captcha required" result with a screenshot."""
      screenshot_base64 = await self.capture_screenshot()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error=error,
          need_captcha=True,
          captcha_type=captcha_type,
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status="need_captcha",
      )

  async def _publish_select_video_file(self, video_path: str) -> bool:
      """Hand ``video_path`` to the upload widget: main frame first, then child frames."""
      # The publish page's DOM differs between accounts/rollouts and the
      # upload widget may live in an iframe, so wait for any plausible
      # anchor before probing.
      try:
          await self.page.wait_for_selector(
              "div.upload-content, input[type='file'], iframe", timeout=20000
          )
      except Exception:
          pass

      try:
          if await self._publish_try_upload_in_frame(
              self.page.main_frame, "main", video_path
          ):
              return True
      except Exception as e:
          print(f"[{self.platform_name}] main frame 上传尝试异常: {e}")

      try:
          frames = self.page.frames
          print(f"[{self.platform_name}] 发现 frames: {len(frames)}")
          for idx, fr in enumerate(frames):
              # The main frame has already been tried.
              if fr == self.page.main_frame:
                  continue
              name = fr.name or f"frame-{idx}"
              if await self._publish_try_upload_in_frame(fr, name, video_path):
                  return True
      except Exception as e:
          print(f"[{self.platform_name}] 遍历 frames 异常: {e}")
      return False

  async def _publish_try_upload_in_frame(
      self, frame, frame_name: str, video_path: str
  ) -> bool:
      """Try every known strategy to feed ``video_path`` to one frame; True on success."""
      tag = f"[{self.platform_name}] [{frame_name}]"

      async def pick_via_chooser(target):
          # Click the element and answer the native file chooser it opens.
          async with self.page.expect_file_chooser(timeout=5000) as fc_info:
              await target.click()
          chooser = await fc_info.value
          await chooser.set_files(video_path)

      async def click_or_set(target, clicked_ok, click_failed, set_ok, set_failed):
          # Prefer the file chooser; fall back to set_input_files on the
          # same element when the click does not open one.
          try:
              await pick_via_chooser(target)
              print(f"{tag} {clicked_ok}")
              return True
          except Exception as e:
              print(f"{tag} {click_failed}: {e}")
          try:
              await target.set_input_files(video_path)
              print(f"{tag} {set_ok}")
              return True
          except Exception as e2:
              print(f"{tag} {set_failed}: {e2}")
          return False

      # Strategy 0: a selector configured explicitly through the environment.
      if WEIXIN_UPLOAD_SELECTOR:
          try:
              el = frame.locator(WEIXIN_UPLOAD_SELECTOR).first
              if await el.count() > 0 and await el.is_visible():
                  print(
                      f"{tag} 使用环境变量 WEIXIN_UPLOAD_SELECTOR: {WEIXIN_UPLOAD_SELECTOR}"
                  )
                  if await click_or_set(
                      el,
                      "通过环境变量选择器上传成功",
                      "环境变量选择器点击失败,尝试直接 set_input_files",
                      "环境变量选择器 set_input_files 成功",
                      "环境变量选择器 set_input_files 仍失败",
                  ):
                      return True
          except Exception as e:
              print(f"{tag} 使用环境变量选择器定位元素失败: {e}")

      # Strategy 1: click a visible upload area to trigger the file chooser.
      click_selectors = [
          "div.upload-content",
          "div[class*='upload-content']",
          "div[class*='upload']",
          "div.add-wrap",
          "[class*='uploader']",
          "text=点击上传",
          "text=上传视频",
          "text=选择视频",
      ]
      for selector in click_selectors:
          try:
              el = frame.locator(selector).first
              if await el.count() > 0 and await el.is_visible():
                  print(f"{tag} 找到可点击上传区域: {selector}")
                  try:
                      await pick_via_chooser(el)
                      print(f"{tag} 通过 file chooser 上传成功")
                      return True
                  except Exception as e:
                      print(f"{tag} 点击触发 chooser 失败: {e}")
          except Exception:
              pass

      # Strategy 2: set the file directly on an <input type="file">
      # (common for iframes / hidden inputs). Prefer the input whose
      # ``accept`` attribute looks like a video input.
      try:
          inputs = frame.locator("input[type='file']")
          cnt = await inputs.count()
          if cnt > 0:
              best_idx = 0
              best_score = -1
              for i in range(cnt):
                  try:
                      inp = inputs.nth(i)
                      accept = (await inp.get_attribute("accept")) or ""
                      multiple = (await inp.get_attribute("multiple")) or ""
                      score = 0
                      if "video" in accept:
                          score += 10
                      if "mp4" in accept:
                          score += 3
                      if multiple:
                          score += 1
                      if score > best_score:
                          best_score = score
                          best_idx = i
                  except Exception:
                      continue
              target = inputs.nth(best_idx)
              print(
                  f"{tag} 尝试对 input[{best_idx}] set_input_files (score={best_score})"
              )
              await target.set_input_files(video_path)
              print(f"{tag} 通过 file input 上传成功")
              return True
      except Exception as e:
          # Do not return: the AI fallback below still gets a chance.
          print(f"{tag} file input 上传失败: {e}")

      # Strategy 3: let the AI helpers guess the upload entry point.
      try:
          frame_url = getattr(frame, "url", "")
          html_full = await frame.content()
          html_for_ai = await self._extract_relevant_html_snippets(html_full)
          print(
              f"{tag} frame_url={frame_url}, html_len={len(html_full)}, html_for_ai_len={len(html_for_ai)}"
          )
          ai_selector = await self.ai_find_upload_selector(
              html_for_ai, frame_name=frame_name
          )
          if ai_selector:
              try:
                  el = frame.locator(ai_selector).first
                  if await el.count() > 0:
                      print(f"{tag} 使用 AI 选择器点击上传入口: {ai_selector}")
                      if await click_or_set(
                          el,
                          "通过 AI 选择器上传成功",
                          "AI 选择器点击失败,改为直接 set_input_files",
                          "AI 选择器直接 set_input_files 成功",
                          "AI 选择器 set_input_files 仍失败",
                      ):
                          return True
              except Exception as e:
                  print(f"{tag} 使用 AI 选择器定位元素失败: {e}")
          else:
              # The AI could not infer a selector from the HTML: collect a
              # list of candidate elements in the page and let it pick one.
              try:
                  candidates = await frame.evaluate("""
                      () => {
                        function cssEscape(s) {
                          try { return CSS.escape(s); } catch (e) { return s.replace(/[^a-zA-Z0-9_-]/g, '\\\\$&'); }
                        }
                        function buildSelector(el) {
                          if (!el || el.nodeType !== 1) return '';
                          if (el.id) return `#${cssEscape(el.id)}`;
                          let parts = [];
                          let cur = el;
                          for (let depth = 0; cur && cur.nodeType === 1 && depth < 5; depth++) {
                            let part = cur.tagName.toLowerCase();
                            const role = cur.getAttribute('role');
                            const type = cur.getAttribute('type');
                            if (type) part += `[type="${type}"]`;
                            if (role) part += `[role="${role}"]`;
                            const cls = (cur.className || '').toString().trim().split(/\\s+/).filter(Boolean);
                            if (cls.length) part += '.' + cls.slice(0, 2).map(cssEscape).join('.');
                            // nth-of-type
                            let idx = 1;
                            let sib = cur;
                            while (sib && (sib = sib.previousElementSibling)) {
                              if (sib.tagName === cur.tagName) idx++;
                            }
                            part += `:nth-of-type(${idx})`;
                            parts.unshift(part);
                            cur = cur.parentElement;
                          }
                          return parts.join(' > ');
                        }
                        const nodes = Array.from(document.querySelectorAll('input, button, a, div, span'))
                          .filter(el => {
                            const tag = el.tagName.toLowerCase();
                            const type = (el.getAttribute('type') || '').toLowerCase();
                            const role = (el.getAttribute('role') || '').toLowerCase();
                            const aria = (el.getAttribute('aria-label') || '').toLowerCase();
                            const txt = (el.innerText || '').trim().slice(0, 60);
                            const cls = (el.className || '').toString().toLowerCase();
                            const isFile = tag === 'input' && type === 'file';
                            const looksClickable =
                              tag === 'button' || tag === 'a' || role === 'button' || el.onclick ||
                              cls.includes('upload') || cls.includes('uploader') || cls.includes('drag') ||
                              aria.includes('上传') || aria.includes('选择') || aria.includes('添加') ||
                              txt.includes('上传') || txt.includes('选择') || txt.includes('添加') || txt.includes('点击上传');
                            if (!isFile && !looksClickable) return false;
                            const r = el.getBoundingClientRect();
                            const visible = r.width > 5 && r.height > 5;
                            return visible;
                          });
                        const limited = nodes.slice(0, 120).map(el => ({
                          css: buildSelector(el),
                          tag: el.tagName.toLowerCase(),
                          type: el.getAttribute('type') || '',
                          role: el.getAttribute('role') || '',
                          ariaLabel: el.getAttribute('aria-label') || '',
                          text: (el.innerText || '').trim().slice(0, 80),
                          id: el.id || '',
                          className: (el.className || '').toString().slice(0, 120),
                          accept: el.getAttribute('accept') || '',
                        }));
                        return limited;
                      }
                      """)
                  ai_selector2 = await self.ai_pick_selector_from_candidates(
                      candidates=candidates,
                      goal="上传视频入口",
                      frame_name=frame_name,
                  )
                  if ai_selector2:
                      el2 = frame.locator(ai_selector2).first
                      if await el2.count() > 0:
                          print(
                              f"{tag} 使用 AI 候选选择器点击上传入口: {ai_selector2}"
                          )
                          if await click_or_set(
                              el2,
                              "通过 AI 候选选择器上传成功",
                              "AI 候选选择器点击失败,尝试 set_input_files",
                              "AI 候选选择器 set_input_files 成功",
                              "AI 候选选择器 set_input_files 仍失败",
                          ):
                              return True
              except Exception as e:
                  print(f"{tag} 构造候选并交给 AI 失败: {e}")
      except Exception as e:
          print(f"{tag} AI 上传入口识别整体失败: {e}")
      return False

  def _publish_attach_upload_monitor(self) -> list:
      """Attach loggers for upload-related network traffic; returns (event, handler) pairs."""
      from urllib.parse import urlparse

      upload_keywords = ("upload", "cos.", "myqcloud", "finder", "video", "media")
      domains_seen = set()

      def is_upload_url(url):
          return any(kw in url for kw in upload_keywords)

      def on_request(req):
          url = req.url
          if not is_upload_url(url):
              return
          domain = urlparse(url).netloc
          method = req.method
          # Log each upload-related domain only once.
          if domain not in domains_seen:
              domains_seen.add(domain)
              print(f"[{self.platform_name}] ⭐ 上传相关请求: {method} {domain} ({url[:120]})", flush=True)

      def on_response(resp):
          url = resp.url
          if not is_upload_url(url):
              return
          domain = urlparse(url).netloc
          status = resp.status
          if status >= 400 or status == 0:
              print(f"[{self.platform_name}] ❌ 上传响应失败: {status} {domain} ({url[:120]})", flush=True)
          else:
              print(f"[{self.platform_name}] ✅ 上传响应: {status} {domain}", flush=True)

      def on_request_failed(req):
          url = req.url
          if not is_upload_url(url):
              return
          domain = urlparse(url).netloc
          failure = req.failure
          print(f"[{self.platform_name}] ❌ 上传请求失败: {domain} failure={failure} ({url[:120]})", flush=True)

      listeners = [
          ("request", on_request),
          ("response", on_response),
          ("requestfailed", on_request_failed),
      ]
      for event, handler in listeners:
          self.page.on(event, handler)
      print(f"[{self.platform_name}] 已启用上传网络请求监控", flush=True)
      return listeners

  async def _publish_wait_for_upload(self, params: PublishParams, using_proxy: bool) -> bool:
      """Poll every ~3 s until the publish button is enabled; True when the upload finished."""
      # Behind a proxy allow more retries and a longer total wait, to cope
      # with transient "network error" states.
      max_upload_error_retries = 20 if using_proxy else 5
      loop_count = 300 if using_proxy else 200  # proxy mode: about 15 minutes
      if using_proxy:
          print(
              f"[{self.platform_name}] 代理模式:上传重试上限 {max_upload_error_retries} 次,总等待约 15 分钟",
              flush=True,
          )
      upload_error_retry_count = 0
      for i in range(loop_count):
          try:
              # Nominal elapsed time; bound on every iteration so later
              # uses can never hit an unbound name.
              elapsed_s = i * 3
              # Print progress every 30 s so the wait does not look stuck.
              if i > 0 and i % 10 == 0:
                  print(
                      f"[{self.platform_name}] 仍在等待上传完成... ({elapsed_s}s)",
                      flush=True,
                  )
                  # Save a screenshot every 60 s to help diagnose stalls.
                  if i % 20 == 0:
                      try:
                          ss_path = await self.save_screenshot_to_file(
                              filename_prefix=f"weixin_upload_waiting_{elapsed_s}s"
                          )
                          if ss_path:
                              print(f"[{self.platform_name}] 等待中截图已保存: {ss_path}", flush=True)
                      except Exception as ss_err:
                          print(f"[{self.platform_name}] 等待中截图失败: {ss_err}", flush=True)

              # Locate the publish button; the page structure may vary.
              publish_btn = None
              for sel in [
                  'div.form-btns button:has-text("发表")',
                  'button:has-text("发表")',
                  'button:has-text("立即发表")',
                  '[role="button"]:has-text("发表")',
              ]:
                  try:
                      el = self.page.locator(sel).first
                      if await el.count() > 0 and await el.is_visible():
                          publish_btn = el
                          break
                  except Exception:
                      continue
              if publish_btn:
                  btn_class = await publish_btn.get_attribute("class") or ""
                  if (
                      "weui-desktop-btn_disabled" not in btn_class
                      and "disabled" not in btn_class.lower()
                  ):
                      # An enabled publish button means the upload is done.
                      print(f"[{self.platform_name}] 视频上传完毕")
                      self.report_progress(50, "正在上传封面...")
                      await self.upload_cover(params.cover_path)
                      return True

              # Upload error state: div.status-msg.error together with a
              # "删除" (delete) tag on the media item.
              has_error = await self.page.locator("div.status-msg.error").count() > 0
              has_delete_btn = (
                  await self.page.locator(
                      'div.media-status-content div.tag-inner:has-text("删除")'
                  ).count()
                  > 0
              )
              if has_error and has_delete_btn:
                  upload_error_retry_count += 1
                  print(
                      f"[{self.platform_name}] 检测到上传错误,第 {upload_error_retry_count} 次重试",
                      flush=True,
                  )
                  if upload_error_retry_count >= max_upload_error_retries:
                      print(
                          f"[{self.platform_name}] 上传错误重试已达 {max_upload_error_retries} 次,放弃",
                          flush=True,
                      )
                      return False
                  # Behind a proxy, on the 6th consecutive failure reload the
                  # whole page to rebuild the connection and upload again.
                  if using_proxy and upload_error_retry_count == 6:
                      print(
                          f"[{self.platform_name}] 代理模式:尝试整页刷新以重建连接...",
                          flush=True,
                      )
                      try:
                          await self.page.reload(
                              wait_until="domcontentloaded", timeout=60000
                          )
                          await asyncio.sleep(8)
                          await self.page.wait_for_selector(
                              "div.upload-content, input[type='file']", timeout=20000
                          )
                          upload_el = self.page.locator("div.upload-content").first
                          if (
                              await upload_el.count() > 0
                              and await upload_el.is_visible()
                          ):
                              async with self.page.expect_file_chooser(
                                  timeout=10000
                              ) as fc:
                                  await upload_el.click()
                              chooser = await fc.value
                              await chooser.set_files(params.video_path)
                              print(
                                  f"[{self.platform_name}] 刷新后重新上传成功",
                                  flush=True,
                              )
                          else:
                              file_input = self.page.locator(
                                  'input[type="file"]'
                              ).first
                              if await file_input.count() > 0:
                                  await file_input.set_input_files(params.video_path)
                          await asyncio.sleep(2)
                          # The reload cleared the form: fill it in again.
                          await self.add_title_tags(params)
                          upload_error_retry_count = 0
                      except Exception as e:
                          print(
                              f"[{self.platform_name}] 整页刷新重传失败: {e}",
                              flush=True,
                          )
                          await self.handle_upload_error(params.video_path)
                  else:
                      await self.handle_upload_error(params.video_path)
              else:
                  upload_error_retry_count = 0  # reset when no error is shown
              await asyncio.sleep(3)
          except Exception as e:
              print(f"[{self.platform_name}] 等待上传时异常: {e}", flush=True)
              await asyncio.sleep(3)
      return False

  async def _publish_upload_failed_result(self) -> PublishResult:
      """Build the "upload failed" result, saving a full-page screenshot to disk as well."""
      try:
          screenshot_base64 = await self.capture_screenshot()
      except Exception as e:
          print(f"[{self.platform_name}] 截图失败: {e}", flush=True)
          screenshot_base64 = ""
      try:
          ts = int(time.time() * 1000)
          # "screenshots" directory next to this file's parent directory.
          screenshot_dir = os.path.join(
              os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
              "screenshots",
          )
          os.makedirs(screenshot_dir, exist_ok=True)
          err_path = os.path.join(
              screenshot_dir, f"weixin_upload_timeout_{ts}.png"
          )
          await self.page.screenshot(path=err_path, full_page=True)
          print(
              f"[{self.platform_name}] 超时/失败截图已保存: {err_path}",
              flush=True,
          )
      except Exception as e:
          print(f"[{self.platform_name}] 保存失败截图到文件失败: {e}", flush=True)
      page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="视频上传失败,请查看截图",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status="failed",
      )

  async def _publish_click_and_confirm(self) -> PublishResult:
      """Click the publish button (up to 30 attempts) and wait for the redirect to the post list."""
      list_url = "https://channels.weixin.qq.com/platform/post/list"
      for i in range(30):
          try:
              publish_btn = self.page.locator('div.form-btns button:has-text("发表")')
              if await publish_btn.count():
                  print(f"[{self.platform_name}] 点击发布按钮...")
                  await publish_btn.click()
              # Success is signalled by the redirect to the post list page.
              await self.page.wait_for_url(list_url, timeout=10000)
              self.report_progress(100, "发布成功")
              print(f"[{self.platform_name}] 视频发布成功!")
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=True,
                  platform=self.platform_name,
                  message="发布成功",
                  screenshot_base64=screenshot_base64,
                  page_url=self.page.url,
                  status="success",
              )
          except Exception as e:
              # The wait may fail even though the redirect already
              # happened, so re-check the URL before retrying.
              current_url = self.page.url
              if list_url in current_url:
                  self.report_progress(100, "发布成功")
                  print(f"[{self.platform_name}] 视频发布成功!")
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=True,
                      platform=self.platform_name,
                      message="发布成功",
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status="success",
                  )
              print(
                  f"[{self.platform_name}] 视频正在发布中... {i + 1}/30, URL: {current_url}"
              )
              await asyncio.sleep(1)

      # The redirect was never observed: the caller has to check manually.
      screenshot_base64 = await self.capture_screenshot()
      page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="发布超时,请检查发布状态",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status="need_action",
      )
  async def _get_works_fallback_dom(self, page_size: int) -> tuple:
      """Scrape the works list from the current page's DOM.

      Used as a fallback when the post-list API request fails.

      Args:
          page_size: Maximum number of list items to read.

      Returns:
          A 4-tuple ``(works, total, has_more, next_page)``: ``works`` is a
          list of ``WorkItem``, ``total`` is ``len(works)``, ``has_more`` is
          True when the DOM holds more items than ``page_size``, and
          ``next_page`` is always ``""``. On an unexpected error the error
          is logged and whatever was collected so far is returned (with
          ``total`` left at 0).
      """
      import zlib

      works: List[WorkItem] = []
      total = 0
      has_more = False

      async def attr_or_empty(locator, attr_name):
          # Attribute value of the element, or "" when it is absent/unset.
          if await locator.count() > 0:
              return await locator.get_attribute(attr_name) or ""
          return ""

      async def text_or_empty(locator):
          # Stripped text of the element, or "" when it is absent/empty.
          if await locator.count() > 0:
              return (await locator.text_content() or "").strip()
          return ""

      def fingerprint(text):
          # Stable across processes, unlike the builtin hash(), whose str
          # hashing is salted per interpreter run.
          return zlib.crc32(text.encode("utf-8"))

      # Icon selector -> counter it labels; the first matching icon wins.
      icon_to_counter = (
          ("span.weui-icon-outlined-eyes-on", "play_count"),
          ("span.weui-icon-outlined-like", "like_count"),
          ("span.weui-icon-outlined-comment", "comment_count"),
          ("use[xlink\\:href='#icon-share']", "share_count"),
          ("use[xlink\\:href='#icon-thumb']", "collect_count"),
      )

      try:
          # Wait until any of the known list-item selectors shows up.
          for selector in [
              "div.post-feed-item",
              "[class*='post-feed']",
              "[class*='feed-item']",
              "div[class*='post']",
          ]:
              try:
                  await self.page.wait_for_selector(selector, timeout=8000)
                  break
              except Exception:
                  continue

          post_items = self.page.locator("div.post-feed-item")
          item_count = await post_items.count()
          if item_count == 0:
              post_items = self.page.locator("[class*='post-feed']")
              item_count = await post_items.count()

          for i in range(min(item_count, page_size)):
              try:
                  item = post_items.nth(i)

                  cover_url = await attr_or_empty(
                      item.locator("div.media img.thumb").first, "src"
                  )
                  if not cover_url:
                      # Fall back to the first image of any kind.
                      cover_url = await attr_or_empty(
                          item.locator("img").first, "src"
                      )
                  title = await text_or_empty(item.locator("div.post-title").first)
                  publish_time = await text_or_empty(
                      item.locator("div.post-time span").first
                  )

                  counters = {
                      "play_count": 0,
                      "like_count": 0,
                      "comment_count": 0,
                      "share_count": 0,
                      "collect_count": 0,
                  }
                  data_items = item.locator("div.post-data div.data-item")
                  for j in range(await data_items.count()):
                      data_item = data_items.nth(j)
                      count_text = (
                          await data_item.locator("span.count").text_content() or "0"
                      ).strip()
                      for icon_selector, counter_name in icon_to_counter:
                          if await data_item.locator(icon_selector).count() > 0:
                              counters[counter_name] = self._parse_count(count_text)
                              break

                  # No real work id is available in the DOM, so derive one
                  # from the position, title and publish time.
                  work_id = (
                      f"weixin_{i}_{fingerprint(title)}_{fingerprint(publish_time)}"
                  )
                  works.append(
                      WorkItem(
                          work_id=work_id,
                          title=title or "无标题",
                          cover_url=cover_url,
                          duration=0,
                          status="published",
                          publish_time=publish_time,
                          play_count=counters["play_count"],
                          like_count=counters["like_count"],
                          comment_count=counters["comment_count"],
                          share_count=counters["share_count"],
                          collect_count=counters["collect_count"],
                      )
                  )
              except Exception as e:
                  # Skip the item that failed to parse and keep going.
                  print(
                      f"[{self.platform_name}] DOM 解析作品 {i} 失败: {e}", flush=True
                  )
                  continue

          total = len(works)
          has_more = item_count > page_size
          print(f"[{self.platform_name}] DOM 回退获取 {len(works)} 条", flush=True)
      except Exception as e:
          print(f"[{self.platform_name}] DOM 回退失败: {e}", flush=True)
      return (works, total, has_more, "")
  async def get_works(
      self, cookies: str, page: int = 0, page_size: int = 20
  ) -> WorksResult:
      """Fetch one page of the account's works through the ``post_list`` API.

      The request is issued with ``fetch`` from inside the browser page (after
      the cookies have been installed and the post-list page opened) so that
      the session credentials are sent with it. If the first page fails or
      comes back empty, ``_get_works_fallback_dom`` is tried instead.

      Args:
          cookies: Cookie string passed to ``parse_cookies`` / ``set_cookies``.
          page: Zero-based page number, or the ``next_page`` string returned
              by a previous call (sent to the API as ``rawKeyBuff``).
          page_size: Number of works requested per page.

      Returns:
          ``WorksResult`` with ``success=True`` and the parsed works, total,
          ``has_more`` and ``next_page`` (the API's ``lastBuff``), or
          ``success=False`` with ``error`` set when the request fails and no
          fallback data is available.
      """
      import hashlib  # local import: only needed for the fallback work id

      # Pagination: the first page uses currentPage=1 / rawKeyBuff=null; later
      # pages either increment currentPage or pass the previous lastBuff.
      if page is None or page == "" or page == "0" or (isinstance(page, int) and page == 0):
          current_page = 1
          raw_key_buff = None
      elif isinstance(page, int):
          current_page = page + 1
          raw_key_buff = None
      else:
          current_page = 1
          raw_key_buff = str(page)
      is_first_page = current_page == 1 and raw_key_buff is None
      ts_ms = str(int(time.time() * 1000))

      print(f"\n{'=' * 60}")
      print(
          f"[{self.platform_name}] 获取作品列表 currentPage={current_page}, pageSize={page_size}, rawKeyBuff={raw_key_buff[:40] if raw_key_buff else 'null'}..."
      )
      print(f"{'=' * 60}")

      works: List[WorkItem] = []
      total = 0
      has_more = False
      next_page = ""

      async def fallback_or_error(error_text):
          """Return DOM-fallback works for the first page, else a failure result."""
          if is_first_page:
              (
                  fb_works,
                  fb_total,
                  fb_has_more,
                  fb_next_page,
              ) = await self._get_works_fallback_dom(page_size)
              if fb_works:
                  return WorksResult(
                      success=True,
                      platform=self.platform_name,
                      works=fb_works,
                      total=fb_total,
                      has_more=fb_has_more,
                      next_page=fb_next_page,
                  )
          return WorksResult(
              success=False,
              platform=self.platform_name,
              error=error_text,
          )

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          await self.page.goto(
              "https://channels.weixin.qq.com/platform/post/list", timeout=30000
          )
          await asyncio.sleep(3)
          current_url = self.page.url
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          api_url = "https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_list"
          req_body = {
              "pageSize": page_size,
              "currentPage": current_page,
              "userpageType": 11,
              "stickyOrder": True,
              "timestamp": ts_ms,
              "_log_finder_uin": "",
              "_log_finder_id": "",
              "rawKeyBuff": raw_key_buff,
              "pluginSessionId": None,
              "scene": 7,
              "reqScene": 7,
          }
          body_str = json.dumps(req_body)
          # Run the POST inside the page; network/JSON errors are reported
          # back as {"error": ...} instead of raising in the browser.
          response = await self.page.evaluate(
              """
              async ([url, bodyStr]) => {
                  try {
                      const resp = await fetch(url, {
                          method: 'POST',
                          credentials: 'include',
                          headers: {
                              'Content-Type': 'application/json',
                              'Accept': '*/*',
                              'Referer': 'https://channels.weixin.qq.com/platform/post/list'
                          },
                          body: bodyStr
                      });
                      return await resp.json();
                  } catch (e) {
                      return { error: e.toString() };
                  }
              }
              """,
              [api_url, body_str],
          )

          if response.get("error"):
              print(
                  f"[{self.platform_name}] API 请求失败: {response.get('error')}",
                  flush=True,
              )
              return await fallback_or_error(response.get("error", "API 请求失败"))

          err_code = response.get("errCode", -1)
          if err_code != 0:
              err_msg = response.get("errMsg", "unknown")
              print(
                  f"[{self.platform_name}] API errCode={err_code}, errMsg={err_msg}, 完整响应(前800字): {json.dumps(response, ensure_ascii=False)[:800]}",
                  flush=True,
              )
              return await fallback_or_error(f"errCode={err_code}, errMsg={err_msg}")

          data = response.get("data") or {}
          raw_list = data.get("list") or []
          total = int(data.get("totalCount") or 0)
          has_more = bool(data.get("continueFlag", False))
          next_page = (data.get("lastBuff") or "").strip()
          print(
              f"[{self.platform_name}] API 响应: list_len={len(raw_list)}, totalCount={total}, continueFlag={has_more}, lastBuff={next_page[:50] if next_page else ''}..."
          )

          # An empty first page may just mean the API did not answer as
          # expected, so give the DOM fallback a chance before returning it.
          if is_first_page and len(raw_list) == 0:
              works_fb, total_fb, has_more_fb, _ = await self._get_works_fallback_dom(
                  page_size
              )
              if works_fb:
                  return WorksResult(
                      success=True,
                      platform=self.platform_name,
                      works=works_fb,
                      total=total_fb,
                      has_more=has_more_fb,
                      next_page="",
                  )

          for item in raw_list:
              try:
                  desc = item.get("desc") or {}
                  # works.platform_video_id consistently stores the exportId
                  # returned by post_list (e.g. "export/xxx").
                  work_id = str(
                      item.get("exportId")
                      or item.get("objectId")
                      or item.get("id")
                      or ""
                  ).strip()
                  if not work_id:
                      # Built-in hash() of a str is salted per process, so it
                      # cannot be used for an id that must be stable between
                      # runs; derive it from a digest instead.
                      seed = f"{item.get('createTime', 0)}|{desc.get('description', '')}"
                      digest = hashlib.sha256(seed.encode("utf-8")).hexdigest()[:16]
                      work_id = f"weixin_{digest}"

                  title = (desc.get("description") or "").strip() or "无标题"
                  cover_url = ""
                  duration = 0
                  media_list = desc.get("media") or []
                  if media_list and isinstance(media_list[0], dict):
                      m = media_list[0]
                      cover_url = (
                          m.get("coverUrl") or m.get("thumbUrl") or ""
                      ).strip()
                      duration = int(m.get("videoPlayLen") or 0)

                  create_ts = item.get("createTime") or 0
                  if isinstance(create_ts, (int, float)) and create_ts:
                      publish_time = datetime.fromtimestamp(create_ts).strftime(
                          "%Y-%m-%d %H:%M:%S"
                      )
                  else:
                      publish_time = str(create_ts) if create_ts else ""

                  # likeCount is the "recommend" count; favCount is the "like" count.
                  read_count = int(item.get("readCount") or 0)
                  like_count = int(item.get("favCount") or 0)
                  comment_count = int(item.get("commentCount") or 0)
                  forward_count = int(item.get("forwardCount") or 0)
                  works.append(
                      WorkItem(
                          work_id=work_id,
                          title=title,
                          cover_url=cover_url,
                          duration=duration,
                          status="published",
                          publish_time=publish_time,
                          play_count=read_count,
                          like_count=like_count,
                          comment_count=comment_count,
                          share_count=forward_count,
                          collect_count=0,
                      )
                  )
              except Exception as e:
                  # One malformed item must not discard the rest of the page.
                  print(f"[{self.platform_name}] 解析作品项失败: {e}", flush=True)
                  continue

          if total == 0 and works:
              total = len(works)
          print(
              f"[{self.platform_name}] 本页获取 {len(works)} 条,totalCount={total}, next_page={bool(next_page)}"
          )
      except Exception as e:
          import traceback

          traceback.print_exc()
          return WorksResult(success=False, platform=self.platform_name, error=str(e))

      return WorksResult(
          success=True,
          platform=self.platform_name,
          works=works,
          total=total,
          has_more=has_more,
          next_page=next_page,
      )
  async def sync_work_daily_stats_via_browser(
      self, cookies: str, work_id: int, platform_video_id: str
  ) -> dict:
      """Collect one work's daily statistics by driving the browser.

      Flow:
          1. Open the statistic/post page, switch to the single-video tab and
             select the last-30-days range.
          2. Listen for the ``statistic/post_list`` response and map the
             work's exportId (``platform_video_id``) to its objectId.
          3. Click the "view" link of the row whose ``data-row-key`` is that
             objectId.
          4. On the detail page select last-30-days and download the table.
          5. Parse the CSV into a list of per-day statistics.

      Args:
          cookies: Cookie string passed to ``parse_cookies`` / ``set_cookies``.
          work_id: Id copied into every returned statistics record.
          platform_video_id: The work's exportId as returned by post_list.

      Returns:
          Dict with ``success``, ``error``, ``statistics`` (list of per-day
          dicts), ``inserted`` and ``updated`` (both left at 0 here).
          Failures are reported through ``success``/``error`` rather than
          raised. The browser is always closed before returning.
      """
      import csv
      from pathlib import Path

      result = {
          "success": False,
          "error": "",
          "statistics": [],
          "inserted": 0,
          "updated": 0,
      }
      post_list_data = {"list": []}

      async def handle_response(response):
          """Remember the list carried by the latest successful post_list POST."""
          try:
              if (
                  "statistic/post_list" in response.url
                  and response.request.method == "POST"
              ):
                  try:
                      body = await response.json()
                      if body.get("errCode") == 0 and body.get("data"):
                          post_list_data["list"] = body.get("data", {}).get(
                              "list", []
                          )
                  except Exception:
                      pass
          except Exception:
              pass

      def parse_record_date(text):
          """Parse a CSV date cell into a datetime, or return None."""
          norm = (
              text[:10]
              .replace("年", "-")
              .replace("月", "-")
              .replace("日", "-")
              .replace("/", "-")
              # "2024年1月5日" normalises to "2024-1-5-"; without this the
              # trailing dash produced a 4th empty part and the row was lost.
              .strip("-")
          )
          if len(norm) >= 8 and norm.count("-") >= 2:
              parts = norm.split("-")
              if len(parts) == 3:
                  try:
                      y, m, d = int(parts[0]), int(parts[1]), int(parts[2])
                      if 2000 <= y <= 2100 and 1 <= m <= 12 and 1 <= d <= 31:
                          return datetime(y, m, d)
                  except (ValueError, IndexError):
                      pass
          for fmt in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"]:
              try:
                  return datetime.strptime((text.split()[0] if text else "")[:10], fmt)
              except (ValueError, IndexError):
                  continue
          return None

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")
          self.page.on("response", handle_response)

          # 1. Open the data-analysis / works page.
          print(f"[{self.platform_name}] 打开数据分析页...", flush=True)
          await self.page.goto(
              "https://channels.weixin.qq.com/platform/statistic/post", timeout=30000
          )
          if not self.headless:
              print(
                  f"[{self.platform_name}] 浏览器已打开,请将窗口置于前台观看操作(等待 5 秒)...",
                  flush=True,
              )
              await asyncio.sleep(5)
          else:
              await asyncio.sleep(3)
          if "login" in self.page.url:
              raise Exception("Cookie 已过期,请重新登录")

          # 2. Click the single-video tab (fall back to a text selector).
          tab_sel = "div.weui-desktop-tab__navs ul li:nth-child(2) a"
          try:
              await self.page.wait_for_selector(tab_sel, timeout=8000)
              await self.page.click(tab_sel)
          except Exception:
              tab_sel = "a:has-text('单篇视频')"
              await self.page.click(tab_sel)
          await asyncio.sleep(2)

          # 3. Click the last-30-days range filter of the single-video view.
          #    Selectors are ordered from most to least specific.
          radio_selectors = [
              "div.post-single-wrap div.weui-desktop-radio-group.radio-group label:has-text('近30天')",
              "div.post-single-wrap div.filter-wrap div.weui-desktop-radio-group label:nth-child(2)",
              "div.post-single-wrap div.card-body div.filter-wrap div:nth-child(2) label:nth-child(2)",
              "div.post-single-wrap label:has-text('近30天')",
              "div.weui-desktop-radio-group label:has-text('近30天')",
              "label:has-text('近30天')",
          ]
          clicked = False
          for sel in radio_selectors:
              try:
                  el = self.page.locator(sel).first
                  if await el.count() > 0:
                      await el.click()
                      clicked = True
                      print(
                          f"[{self.platform_name}] 已点击近30天按钮 (selector: {sel[:50]}...)",
                          flush=True,
                      )
                      break
              except Exception:
                  continue
          if not clicked:
              print(
                  f"[{self.platform_name}] 警告: 未找到近30天按钮,继续尝试...",
                  flush=True,
              )
          await asyncio.sleep(3)

          # 4. Resolve exportId -> objectId from the captured post_list.
          export_id_to_object = {}
          for item in post_list_data["list"] or []:
              eid = (item.get("exportId") or "").strip()
              oid = (item.get("objectId") or "").strip()
              if eid and oid:
                  export_id_to_object[eid] = oid

          raw_id = platform_video_id or ""
          wanted_id = raw_id.strip()
          object_id = None
          # An empty id must never reach the substring match below: "" is a
          # substring of every exportId and would select an arbitrary work.
          if wanted_id:
              object_id = export_id_to_object.get(raw_id) or export_id_to_object.get(
                  wanted_id
              )
              if not object_id:
                  # Loose match: platform_video_id may carry a prefix.
                  for eid, oid in export_id_to_object.items():
                      if wanted_id in eid or eid in wanted_id:
                          object_id = oid
                          break
          if not object_id:
              result["error"] = (
                  f"未在 post_list 中匹配到 exportId={platform_video_id}"
              )
              print(f"[{self.platform_name}] {result['error']}", flush=True)
              return result

          # 5. Click "view" in the row whose data-row-key is the objectId.
          view_btn = self.page.locator(
              f'tr[data-row-key="{object_id}"] a.detail-wrap, tr[data-row-key="{object_id}"] a:has-text("查看")'
          )
          try:
              await view_btn.first.wait_for(timeout=5000)
              await view_btn.first.click()
          except Exception as e:
              view_btn = self.page.locator(f'tr[data-row-key="{object_id}"] a')
              if await view_btn.count() > 0:
                  await view_btn.first.click()
              else:
                  raise Exception(f"未找到 objectId={object_id} 的查看按钮: {e}")
          await asyncio.sleep(3)

          # 6. Detail page: select last-30-days, then download the table.
          detail_radio = (
              "div.post-statistic-common div.filter-wrap label:nth-child(2)"
          )
          for sel in [detail_radio, "div.main-body label:has-text('近30天')"]:
              try:
                  el = self.page.locator(sel).first
                  if await el.count() > 0:
                      await el.click()
                      break
              except Exception:
                  continue
          await asyncio.sleep(2)

          # The CSV is stored in the "tmp" directory three levels above this file.
          download_dir = Path(__file__).resolve().parent.parent.parent / "tmp"
          download_dir.mkdir(parents=True, exist_ok=True)
          async with self.page.expect_download(timeout=15000) as download_info:
              download_btn = self.page.locator(
                  "div.post-statistic-common div.filter-extra a, a:has-text('下载表格')"
              )
              if await download_btn.count() == 0:
                  raise Exception("未找到「下载表格」按钮")
              await download_btn.first.click()
          download = await download_info.value
          save_path = download_dir / f"work_{work_id}_{int(time.time())}.csv"
          await download.save_as(save_path)

          # 7. Parse the CSV into statistics; the file is removed afterwards
          #    even when parsing fails so tmp does not accumulate downloads.
          stats_list = []
          try:
              with open(save_path, "r", encoding="utf-8-sig", errors="replace") as f:
                  rows = list(csv.DictReader(f))
              for row in rows:
                  # DictReader fills missing cells of short rows with None,
                  # hence the trailing `or ""` before calling str methods.
                  date_val = (
                      row.get("日期")
                      or row.get("date")
                      or row.get("时间")
                      or row.get("时间周期", "")
                      or ""
                  ).strip()
                  if not date_val:
                      continue
                  dt = parse_record_date(date_val)
                  if not dt:
                      continue
                  rec_date = dt.strftime("%Y-%m-%d")
                  play = self._parse_count(
                      row.get("播放", "")
                      or row.get("播放量", "")
                      or row.get("play_count", "0")
                  )
                  like = self._parse_count(
                      row.get("点赞", "") or row.get("like_count", "0")
                  )
                  comment = self._parse_count(
                      row.get("评论", "") or row.get("comment_count", "0")
                  )
                  share = self._parse_count(
                      row.get("分享", "") or row.get("share_count", "0")
                  )
                  collect = self._parse_count(
                      row.get("收藏", "") or row.get("collect_count", "0")
                  )
                  comp_rate = (
                      row.get("完播率", "") or row.get("completion_rate", "0") or ""
                  ).strip().rstrip("%") or "0"
                  avg_dur = (
                      row.get("平均播放时长", "")
                      or row.get("avg_watch_duration", "0")
                      or ""
                  ).strip()
                  stats_list.append(
                      {
                          "work_id": work_id,
                          "record_date": rec_date,
                          "play_count": play,
                          "like_count": like,
                          "comment_count": comment,
                          "share_count": share,
                          "collect_count": collect,
                          "completion_rate": comp_rate,
                          "avg_watch_duration": avg_dur,
                      }
                  )
          finally:
              try:
                  os.remove(save_path)
              except OSError:
                  pass

          result["statistics"] = stats_list
          result["success"] = True
      except Exception as e:
          import traceback

          traceback.print_exc()
          result["error"] = str(e)
      finally:
          try:
              await self.close_browser()
          except Exception:
              pass
      return result
  async def sync_account_works_daily_stats_via_browser(
      self,
      cookies: str,
      works: List[dict],
      save_fn=None,
      update_works_fn=None,
      headless: bool = True,
  ) -> dict:
      """Batch-collect daily statistics for an account's works via the browser.

      Flow:
          1. Open statistic/post, switch to the single-video tab and select
             the last-30-days range.
          2. Capture the ``statistic/post_list`` response; when
             ``update_works_fn`` is given, turn it into ``yesterday_*``
             updates for the matching works.
          3. For every post_list item whose exportId matches one of ``works``:
             click "view" in the row keyed by its objectId, read the
             ``feed_aggreagate_data_by_tab_type`` response captured for the
             detail page, take the "all" tab series and build one record per
             day (the last array element is treated as yesterday), pass the
             records to ``save_fn``, then navigate back to the list.
             Items without a match are counted as skipped.

      Args:
          cookies: Cookie string passed to ``parse_cookies`` / ``set_cookies``.
          works: ``[{"work_id": int, "platform_video_id": str}, ...]``; the
              camelCase keys ``workId`` / ``platformVideoId`` are accepted too.
          save_fn: Optional callable ``(stats_list) -> {"inserted", "updated"}``
              used to persist the per-day records.
          update_works_fn: Optional callable ``(updates) -> {"updated"}`` used
              to persist the values parsed from post_list.
          headless: Only selects how long to wait after opening the page.

      Returns:
          Dict with ``success``, ``error``, ``total_processed``,
          ``total_skipped``, ``inserted``, ``updated`` and ``works_updated``.
          Browser errors are reported through ``success``/``error`` rather
          than raised. The browser is always closed before returning.
      """
      from datetime import timedelta

      result = {
          "success": True,
          "error": "",
          "total_processed": 0,
          "total_skipped": 0,
          "inserted": 0,
          "updated": 0,
          "works_updated": 0,
      }

      # platform_video_id (exportId) -> work_id
      export_id_to_work = {}
      for w in works:
          pvid = (
              w.get("platform_video_id") or w.get("platformVideoId") or ""
          ).strip()
          wid = w.get("work_id") or w.get("workId")
          if pvid and wid is not None:
              export_id_to_work[pvid] = int(wid)
              # Also index the id without its prefix ("export/xxx" vs "xxx").
              # An empty suffix (id ending in "/") is not indexed: an empty
              # key would substring-match every exportId.
              short_id = pvid.split("/")[-1]
              if "/" in pvid and short_id:
                  export_id_to_work[short_id] = int(wid)

      def match_work_id(export_id):
          """Return the work_id for an exportId (exact, then substring), or None."""
          if not export_id:
              # "" is a substring of every key, so it must never be matched.
              return None
          matched = export_id_to_work.get(export_id)
          if matched is None:
              for known_id, known_work in export_id_to_work.items():
                  if export_id in known_id or known_id in export_id:
                      return known_work
          return matched

      def value_at(series, index):
          """Return series[index], or "0" when the series is too short."""
          return series[index] if index < len(series) else "0"

      post_list_data = {"list": []}
      feed_aggreagate_data = {"body": None}

      async def handle_response(response):
          """Keep the latest successful post_list list and feed_aggreagate body."""
          try:
              url = response.url
              if "statistic/post_list" in url:
                  try:
                      body = await response.json()
                      if body.get("errCode") == 0 and body.get("data"):
                          post_list_data["list"] = body.get("data", {}).get(
                              "list", []
                          )
                  except Exception:
                      pass
              elif "feed_aggreagate_data_by_tab_type" in url:
                  try:
                      body = await response.json()
                      if body.get("errCode") == 0 and body.get("data"):
                          feed_aggreagate_data["body"] = body
                  except Exception:
                      pass
          except Exception:
              pass

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")
          self.page.on("response", handle_response)

          # 1. Open the data-analysis / works page.
          print(f"[{self.platform_name}] 打开数据分析页...", flush=True)
          await self.page.goto(
              "https://channels.weixin.qq.com/platform/statistic/post", timeout=30000
          )
          if not headless:
              print(
                  f"[{self.platform_name}] 浏览器已打开,请将窗口置于前台观看操作(等待 5 秒)...",
                  flush=True,
              )
              await asyncio.sleep(5)
          else:
              await asyncio.sleep(3)
          if "login" in self.page.url:
              raise Exception("Cookie 已过期,请重新登录")

          # 2. Click the single-video tab (fall back to a text selector).
          tab_sel = "div.weui-desktop-tab__navs ul li:nth-child(2) a"
          try:
              await self.page.wait_for_selector(tab_sel, timeout=8000)
              await self.page.click(tab_sel)
          except Exception:
              tab_sel = "a:has-text('单篇视频')"
              await self.page.click(tab_sel)
          await asyncio.sleep(2)

          # 3. Drop whatever list was captured so far, click last-30-days and
          #    wait for the handler to capture the resulting post_list.
          post_list_data["list"] = []
          radio_selectors = [
              "div.post-single-wrap div.weui-desktop-radio-group.radio-group label:has-text('近30天')",
              "div.post-single-wrap div.filter-wrap div.weui-desktop-radio-group label:nth-child(2)",
              "div.post-single-wrap label:has-text('近30天')",
              "div.weui-desktop-radio-group label:has-text('近30天')",
              "label:has-text('近30天')",
          ]
          clicked = False
          for sel in radio_selectors:
              try:
                  el = self.page.locator(sel).first
                  if await el.count() > 0:
                      await el.click()
                      clicked = True
                      print(
                          f"[{self.platform_name}] 已点击近30天 (selector: {sel[:40]}...)",
                          flush=True,
                      )
                      break
              except Exception:
                  continue
          if not clicked:
              print(f"[{self.platform_name}] 警告: 未找到近30天按钮", flush=True)
          await asyncio.sleep(5)

          # 4. Take the list captured from post_list.
          items = post_list_data["list"]
          if not items:
              result["error"] = "未监听到 post_list 或列表为空"
              print(f"[{self.platform_name}] {result['error']}", flush=True)
              return result

          # 4.5 Turn the post_list items into updates for the works table.
          #     Field mapping: readCount -> plays, likeCount -> recommends,
          #     favCount -> likes, commentCount -> comments,
          #     forwardCount -> shares, followCount -> follows,
          #     fullPlayRate -> completion rate (multiplied by 100 below),
          #     avgPlayTimeSec -> average watch duration.
          if update_works_fn and items:
              try:
                  updates = []
                  for it in items:
                      eid = (it.get("exportId") or "").strip()
                      if not eid:
                          continue
                      work_id = match_work_id(eid)
                      if work_id is None:
                          continue
                      read_count = int(it.get("readCount") or 0)
                      recommend_count = int(it.get("likeCount") or 0)
                      like_count = int(it.get("favCount") or 0)
                      comment_count = int(it.get("commentCount") or 0)
                      forward_count = int(it.get("forwardCount") or 0)
                      follow_count = int(it.get("followCount") or 0)
                      full_play_rate = it.get("fullPlayRate")
                      if full_play_rate is not None:
                          comp_rate = f"{float(full_play_rate) * 100:.2f}%"
                      else:
                          comp_rate = "0"
                      avg_sec = it.get("avgPlayTimeSec")
                      if avg_sec is not None:
                          avg_dur = f"{float(avg_sec):.2f}秒"
                      else:
                          avg_dur = "0"
                      updates.append(
                          {
                              "work_id": work_id,
                              "yesterday_play_count": read_count,
                              "yesterday_like_count": like_count,
                              "yesterday_recommend_count": recommend_count,
                              "yesterday_comment_count": comment_count,
                              "yesterday_share_count": forward_count,
                              "yesterday_follow_count": follow_count,
                              "yesterday_completion_rate": comp_rate,
                              "yesterday_avg_watch_duration": avg_dur,
                          }
                      )
                  if updates:
                      try:
                          save_result = update_works_fn(updates)
                          result["works_updated"] = save_result.get("updated", 0)
                      except Exception:
                          import traceback

                          traceback.print_exc()
              except Exception as e:
                  import traceback

                  traceback.print_exc()
                  print(
                      f"[{self.platform_name}] 解析 post_list 更新 works 失败: {e}",
                      flush=True,
                  )

          async def ensure_single_video_near30():
              """Re-select the single-video tab and last-30-days after go_back."""
              tab_sel = "div.weui-desktop-tab__navs ul li:nth-child(2) a"
              try:
                  await self.page.wait_for_selector(tab_sel, timeout=8000)
                  await self.page.click(tab_sel)
              except Exception:
                  await self.page.click("a:has-text('单篇视频')")
              await asyncio.sleep(2)
              for sel in [
                  "div.post-single-wrap div.weui-desktop-radio-group.radio-group label:has-text('近30天')",
                  "div.post-single-wrap label:has-text('近30天')",
                  "div.weui-desktop-radio-group label:has-text('近30天')",
                  "label:has-text('近30天')",
              ]:
                  try:
                      el = self.page.locator(sel).first
                      if await el.count() > 0:
                          await el.click()
                          break
                  except Exception:
                      continue
              await asyncio.sleep(3)

          async def back_to_list():
              """Leave the detail page and give the list page time to load."""
              await self.page.go_back()
              await asyncio.sleep(2)

          # 5. Walk the list and process every item that matches a known work.
          processed_export_ids = set()
          for idx, item in enumerate(items):
              eid = (item.get("exportId") or "").strip()
              oid = (item.get("objectId") or "").strip()
              if not oid:
                  continue
              # Safety net against the same exportId appearing twice.
              if eid in processed_export_ids:
                  print(
                      f"[{self.platform_name}] 跳过 [{idx + 1}] exportId={eid} (已处理)",
                      flush=True,
                  )
                  continue

              # Match first so unmatched items do not pay for a list restore.
              work_id = match_work_id(eid)
              if work_id is None:
                  result["total_skipped"] += 1
                  print(
                      f"[{self.platform_name}] 跳过 [{idx + 1}] exportId={eid} (库中无对应作品)",
                      flush=True,
                  )
                  continue

              # go_back lands on the all-videos view, so the single-video tab
              # and last-30-days filter have to be selected again.
              if idx > 0:
                  await ensure_single_video_near30()

              # Forget the previous work's response *before* navigating; if it
              # were kept and this detail page produced no response, the old
              # data would be saved under this work_id.
              feed_aggreagate_data["body"] = None

              # Click "view". The action column may live in the fixed-right
              # copy of the table, so that selector is tried first.
              view_selectors = [
                  f'div.ant-table-fixed-right tr[data-row-key="{oid}"] a.detail-wrap',
                  f'tr[data-row-key="{oid}"] a.detail-wrap',
                  f'tr[data-row-key="{oid}"] td a.detail-wrap',
                  f'tr[data-row-key="{oid}"] a:has-text("查看")',
                  f'tr[data-row-key="{oid}"] a',
              ]
              clicked = False
              for sel in view_selectors:
                  view_btn = self.page.locator(sel)
                  if await view_btn.count() > 0:
                      try:
                          await view_btn.first.wait_for(timeout=3000)
                          await view_btn.first.click()
                          clicked = True
                          print(
                              f"[{self.platform_name}] 已点击查看 (selector: {sel[:40]}...)",
                              flush=True,
                          )
                          break
                      except Exception:
                          continue
              if not clicked:
                  print(
                      f"[{self.platform_name}] 未找到 objectId={oid} 的查看按钮",
                      flush=True,
                  )
                  result["total_skipped"] += 1
                  continue

              # The detail page requests feed_aggreagate on load; wait for the
              # handler to capture it.
              await asyncio.sleep(3)
              await asyncio.sleep(4)

              # Expected shape: data.dataByFanstype[].dataByTabtype[] where the
              # "all" tab has tabTypeName "全部" or tabType 999.
              body = feed_aggreagate_data.get("body")
              if not body or not body.get("data"):
                  print(
                      f"[{self.platform_name}] work_id={work_id} 未监听到 feed_aggreagate 有效响应",
                      flush=True,
                  )
                  await back_to_list()
                  continue

              tab_all = None
              for fan_item in body.get("data", {}).get("dataByFanstype", []):
                  for tab_item in fan_item.get("dataByTabtype", []):
                      if (
                          tab_item.get("tabTypeName") == "全部"
                          or tab_item.get("tabType") == 999
                      ):
                          tab_all = tab_item.get("data")
                          break
                  if tab_all is not None:
                      break
              if not tab_all:
                  # Fall back to feedData[0].totalData; an empty feedData list
                  # must not raise and abort the whole batch.
                  feed_data = body.get("data", {}).get("feedData", [{}]) or []
                  first_feed = feed_data[0] if feed_data else None
                  tab_all = (
                      first_feed.get("totalData")
                      if isinstance(first_feed, dict)
                      else None
                  )
              if not tab_all:
                  print(
                      f"[{self.platform_name}] work_id={work_id} 未找到「全部」数据",
                      flush=True,
                  )
                  await back_to_list()
                  continue

              browse = tab_all.get("browse", []) or []
              n = len(browse)
              if n == 0:
                  print(
                      f"[{self.platform_name}] work_id={work_id} browse 为空",
                      flush=True,
                  )
                  await back_to_list()
                  continue

              # Dates: the series is treated as ending yesterday, with index 0
              # being the earliest day.
              today = datetime.now().replace(
                  hour=0, minute=0, second=0, microsecond=0
              )
              yesterday = today - timedelta(days=1)
              start_date = yesterday - timedelta(days=n - 1)

              # "like" holds recommends and "fav" holds likes.
              like_arr = tab_all.get("like", []) or []
              comment_arr = tab_all.get("comment", []) or []
              forward_arr = tab_all.get("forward", []) or []
              fav_arr = tab_all.get("fav", []) or []
              follow_arr = tab_all.get("follow", []) or []

              stats_list = []
              for i in range(n):
                  rec_dt = start_date + timedelta(days=i)
                  rec_date = rec_dt.strftime("%Y-%m-%d")
                  stats_list.append(
                      {
                          "work_id": work_id,
                          "record_date": rec_date,
                          "play_count": self._parse_count(value_at(browse, i)),
                          "like_count": self._parse_count(value_at(fav_arr, i)),
                          "recommend_count": self._parse_count(value_at(like_arr, i)),
                          "comment_count": self._parse_count(value_at(comment_arr, i)),
                          "share_count": self._parse_count(value_at(forward_arr, i)),
                          "collect_count": 0,
                          "follow_count": self._parse_count(value_at(follow_arr, i)),
                          "completion_rate": "0",
                          "avg_watch_duration": "0",
                      }
                  )
              print(
                  f"[{self.platform_name}] work_id={work_id} 从 feed_aggreagate 解析得到 {len(stats_list)} 条日统计",
                  flush=True,
              )

              # Persist through the caller-supplied save_fn; a failed save is
              # logged and does not stop the remaining works.
              if save_fn and stats_list:
                  try:
                      save_result = save_fn(stats_list)
                      result["inserted"] += save_result.get("inserted", 0)
                      result["updated"] += save_result.get("updated", 0)
                  except Exception as e:
                      print(
                          f"[{self.platform_name}] work_id={work_id} 保存失败: {e}",
                          flush=True,
                      )
              result["total_processed"] += 1
              processed_export_ids.add(eid)

              # Back to the list; the next iteration restores the list view.
              await back_to_list()

          print(
              f"[{self.platform_name}] 批量同步完成: 处理 {result['total_processed']} 个作品, 跳过 {result['total_skipped']} 个",
              flush=True,
          )
      except Exception as e:
          import traceback

          traceback.print_exc()
          result["success"] = False
          result["error"] = str(e)
      finally:
          try:
              await self.close_browser()
          except Exception:
              pass
      return result
  2372. async def get_comments(
  2373. self, cookies: str, work_id: str, cursor: str = ""
  2374. ) -> CommentsResult:
  2375. """
  2376. 获取视频号作品评论(完全参考 get_weixin_work_comments.py 的接口监听逻辑)
  2377. 支持递归提取二级评论,正确处理 parent_comment_id
  2378. """
  2379. print(f"\n{'=' * 60}")
  2380. print(f"[{self.platform_name}] 获取作品评论")
  2381. print(f"[{self.platform_name}] work_id={work_id}")
  2382. print(f"{'=' * 60}")
  2383. comments: List[CommentItem] = []
  2384. total = 0
  2385. has_more = False
  2386. try:
  2387. await self.init_browser()
  2388. cookie_list = self.parse_cookies(cookies)
  2389. await self.set_cookies(cookie_list)
  2390. if not self.page:
  2391. raise Exception("Page not initialized")
  2392. # 访问评论管理页面
  2393. print(f"[{self.platform_name}] 正在打开评论页面...")
  2394. await self.page.goto(
  2395. "https://channels.weixin.qq.com/platform/interaction/comment",
  2396. timeout=30000,
  2397. )
  2398. await asyncio.sleep(2)
  2399. # 检查登录状态
  2400. current_url = self.page.url
  2401. if "login" in current_url:
  2402. raise Exception("Cookie 已过期,请重新登录")
  2403. # === 步骤1: 监听 post_list 接口获取作品列表 ===
  2404. posts = []
  2405. try:
  2406. async with self.page.expect_response(
  2407. lambda res: "/post/post_list" in res.url, timeout=20000
  2408. ) as post_resp_info:
  2409. await self.page.wait_for_selector(
  2410. ".scroll-list .comment-feed-wrap", timeout=15000
  2411. )
  2412. post_resp = await post_resp_info.value
  2413. post_data = await post_resp.json()
  2414. if post_data.get("errCode") == 0:
  2415. posts = post_data.get("data", {}).get("list", [])
  2416. print(f"[{self.platform_name}] ✅ 获取 {len(posts)} 个作品")
  2417. else:
  2418. err_msg = post_data.get("errMsg", "未知错误")
  2419. print(f"[{self.platform_name}] ❌ post_list 业务错误: {err_msg}")
  2420. return CommentsResult(
  2421. success=False,
  2422. platform=self.platform_name,
  2423. work_id=work_id,
  2424. error=f"post_list 业务错误: {err_msg}",
  2425. )
  2426. except Exception as e:
  2427. print(f"[{self.platform_name}] ❌ 获取 post_list 失败: {e}")
  2428. return CommentsResult(
  2429. success=False,
  2430. platform=self.platform_name,
  2431. work_id=work_id,
  2432. error=f"获取 post_list 失败: {e}",
  2433. )
  2434. # === 步骤2: 在 DOM 中查找目标作品 ===
  2435. feed_wraps = await self.page.query_selector_all(
  2436. ".scroll-list .comment-feed-wrap"
  2437. )
  2438. target_feed = None
  2439. target_post = None
  2440. target_index = -1
  2441. for i, feed in enumerate(feed_wraps):
  2442. if i >= len(posts):
  2443. break
  2444. post = posts[i]
  2445. object_nonce = post.get("objectNonce", "")
  2446. post_work_id = post.get("objectId", "") or object_nonce
  2447. # 匹配 work_id(支持 objectId 或 objectNonce 匹配)
  2448. if (
  2449. work_id in [post_work_id, object_nonce]
  2450. or post_work_id in work_id
  2451. or object_nonce in work_id
  2452. ):
  2453. target_feed = feed
  2454. target_post = post
  2455. target_index = i
  2456. work_title = post.get("desc", {}).get("description", "无标题")
  2457. print(f"[{self.platform_name}] ✅ 找到目标作品: {work_title}")
  2458. continue
  2459. if not target_feed or not target_post:
  2460. print(f"[{self.platform_name}] ❌ 未找到 work_id={work_id} 对应的作品")
  2461. return CommentsResult(
  2462. success=True,
  2463. platform=self.platform_name,
  2464. work_id=work_id,
  2465. comments=[],
  2466. total=0,
  2467. has_more=False,
  2468. )
  2469. # 准备作品信息(用于递归函数)
  2470. object_nonce = target_post.get("objectNonce", f"nonce_{target_index}")
  2471. work_title = target_post.get("desc", {}).get(
  2472. "description", f"作品{target_index + 1}"
  2473. )
  2474. work_info = {"work_id": object_nonce, "work_title": work_title}
  2475. # === 步骤3: 点击作品触发 comment_list 接口 ===
  2476. content_wrap = (
  2477. await target_feed.query_selector(".feed-content") or target_feed
  2478. )
  2479. try:
  2480. async with self.page.expect_response(
  2481. lambda res: "/comment/comment_list" in res.url, timeout=15000
  2482. ) as comment_resp_info:
  2483. await content_wrap.click()
  2484. await asyncio.sleep(0.8)
  2485. comment_resp = await comment_resp_info.value
  2486. comment_data = await comment_resp.json()
  2487. if comment_data.get("errCode") != 0:
  2488. err_msg = comment_data.get("errMsg", "未知错误")
  2489. print(f"[{self.platform_name}] ❌ 评论接口错误: {err_msg}")
  2490. return CommentsResult(
  2491. success=False,
  2492. platform=self.platform_name,
  2493. work_id=work_id,
  2494. error=f"评论接口错误: {err_msg}",
  2495. )
  2496. raw_comments = comment_data.get("data", {}).get("comment", [])
  2497. total = comment_data.get("data", {}).get(
  2498. "totalCount", len(raw_comments)
  2499. )
  2500. print(
  2501. f"[{self.platform_name}] 📊 原始评论数: {len(raw_comments)}, 总数: {total}"
  2502. )
  2503. # === 步骤4: 递归提取所有评论(含子评论)===
  2504. extracted = self._extract_comments(
  2505. raw_comments, parent_id="", work_info=work_info
  2506. )
  2507. # === 步骤5: 转换为 CommentItem 列表(保留 weixin.py 的数据结构)===
  2508. for c in extracted:
  2509. # 使用接口返回的 comment_id
  2510. comment_id = c.get("comment_id", "")
  2511. parent_comment_id = c.get("parent_comment_id", "")
  2512. # 构建 CommentItem(保留原有数据结构用于数据库入库)
  2513. comment_item = CommentItem(
  2514. comment_id=comment_id,
  2515. parent_comment_id=parent_comment_id,
  2516. work_id=work_id,
  2517. content=c.get("content", ""),
  2518. author_id=c.get("username", ""), # 使用 username 作为 author_id
  2519. author_name=c.get("nickname", ""),
  2520. author_avatar=c.get("avatar", ""),
  2521. like_count=c.get("like_count", 0),
  2522. reply_count=0,
  2523. create_time=c.get("create_time", ""),
  2524. )
  2525. # 添加扩展字段(用于数据库存储和后续处理)
  2526. # comment_item.parent_comment_id = c.get("parent_comment_id", "")
  2527. comment_item.is_author = c.get("is_author", False)
  2528. comment_item.create_time_unix = c.get("create_time_unix", 0)
  2529. comment_item.work_title = c.get("work_title", "")
  2530. print(comment_item)
  2531. comments.append(comment_item)
  2532. # 打印日志
  2533. author_tag = " 👤(作者)" if c.get("is_author") else ""
  2534. parent_tag = (
  2535. f" [回复: {c.get('parent_comment_id', '')}]"
  2536. if c.get("parent_comment_id")
  2537. else ""
  2538. )
  2539. print(
  2540. f"[{self.platform_name}] - [{c.get('nickname', '')}] {c.get('content', '')[:30]}... "
  2541. f"({c.get('create_time', '')}){author_tag}{parent_tag}"
  2542. )
  2543. # 判断是否还有更多(优先使用接口返回的 continueFlag,否则根据数量判断)
  2544. has_more = (
  2545. comment_data.get("data", {}).get("continueFlag", False)
  2546. or len(extracted) < total
  2547. )
  2548. print(
  2549. f"[{self.platform_name}] ✅ 共提取 {len(comments)} 条评论(含子评论)"
  2550. )
  2551. except Exception as e:
  2552. print(f"[{self.platform_name}] ❌ 获取评论失败: {e}")
  2553. import traceback
  2554. traceback.print_exc()
  2555. return CommentsResult(
  2556. success=False,
  2557. platform=self.platform_name,
  2558. work_id=work_id,
  2559. error=f"获取评论失败: {e}",
  2560. )
  2561. except Exception as e:
  2562. import traceback
  2563. traceback.print_exc()
  2564. return CommentsResult(
  2565. success=False,
  2566. platform=self.platform_name,
  2567. work_id=work_id,
  2568. error=str(e),
  2569. )
  2570. return CommentsResult(
  2571. success=True,
  2572. platform=self.platform_name,
  2573. work_id=work_id,
  2574. comments=comments,
  2575. total=total,
  2576. has_more=has_more,
  2577. )
  2578. def _extract_comments(
  2579. self, comment_list: list, parent_id: str = "", work_info: dict = None
  2580. ) -> list:
  2581. """
  2582. 递归提取一级和二级评论(完全参考 get_weixin_work_comments.py 的 extract_comments 函数)
  2583. Args:
  2584. comment_list: 评论列表(原始接口数据)
  2585. parent_id: 父评论ID(一级评论为空字符串"",二级评论为父级评论ID)
  2586. work_info: 作品信息字典
  2587. Returns:
  2588. list: 扁平化的评论列表,包含一级和二级评论
  2589. """
  2590. result = []
  2591. # 获取当前用户 username(用于判断是否为作者)
  2592. # 优先从环境变量获取,也可通过其他方式配置
  2593. my_username = getattr(self, "my_username", "") or os.environ.get(
  2594. "WEIXIN_MY_USERNAME", ""
  2595. )
  2596. for cmt in comment_list:
  2597. # 处理时间戳
  2598. create_ts = int(cmt.get("commentCreatetime", 0) or 0)
  2599. readable_time = (
  2600. datetime.fromtimestamp(create_ts).strftime("%Y-%m-%d %H:%M:%S")
  2601. if create_ts > 0
  2602. else ""
  2603. )
  2604. # 判断是否作者(如果配置了 my_username)
  2605. username = cmt.get("username", "") or ""
  2606. is_author = (my_username != "") and (username == my_username)
  2607. # 构建评论条目 - 完全参考 get_weixin_work_comments.py 的字段
  2608. entry = {
  2609. "work_id": work_info.get("work_id", "") if work_info else "",
  2610. "work_title": work_info.get("work_title", "") if work_info else "",
  2611. "comment_id": cmt.get("commentId"),
  2612. "parent_comment_id": parent_id, # 关键:一级评论为空字符串"",二级评论为父评论ID
  2613. "username": username,
  2614. "nickname": cmt.get("commentNickname", ""),
  2615. "avatar": cmt.get("commentHeadurl", ""),
  2616. "content": cmt.get("commentContent", ""),
  2617. "create_time_unix": create_ts,
  2618. "create_time": readable_time,
  2619. "is_author": is_author,
  2620. "like_count": cmt.get("commentLikeCount", 0) or 0,
  2621. }
  2622. result.append(entry)
  2623. # 递归处理二级评论(levelTwoComment)
  2624. # 关键:二级评论的 parent_id 应该是当前这条评论的 comment_id
  2625. level_two = cmt.get("levelTwoComment", []) or []
  2626. if level_two and isinstance(level_two, list) and len(level_two) > 0:
  2627. # 当前评论的 ID 作为其子评论的 parent_id
  2628. current_comment_id = cmt.get("commentId", "")
  2629. result.extend(
  2630. self._extract_comments(
  2631. level_two, parent_id=current_comment_id, work_info=work_info
  2632. )
  2633. )
  2634. return result
  2635. async def auto_reply_private_messages(self, cookies: str) -> dict:
  2636. """自动回复私信 - 集成自 pw3.py"""
  2637. print(f"\n{'=' * 60}")
  2638. print(f"[{self.platform_name}] 开始自动回复私信")
  2639. print(f"{'=' * 60}")
  2640. try:
  2641. await self.init_browser()
  2642. cookie_list = self.parse_cookies(cookies)
  2643. await self.set_cookies(cookie_list)
  2644. if not self.page:
  2645. raise Exception("Page not initialized")
  2646. # 访问私信页面
  2647. await self.page.goto(
  2648. "https://channels.weixin.qq.com/platform/private_msg", timeout=30000
  2649. )
  2650. await asyncio.sleep(3)
  2651. # 检查登录状态
  2652. current_url = self.page.url
  2653. print(f"[{self.platform_name}] 当前 URL: {current_url}")
  2654. if "login" in current_url:
  2655. raise Exception("Cookie 已过期,请重新登录")
  2656. # 等待私信页面加载(使用多个选择器容错)
  2657. try:
  2658. await self.page.wait_for_selector(
  2659. ".private-msg-list-header", timeout=15000
  2660. )
  2661. except:
  2662. # 尝试其他选择器
  2663. try:
  2664. await self.page.wait_for_selector(
  2665. ".weui-desktop-tab__navs__inner", timeout=10000
  2666. )
  2667. print(f"[{self.platform_name}] 使用备用选择器加载成功")
  2668. except:
  2669. # 截图调试
  2670. screenshot_dir = os.path.join(
  2671. os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
  2672. "screenshots",
  2673. )
  2674. os.makedirs(screenshot_dir, exist_ok=True)
  2675. screenshot_path = os.path.join(
  2676. screenshot_dir,
  2677. f"weixin_private_msg_{int(asyncio.get_event_loop().time())}.png",
  2678. )
  2679. await self.page.screenshot(path=screenshot_path)
  2680. print(
  2681. f"[{self.platform_name}] 页面加载失败,截图: {screenshot_path}"
  2682. )
  2683. raise Exception(f"私信页面加载超时,当前 URL: {current_url}")
  2684. print(f"[{self.platform_name}] 私信页面加载完成")
  2685. # 处理两个 tab
  2686. total_replied = 0
  2687. for tab_name in ["打招呼消息", "私信"]:
  2688. replied_count = await self._process_tab_sessions(tab_name)
  2689. total_replied += replied_count
  2690. print(f"[{self.platform_name}] 自动回复完成,共回复 {total_replied} 条消息")
  2691. return {
  2692. "success": True,
  2693. "platform": self.platform_name,
  2694. "replied_count": total_replied,
  2695. "message": f"成功回复 {total_replied} 条私信",
  2696. }
  2697. except Exception as e:
  2698. import traceback
  2699. traceback.print_exc()
  2700. return {"success": False, "platform": self.platform_name, "error": str(e)}
  2701. async def _process_tab_sessions(self, tab_name: str) -> int:
  2702. """处理指定 tab 下的所有会话"""
  2703. print(f"\n🔄 正在处理「{tab_name}」中的所有会话...")
  2704. if not self.page:
  2705. return 0
  2706. replied_count = 0
  2707. try:
  2708. # 点击 tab
  2709. if tab_name == "私信":
  2710. tab_link = self.page.locator(
  2711. ".weui-desktop-tab__navs__inner li"
  2712. ).first.locator("a")
  2713. elif tab_name == "打招呼消息":
  2714. tab_link = (
  2715. self.page.locator(".weui-desktop-tab__navs__inner li")
  2716. .nth(1)
  2717. .locator("a")
  2718. )
  2719. else:
  2720. return 0
  2721. if await tab_link.is_visible():
  2722. await tab_link.click()
  2723. print(f" ➤ 已点击「{tab_name}」tab")
  2724. else:
  2725. print(f" ❌ 「{tab_name}」tab 不可见")
  2726. return 0
  2727. # 等待会话列表加载
  2728. try:
  2729. await self.page.wait_for_function(
  2730. """
  2731. () => {
  2732. const hasSession = document.querySelectorAll('.session-wrap').length > 0;
  2733. const hasEmpty = !!document.querySelector('.empty-text');
  2734. return hasSession || hasEmpty;
  2735. }
  2736. """,
  2737. timeout=8000,
  2738. )
  2739. print(" ✅ 会话列表区域已加载")
  2740. except:
  2741. print(" ⚠️ 等待会话列表超时,继续尝试读取...")
  2742. # 获取会话
  2743. session_wraps = self.page.locator(".session-wrap")
  2744. session_count = await session_wraps.count()
  2745. print(f" 💬 共找到 {session_count} 个会话")
  2746. if session_count == 0:
  2747. return 0
  2748. # 遍历每个会话
  2749. for idx in range(session_count):
  2750. try:
  2751. current_sessions = self.page.locator(".session-wrap")
  2752. if idx >= await current_sessions.count():
  2753. break
  2754. session = current_sessions.nth(idx)
  2755. user_name = await session.locator(".name").inner_text()
  2756. last_preview = await session.locator(".feed-info").inner_text()
  2757. print(
  2758. f"\n ➤ [{idx + 1}/{session_count}] 正在处理: {user_name} | 最后消息: {last_preview}"
  2759. )
  2760. await session.click()
  2761. await asyncio.sleep(2)
  2762. # 提取聊天历史
  2763. history = await self._extract_chat_history()
  2764. need_reply = (not history) or (not history[-1]["is_author"])
  2765. if need_reply:
  2766. reply_text = await self._generate_reply_with_ai(history)
  2767. if reply_text == "":
  2768. reply_text = self._generate_reply(history)
  2769. # # 生成回复
  2770. # if history and history[-1]["is_author"]:
  2771. # reply_text = await self._generate_reply_with_ai(history)
  2772. # else:
  2773. # reply_text = self._generate_reply(history)
  2774. if reply_text:
  2775. print(f" 📝 回复内容: {reply_text}")
  2776. try:
  2777. textarea = self.page.locator(".edit_area").first
  2778. send_btn = self.page.locator(
  2779. 'button:has-text("发送")'
  2780. ).first
  2781. if (
  2782. await textarea.is_visible()
  2783. and await send_btn.is_visible()
  2784. ):
  2785. await textarea.fill(reply_text)
  2786. await asyncio.sleep(0.5)
  2787. await send_btn.click()
  2788. print(" ✅ 已发送")
  2789. replied_count += 1
  2790. await asyncio.sleep(1.5)
  2791. else:
  2792. print(" ❌ 输入框或发送按钮不可见")
  2793. except Exception as e:
  2794. print(f" ❌ 发送失败: {e}")
  2795. else:
  2796. print(" ➤ 无需回复")
  2797. else:
  2798. print(" ➤ 最后一条是我发的,跳过回复")
  2799. except Exception as e:
  2800. print(f" ❌ 处理会话 {idx + 1} 时出错: {e}")
  2801. continue
  2802. except Exception as e:
  2803. print(f"❌ 处理「{tab_name}」失败: {e}")
  2804. return replied_count
  2805. async def _extract_chat_history(self) -> list:
  2806. """精准提取聊天记录,区分作者(自己)和用户"""
  2807. if not self.page:
  2808. return []
  2809. history = []
  2810. message_wrappers = self.page.locator(
  2811. ".session-content-wrapper > div:not(.footer) > .text-wrapper"
  2812. )
  2813. count = await message_wrappers.count()
  2814. for i in range(count):
  2815. try:
  2816. wrapper = message_wrappers.nth(i)
  2817. # 判断方向
  2818. is_right = await wrapper.locator(".content-right").count() > 0
  2819. is_left = await wrapper.locator(".content-left").count() > 0
  2820. if not (is_left or is_right):
  2821. continue
  2822. # 提取消息文本
  2823. pre_el = wrapper.locator("pre.message-plain")
  2824. content = ""
  2825. if await pre_el.count() > 0:
  2826. content = await pre_el.inner_text()
  2827. content = content.strip()
  2828. if not content:
  2829. continue
  2830. # 获取头像
  2831. avatar_img = wrapper.locator(".avatar").first
  2832. avatar_src = ""
  2833. if await avatar_img.count() > 0:
  2834. avatar_src = await avatar_img.get_attribute("src") or ""
  2835. # 右侧 = 作者(自己)
  2836. is_author = is_right
  2837. # 获取用户名
  2838. if is_left:
  2839. name_el = wrapper.locator(".profile .name")
  2840. author_name = "用户"
  2841. if await name_el.count() > 0:
  2842. author_name = await name_el.inner_text()
  2843. else:
  2844. author_name = "我"
  2845. history.append(
  2846. {
  2847. "author": author_name,
  2848. "content": content,
  2849. "is_author": is_author,
  2850. "avatar": avatar_src,
  2851. }
  2852. )
  2853. except Exception as e:
  2854. print(f" ⚠️ 解析第 {i + 1} 条消息失败: {e}")
  2855. continue
  2856. return history
  2857. async def _generate_reply_with_ai(self, chat_history: list) -> str:
  2858. """使用 AI 生成智能回复"""
  2859. import requests
  2860. import json
  2861. try:
  2862. # 获取 AI 配置
  2863. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  2864. ai_base_url = os.environ.get(
  2865. "DASHSCOPE_BASE_URL",
  2866. "https://dashscope.aliyuncs.com/compatible-mode/v1",
  2867. )
  2868. ai_model = os.environ.get("AI_MODEL", "qwen-plus")
  2869. if not ai_api_key:
  2870. print("⚠️ 未配置 AI API Key,使用规则回复")
  2871. return self._generate_reply(chat_history)
  2872. # 构建对话上下文
  2873. messages = [
  2874. {
  2875. "role": "system",
  2876. "content": "你是一个友好的微信视频号创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。",
  2877. }
  2878. ]
  2879. for msg in chat_history:
  2880. role = "assistant" if msg["is_author"] else "user"
  2881. messages.append({"role": role, "content": msg["content"]})
  2882. # 调用 AI API
  2883. headers = {
  2884. "Authorization": f"Bearer {ai_api_key}",
  2885. "Content-Type": "application/json",
  2886. }
  2887. payload = {
  2888. "model": ai_model,
  2889. "messages": messages,
  2890. "max_tokens": 150,
  2891. "temperature": 0.8,
  2892. }
  2893. print(" 🤖 正在调用 AI 生成回复...")
  2894. response = requests.post(
  2895. f"{ai_base_url}/chat/completions",
  2896. headers=headers,
  2897. json=payload,
  2898. timeout=30,
  2899. )
  2900. if response.status_code != 200:
  2901. print(f" ⚠️ AI API 返回错误 {response.status_code},使用规则回复")
  2902. return self._generate_reply(chat_history)
  2903. result = response.json()
  2904. ai_reply = (
  2905. result.get("choices", [{}])[0]
  2906. .get("message", {})
  2907. .get("content", "")
  2908. .strip()
  2909. )
  2910. if ai_reply:
  2911. print(f" ✅ AI 生成回复: {ai_reply}")
  2912. return ai_reply
  2913. else:
  2914. print(" ⚠️ AI 返回空内容,使用规则回复")
  2915. return self._generate_reply(chat_history)
  2916. except Exception as e:
  2917. print(f" ⚠️ AI 回复生成失败: {e},使用规则回复")
  2918. return self._generate_reply(chat_history)
  2919. def _generate_reply(self, chat_history: list) -> str:
  2920. """根据完整聊天历史生成回复(规则回复方式)"""
  2921. if not chat_history:
  2922. return "你好!感谢联系~"
  2923. # 检查最后一条是否是作者发的
  2924. if chat_history[-1]["is_author"]:
  2925. return "" # 不回复
  2926. # 找最后一条用户消息
  2927. last_user_msg = chat_history[-1]["content"]
  2928. # 简单规则回复
  2929. if "谢谢" in last_user_msg or "感谢" in last_user_msg:
  2930. return "不客气!欢迎常来交流~"
  2931. elif "你好" in last_user_msg or "在吗" in last_user_msg:
  2932. return "你好!请问有什么可以帮您的?"
  2933. elif "视频" in last_user_msg or "怎么拍" in last_user_msg:
  2934. return "视频是用手机拍摄的,注意光线和稳定哦!"
  2935. else:
  2936. return "收到!我会认真阅读您的留言~"