weixin.py 135 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268
  1. # -*- coding: utf-8 -*-
  2. """
  3. 微信视频号发布器
  4. 参考: matrix/tencent_uploader/main.py
  5. """
  6. import asyncio
  7. import json
  8. import os
  9. from datetime import datetime
  10. from typing import List
  11. from .base import (
  12. BasePublisher,
  13. PublishParams,
  14. PublishResult,
  15. WorkItem,
  16. WorksResult,
  17. CommentItem,
  18. CommentsResult,
  19. )
  20. import os
  21. import time
  22. # 允许通过环境变量手动指定“上传视频入口”的选择器,便于在页面结构频繁变更时快速调整
  23. WEIXIN_UPLOAD_SELECTOR = os.environ.get("WEIXIN_UPLOAD_SELECTOR", "").strip()
  24. # 代理下视频上传持续失败时,可设 WEIXIN_UPLOAD_BYPASS_PROXY=1
  25. # 仅对上传 CDN 直连,其余页面仍走代理(解决大文件经代理易「网络出错」)
  26. WEIXIN_UPLOAD_BYPASS_PROXY = os.environ.get(
  27. "WEIXIN_UPLOAD_BYPASS_PROXY", "0"
  28. ).strip() in ("1", "true", "yes")
  29. def format_short_title(origin_title: str) -> str:
  30. """
  31. 格式化短标题
  32. - 移除特殊字符
  33. - 长度限制在 6-16 字符
  34. """
  35. allowed_special_chars = "《》:+?%°"
  36. filtered_chars = [
  37. char
  38. if char.isalnum() or char in allowed_special_chars
  39. else " "
  40. if char == ","
  41. else ""
  42. for char in origin_title
  43. ]
  44. formatted_string = "".join(filtered_chars)
  45. if len(formatted_string) > 16:
  46. formatted_string = formatted_string[:16]
  47. elif len(formatted_string) < 6:
  48. formatted_string += " " * (6 - len(formatted_string))
  49. return formatted_string
  50. class WeixinPublisher(BasePublisher):
  51. """
  52. 微信视频号发布器
  53. 使用 Playwright 自动化操作视频号创作者中心
  54. 注意: 需要使用 Chrome 浏览器,否则可能出现 H264 编码错误
  55. """
  56. platform_name = "weixin"
  57. login_url = "https://channels.weixin.qq.com/platform"
  58. publish_url = "https://channels.weixin.qq.com/platform/post/create"
  59. cookie_domain = ".weixin.qq.com"
  60. def _parse_count(self, count_str: str) -> int:
  61. """解析数字(支持带'万'的格式)"""
  62. try:
  63. count_str = count_str.strip()
  64. if "万" in count_str:
  65. return int(float(count_str.replace("万", "")) * 10000)
  66. return int(count_str)
  67. except:
  68. return 0
  69. async def ai_find_upload_selector(
  70. self, frame_html: str, frame_name: str = "main"
  71. ) -> str:
  72. """
  73. 使用 AI 从 HTML 中识别“上传视频/选择文件”相关元素的 CSS 选择器。
  74. 设计思路:
  75. - 仅在常规 DOM 选择器都失败时调用,避免频繁占用 AI 配额;
  76. - 通过 DashScope 文本模型(与验证码识别同一套配置)分析 HTML;
  77. - 返回一个适合用于 frame.locator(selector) 的 CSS 选择器。
  78. """
  79. import json
  80. import re
  81. import requests
  82. import os
  83. # 避免 HTML 过长导致 token 超限,只截取前 N 字符
  84. if not frame_html:
  85. return ""
  86. max_len = 20000
  87. if len(frame_html) > max_len:
  88. frame_html = frame_html[:max_len]
  89. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  90. ai_base_url = os.environ.get(
  91. "DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"
  92. )
  93. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  94. if not ai_api_key:
  95. print(f"[{self.platform_name}] AI上传入口识别: 未配置 AI API Key,跳过")
  96. return ""
  97. prompt = f"""
  98. 你是熟悉微信视频号后台的前端工程师,现在需要在一段 HTML 中找到“上传视频文件”的入口。
  99. 页面说明:
  100. - 平台:微信视频号(channels.weixin.qq.com)
  101. - 目标:用于上传视频文件的按钮或 input(一般会触发文件选择框)
  102. - 你会收到某个 frame 的完整 HTML 片段(不包含截图)。
  103. 请你根据下面的 HTML,推断最适合用于上传视频文件的元素,并输出一个可以被 Playwright 使用的 CSS 选择器。
  104. 要求:
  105. 1. 只考虑“上传/选择视频文件”的入口,不要返回“发布/发表/下一步”等按钮;
  106. 2. 选择器需要尽量稳定,不要使用自动生成的随机类名(例如带很多随机字母/数字的类名可以用前缀匹配);
  107. 3. 选择器必须是 CSS 选择器(不要返回 XPath);
  108. 4. 如果确实找不到合理的上传入口,返回 selector 为空字符串。
  109. 请以 JSON 格式输出,严格遵守以下结构(不要添加任何解释文字):
  110. ```json
  111. {{
  112. "selector": "CSS 选择器字符串,比如:input[type='file'] 或 div.upload-content input[type='file']"
  113. }}
  114. ```
  115. 下面是 frame=\"{frame_name}\" 的 HTML:
  116. ```html
  117. {frame_html}
  118. ```"""
  119. payload = {
  120. "model": ai_text_model,
  121. "messages": [
  122. {
  123. "role": "user",
  124. "content": prompt,
  125. }
  126. ],
  127. "max_tokens": 600,
  128. }
  129. headers = {
  130. "Authorization": f"Bearer {ai_api_key}",
  131. "Content-Type": "application/json",
  132. }
  133. try:
  134. print(
  135. f"[{self.platform_name}] AI上传入口识别: 正在分析 frame={frame_name} HTML..."
  136. )
  137. resp = requests.post(
  138. f"{ai_base_url}/chat/completions",
  139. headers=headers,
  140. json=payload,
  141. timeout=40,
  142. )
  143. if resp.status_code != 200:
  144. print(
  145. f"[{self.platform_name}] AI上传入口识别: API 返回错误 {resp.status_code}"
  146. )
  147. return ""
  148. data = resp.json()
  149. content = (
  150. data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  151. )
  152. # 尝试从 ```json``` 代码块中解析
  153. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  154. if json_match:
  155. json_str = json_match.group(1)
  156. else:
  157. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  158. json_str = json_match.group(0) if json_match else "{}"
  159. try:
  160. result = json.loads(json_str)
  161. except Exception:
  162. result = {}
  163. selector = (result.get("selector") or "").strip()
  164. print(f"[{self.platform_name}] AI上传入口识别结果: selector='{selector}'")
  165. return selector
  166. except Exception as e:
  167. print(f"[{self.platform_name}] AI上传入口识别异常: {e}")
  168. return ""
  169. async def ai_pick_selector_from_candidates(
  170. self, candidates: list, goal: str, frame_name: str = "main"
  171. ) -> str:
  172. """
  173. 将“候选元素列表(包含 css selector + 文本/属性)”发给 AI,让 AI 直接挑选最符合 goal 的元素。
  174. 适用于:HTML 里看不出上传入口、或页面大量动态渲染时。
  175. """
  176. import json
  177. import re
  178. import requests
  179. import os
  180. if not candidates:
  181. return ""
  182. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  183. ai_base_url = os.environ.get(
  184. "DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"
  185. )
  186. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  187. if not ai_api_key:
  188. print(f"[{self.platform_name}] AI候选选择器: 未配置 AI API Key,跳过")
  189. return ""
  190. # 控制长度,最多取前 120 个候选
  191. candidates = candidates[:120]
  192. prompt = f"""
  193. 你是自动化发布工程师。现在要在微信视频号(channels.weixin.qq.com)发布页面里找到“{goal}”相关的入口元素。
  194. 我会给你一组候选元素,每个候选都包含:
  195. - css: 可直接用于 Playwright 的 CSS 选择器
  196. - tag / type / role / ariaLabel / text / id / className(部分字段可能为空)
  197. 你的任务:
  198. - 从候选中选出最可能用于“{goal}”的元素,返回它的 css 选择器;
  199. - 如果没有任何候选符合,返回空字符串。
  200. 注意:
  201. - 如果 goal 是“上传视频入口”,优先选择 input[type=file] 或看起来会触发选择文件/上传的区域;
  202. - 不要选择“发布/发表/下一步”等按钮(除非 goal 明确是发布按钮)。
  203. 请严格按 JSON 输出(不要解释):
  204. ```json
  205. {{ "selector": "..." }}
  206. ```
  207. 候选列表(frame={frame_name}):
  208. ```json
  209. {json.dumps(candidates, ensure_ascii=False)}
  210. ```"""
  211. payload = {
  212. "model": ai_text_model,
  213. "messages": [{"role": "user", "content": prompt}],
  214. "max_tokens": 400,
  215. }
  216. headers = {
  217. "Authorization": f"Bearer {ai_api_key}",
  218. "Content-Type": "application/json",
  219. }
  220. try:
  221. print(
  222. f"[{self.platform_name}] AI候选选择器: 正在分析 frame={frame_name}, goal={goal} ..."
  223. )
  224. resp = requests.post(
  225. f"{ai_base_url}/chat/completions",
  226. headers=headers,
  227. json=payload,
  228. timeout=40,
  229. )
  230. if resp.status_code != 200:
  231. print(
  232. f"[{self.platform_name}] AI候选选择器: API 返回错误 {resp.status_code}"
  233. )
  234. return ""
  235. data = resp.json()
  236. content = (
  237. data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  238. )
  239. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  240. if json_match:
  241. json_str = json_match.group(1)
  242. else:
  243. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  244. json_str = json_match.group(0) if json_match else "{}"
  245. try:
  246. result = json.loads(json_str)
  247. except Exception:
  248. result = {}
  249. selector = (result.get("selector") or "").strip()
  250. print(f"[{self.platform_name}] AI候选选择器结果: selector='{selector}'")
  251. return selector
  252. except Exception as e:
  253. print(f"[{self.platform_name}] AI候选选择器异常: {e}")
  254. return ""
  255. async def _extract_relevant_html_snippets(self, html: str) -> str:
  256. """
  257. 从 HTML 中抽取与上传相关的片段,减少 token,提升 AI 命中率。
  258. - 优先抓取包含 upload/上传/file/input 等关键词的窗口片段
  259. - 若未命中关键词,返回“开头 + 结尾”的拼接
  260. """
  261. import re
  262. if not html:
  263. return ""
  264. patterns = [
  265. r"upload",
  266. r"uploader",
  267. r"file",
  268. r"type\\s*=\\s*['\\\"]file['\\\"]",
  269. r"input",
  270. r"drag",
  271. r"drop",
  272. r"选择",
  273. r"上传",
  274. r"添加",
  275. r"视频",
  276. ]
  277. regex = re.compile("|".join(patterns), re.IGNORECASE)
  278. snippets = []
  279. for m in regex.finditer(html):
  280. start = max(0, m.start() - 350)
  281. end = min(len(html), m.end() + 350)
  282. snippets.append(html[start:end])
  283. if len(snippets) >= 18:
  284. break
  285. if snippets:
  286. # 去重(粗略)
  287. unique = []
  288. seen = set()
  289. for s in snippets:
  290. key = hash(s)
  291. if key not in seen:
  292. seen.add(key)
  293. unique.append(s)
  294. return "\n\n<!-- SNIPPET -->\n\n".join(unique)[:20000]
  295. # fallback: head + tail
  296. head = html[:9000]
  297. tail = html[-9000:] if len(html) > 9000 else ""
  298. return (head + "\n\n<!-- TAIL -->\n\n" + tail)[:20000]
  299. async def init_browser(self, storage_state: str = None):
  300. """
  301. 初始化浏览器 - 参考 matrix 使用 channel=chrome 避免 H264 编码错误
  302. 重要:如果配置了代理,全程都会使用代理(包括页面访问和视频上传)
  303. """
  304. from playwright.async_api import async_playwright
  305. playwright = await async_playwright().start()
  306. proxy = (
  307. self.proxy_config
  308. if isinstance(getattr(self, "proxy_config", None), dict)
  309. else None
  310. )
  311. if proxy and proxy.get("server"):
  312. # 启用上传 bypass 时:仅对上传 CDN 直连,其余仍走代理
  313. if WEIXIN_UPLOAD_BYPASS_PROXY:
  314. bypass = ",".join([
  315. "findeross.weixin.qq.com",
  316. "upload.weixin.qq.com",
  317. "finder.video.qq.com",
  318. "szextshort.weixin.qq.com",
  319. "mp.weixin.qq.com",
  320. "*.cos.qq.com",
  321. "*.cos.ap-*.myqcloud.com",
  322. "*.myqcloud.com",
  323. "*.tencentcloudapi.com",
  324. "*.video.qq.com",
  325. "*.cdn-go.cn",
  326. ])
  327. proxy = dict(proxy)
  328. proxy["bypass"] = bypass
  329. print(
  330. f"[{self.platform_name}] 使用代理(上传 CDN 直连): {proxy.get('server')}",
  331. flush=True,
  332. )
  333. print(
  334. f"[{self.platform_name}] 💡 页面走代理,视频上传 CDN 直连,避免大文件经代理失败",
  335. flush=True,
  336. )
  337. else:
  338. print(
  339. f"[{self.platform_name}] 使用代理(全程): {proxy.get('server')}",
  340. flush=True,
  341. )
  342. print(
  343. f"[{self.platform_name}] 💡 页面访问和视频上传都将通过代理",
  344. flush=True,
  345. )
  346. # 参考 matrix: 使用系统内的 Chrome 浏览器,避免 H264 编码错误
  347. launch_opts = {"headless": self.headless}
  348. if not self.headless:
  349. launch_opts["slow_mo"] = 400
  350. print(
  351. f"[{self.platform_name}] 有头模式 + slow_mo=400ms,浏览器将可见",
  352. flush=True,
  353. )
  354. try:
  355. launch_opts["channel"] = "chrome"
  356. if proxy and proxy.get("server"):
  357. launch_opts["proxy"] = proxy
  358. # 代理下大文件上传优化:禁用 QUIC,部分代理对 QUIC 支持不佳易导致连接中断
  359. launch_opts.setdefault("args", []).append("--disable-quic")
  360. self.browser = await playwright.chromium.launch(**launch_opts)
  361. mode = "代理模式" if proxy else "直连模式"
  362. print(
  363. f"[{self.platform_name}] 使用系统 Chrome 浏览器({mode})", flush=True
  364. )
  365. except Exception as e:
  366. print(
  367. f"[{self.platform_name}] Chrome 不可用,使用 Chromium: {e}", flush=True
  368. )
  369. if "channel" in launch_opts:
  370. del launch_opts["channel"]
  371. if proxy and proxy.get("server"):
  372. launch_opts["proxy"] = proxy
  373. if "--disable-quic" not in (launch_opts.get("args") or []):
  374. launch_opts.setdefault("args", []).append("--disable-quic")
  375. self.browser = await playwright.chromium.launch(**launch_opts)
  376. # 设置 HTTP Headers
  377. headers = {
  378. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  379. "Referer": "https://channels.weixin.qq.com/platform/post/list",
  380. }
  381. self.context = await self.browser.new_context(
  382. extra_http_headers=headers,
  383. ignore_https_errors=True,
  384. viewport={"width": 1920, "height": 1080},
  385. user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  386. )
  387. self.page = await self.context.new_page()
  388. # 注入反检测脚本
  389. if hasattr(self, "inject_stealth_if_available"):
  390. await self.inject_stealth_if_available()
  391. return self.page
  392. async def set_schedule_time(self, publish_date: datetime):
  393. """设置定时发布"""
  394. if not self.page:
  395. return
  396. print(f"[{self.platform_name}] 设置定时发布...")
  397. # 点击定时选项
  398. label_element = self.page.locator("label").filter(has_text="定时").nth(1)
  399. await label_element.click()
  400. # 选择日期
  401. await self.page.click('input[placeholder="请选择发表时间"]')
  402. publish_month = f"{publish_date.month:02d}"
  403. current_month = f"{publish_month}月"
  404. # 检查月份
  405. page_month = await self.page.inner_text(
  406. 'span.weui-desktop-picker__panel__label:has-text("月")'
  407. )
  408. if page_month != current_month:
  409. await self.page.click("button.weui-desktop-btn__icon__right")
  410. # 选择日期
  411. elements = await self.page.query_selector_all(
  412. "table.weui-desktop-picker__table a"
  413. )
  414. for element in elements:
  415. class_name = await element.evaluate("el => el.className")
  416. if "weui-desktop-picker__disabled" in class_name:
  417. continue
  418. text = await element.inner_text()
  419. if text.strip() == str(publish_date.day):
  420. await element.click()
  421. break
  422. # 输入时间
  423. await self.page.click('input[placeholder="请选择时间"]')
  424. await self.page.keyboard.press("Control+KeyA")
  425. await self.page.keyboard.type(str(publish_date.hour))
  426. # 点击其他地方确认
  427. await self.page.locator("div.input-editor").click()
  428. async def handle_upload_error(self, video_path: str):
  429. """处理上传错误(含代理下「网络出错」重试优化)"""
  430. if not self.page:
  431. return
  432. using_proxy = isinstance(
  433. getattr(self, "proxy_config", None), dict
  434. ) and self.proxy_config.get("server")
  435. # 代理模式下先等待,给代理/网络恢复时间,避免连续重试加剧失败
  436. if using_proxy:
  437. wait_sec = 25
  438. print(
  439. f"[{self.platform_name}] 代理模式:检测到上传错误,等待 {wait_sec} 秒后重试...",
  440. flush=True,
  441. )
  442. await asyncio.sleep(wait_sec)
  443. print(f"[{self.platform_name}] 视频出错了,重新上传中...")
  444. # 出错时先截一张当前页面的图,方便排查(代理问题、视频格式问题等)
  445. try:
  446. timestamp = int(time.time() * 1000)
  447. screenshot_dir = os.path.join(
  448. os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
  449. "screenshots",
  450. )
  451. os.makedirs(screenshot_dir, exist_ok=True)
  452. screenshot_path = os.path.join(
  453. screenshot_dir, f"weixin_upload_error_{timestamp}.png"
  454. )
  455. await self.page.screenshot(path=screenshot_path, full_page=True)
  456. print(
  457. f"[{self.platform_name}] 上传错误截图已保存: {screenshot_path}",
  458. flush=True,
  459. )
  460. except Exception as e:
  461. print(f"[{self.platform_name}] 保存上传错误截图失败: {e}", flush=True)
  462. # 删除出错的视频重新上传
  463. await self.page.locator(
  464. 'div.media-status-content div.tag-inner:has-text("删除")'
  465. ).click()
  466. await self.page.get_by_role("button", name="删除", exact=True).click()
  467. file_input = self.page.locator('input[type="file"]')
  468. await file_input.set_input_files(video_path)
  469. async def add_title_tags(self, params: PublishParams):
  470. """添加标题和话题"""
  471. if not self.page:
  472. return
  473. print(f"[{self.platform_name}] 开始添加标题: {params.title}", flush=True)
  474. await self.page.locator("div.input-editor").click()
  475. await self.page.keyboard.type(params.title)
  476. if params.tags:
  477. await self.page.keyboard.press("Enter")
  478. for tag in params.tags:
  479. await self.page.keyboard.type("#" + tag)
  480. await self.page.keyboard.press("Space")
  481. print(
  482. f"[{self.platform_name}] ✓ 成功添加标题和 {len(params.tags)} 个话题",
  483. flush=True,
  484. )
  485. # 🔧 设置位置(使用代理地区或默认位置)
  486. print(f"[{self.platform_name}] 准备设置位置: {params.location}", flush=True)
  487. if params.location:
  488. await self.set_location(params.location)
  489. else:
  490. print(f"[{self.platform_name}] ⚠️ 未设置位置,跳过", flush=True)
  491. async def set_location(self, location: str):
  492. """设置发布位置"""
  493. if not self.page or not location:
  494. return
  495. try:
  496. print(f"[{self.platform_name}] 正在设置位置: {location}", flush=True)
  497. # 等待页面稳定
  498. await asyncio.sleep(1)
  499. # 尝试多种方式找到位置设置元素
  500. location_selectors = [
  501. # 位置输入框
  502. 'input[placeholder*="位置"]',
  503. 'input[placeholder*="所在"]',
  504. 'input[placeholder*="地点"]',
  505. # 位置按钮
  506. 'div:has-text("所在位置")',
  507. 'div:has-text("添加位置")',
  508. 'span:has-text("位置")',
  509. ]
  510. location_element = None
  511. for selector in location_selectors:
  512. try:
  513. element = self.page.locator(selector).first
  514. if await element.count() > 0 and await element.is_visible():
  515. location_element = element
  516. print(
  517. f"[{self.platform_name}] 找到位置元素: {selector}",
  518. flush=True,
  519. )
  520. break
  521. except:
  522. continue
  523. if not location_element:
  524. print(f"[{self.platform_name}] 未找到位置设置元素,跳过", flush=True)
  525. return
  526. # 点击位置元素
  527. await location_element.click()
  528. await asyncio.sleep(1)
  529. # 查找位置输入框
  530. input_selectors = [
  531. 'input[placeholder*="搜索"]',
  532. 'input[placeholder*="输入"]',
  533. 'input[type="text"]',
  534. ]
  535. location_input = None
  536. for selector in input_selectors:
  537. try:
  538. element = self.page.locator(selector).first
  539. if await element.count() > 0 and await element.is_visible():
  540. location_input = element
  541. break
  542. except:
  543. continue
  544. if location_input:
  545. # 输入位置
  546. await location_input.fill(location)
  547. await asyncio.sleep(1)
  548. # 查找匹配的位置选项并点击
  549. try:
  550. # 等待位置建议出现
  551. await asyncio.sleep(1)
  552. # 查找包含位置文本的选项
  553. option = self.page.locator(f'text="{location}"').first
  554. if await option.count() > 0:
  555. await option.click()
  556. print(
  557. f"[{self.platform_name}] ✓ 位置设置成功: {location}",
  558. flush=True,
  559. )
  560. else:
  561. # 如果没有精确匹配,选择第一个建议
  562. first_option = self.page.locator(
  563. 'div[class*="location"] li, div[class*="suggest"] div'
  564. ).first
  565. if await first_option.count() > 0:
  566. await first_option.click()
  567. print(
  568. f"[{self.platform_name}] ✓ 位置已设置(自动选择)",
  569. flush=True,
  570. )
  571. except Exception as e:
  572. print(f"[{self.platform_name}] ⚠️ 选择位置失败: {e}", flush=True)
  573. # 按 Escape 关闭位置选择器
  574. await self.page.keyboard.press("Escape")
  575. else:
  576. print(f"[{self.platform_name}] 未找到位置输入框", flush=True)
  577. await self.page.keyboard.press("Escape")
  578. except Exception as e:
  579. print(f"[{self.platform_name}] 设置位置失败: {e}", flush=True)
  580. try:
  581. await self.page.keyboard.press("Escape")
  582. except:
  583. pass
  584. async def add_short_title(self):
  585. """添加短标题"""
  586. if not self.page:
  587. return
  588. try:
  589. short_title_element = (
  590. self.page.get_by_text("短标题", exact=True)
  591. .locator("..")
  592. .locator("xpath=following-sibling::div")
  593. .locator('span input[type="text"]')
  594. )
  595. if await short_title_element.count():
  596. # 获取已有内容作为短标题
  597. pass
  598. except:
  599. pass
  async def upload_cover(self, cover_path: str):
      """Upload a custom cover image for the video being published.

      Args:
          cover_path: Local path of the cover image.  The method returns
              immediately when the page is not initialised, the path is
              empty, or the file does not exist.

      The "更换封面" (change cover) button is only used when its ``class``
      attribute does not contain ``disabled``.  Any failure is logged and
      swallowed, so a cover problem never aborts the caller.
      """
      if not self.page or not cover_path or not os.path.exists(cover_path):
          return
      try:
          await asyncio.sleep(2)
          change_cover_btn = self.page.locator(
              'div.finder-tag-wrap.btn:has-text("更换封面")'
          )
          # get_attribute() may return None; normalise to "" so the
          # membership test below cannot raise TypeError.
          btn_class = await change_cover_btn.get_attribute("class") or ""
          if "disabled" not in btn_class:
              await change_cover_btn.click()
              cover_slot = self.page.locator(
                  "div.single-cover-uploader-wrap > div.wrap"
              )
              await cover_slot.hover()
              # Remove the existing cover first, if a delete icon is shown.
              delete_icon = self.page.locator(".del-wrap > .svg-icon")
              if await delete_icon.count():
                  await delete_icon.click()
              # Clicking the cover slot opens the native file chooser.
              async with self.page.expect_file_chooser() as fc_info:
                  await cover_slot.click()
              preview_chooser = await fc_info.value
              await preview_chooser.set_files(cover_path)
              await asyncio.sleep(2)
              # Two confirmation buttons are clicked in sequence.
              await self.page.get_by_role("button", name="确定").click()
              await asyncio.sleep(1)
              await self.page.get_by_role("button", name="确认").click()
              print(f"[{self.platform_name}] 封面上传成功")
      except Exception as e:
          print(f"[{self.platform_name}] 封面上传失败: {e}")
  async def check_captcha(self) -> dict:
      """Check the current page for a captcha or a login prompt.

      Returns:
          A dict with two keys:

          * ``need_captcha`` - ``True`` when any probe selector matched.
          * ``captcha_type`` - ``"image"`` for a captcha selector,
            ``"login"`` for a login selector, ``""`` when nothing matched
            or the page is not initialised.

      Captcha selectors are probed before login selectors and the first
      match wins.  A selector that raises is skipped.  Only ``Exception``
      subclasses are swallowed, so task cancellation still propagates.
      """
      if not self.page:
          return {"need_captcha": False, "captcha_type": ""}
      # (captcha_type, log label, selectors), probed in this order.
      probes = (
          (
              "image",
              "检测到验证码",
              (
                  'text="请输入验证码"',
                  'text="滑动验证"',
                  '[class*="captcha"]',
                  '[class*="verify"]',
              ),
          ),
          (
              "login",
              "检测到需要登录",
              (
                  'text="请登录"',
                  'text="扫码登录"',
                  '[class*="login-dialog"]',
              ),
          ),
      )
      try:
          for captcha_type, label, selectors in probes:
              for selector in selectors:
                  try:
                      if await self.page.locator(selector).count() > 0:
                          print(f"[{self.platform_name}] {label}: {selector}")
                          return {
                              "need_captcha": True,
                              "captcha_type": captcha_type,
                          }
                  except Exception:
                      # One broken selector must not stop the other probes.
                      continue
      except Exception as e:
          print(f"[{self.platform_name}] 验证码检测异常: {e}")
      return {"need_captcha": False, "captcha_type": ""}
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video to WeChat Channels.

      The flow, in order:

      1. Start the browser, install the cookies and open ``self.publish_url``.
      2. Abort with a ``need_captcha`` result when the page redirected to a
         login URL or a captcha / login prompt is detected.
      3. Hand the video file to the page, trying the main frame first and
         then every child frame (see ``_try_set_files_in_frame``).
      4. Fill title and tags, then poll until the "发表" button becomes
         enabled, retrying on the page's upload-error state.
      5. Upload the cover, fill the short title, optionally set a schedule,
         click "发表" and wait for the redirect to the post list.

      Args:
          cookies: Serialized cookies, parsed by ``self.parse_cookies``.
          params: Publish parameters.  This method reads ``video_path``,
              ``title``, ``cover_path`` and ``publish_date``.

      Returns:
          A ``PublishResult`` whose ``status`` is ``"success"``,
          ``"failed"``, ``"need_captcha"`` or ``"need_action"`` (the final
          click loop timed out).

      Raises:
          Exception: If the page is not initialised or the video file does
              not exist.
      """
      print(f"\n{'=' * 60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"{'=' * 60}")
      self.report_progress(5, "正在初始化浏览器...")
      # Initialise the browser.
      await self.init_browser()
      print(f"[{self.platform_name}] 浏览器初始化完成")
      # Parse and install the cookies.  Only the count is logged: the
      # cookie values are session credentials and must not reach the logs.
      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)
      if not self.page:
          raise Exception("Page not initialized")
      # Make sure the video file exists before touching the page.
      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(
          f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes"
      )
      self.report_progress(10, "正在打开上传页面...")
      # Behind a proxy, use a longer timeout so large uploads do not time
      # out.  The value is truthy only when proxy_config is a dict with a
      # non-empty "server" entry; it is reused further down.
      using_proxy = isinstance(
          getattr(self, "proxy_config", None), dict
      ) and self.proxy_config.get("server")
      if using_proxy:
          self.page.set_default_timeout(300000)  # 5 minutes
          print(f"[{self.platform_name}] 代理模式:已设置 5 分钟操作超时", flush=True)
      # Open the upload page.  "domcontentloaded" is used instead of
      # "networkidle" so a slow proxy does not cause a timeout.
      await self.page.goto(
          self.publish_url, wait_until="domcontentloaded", timeout=90000
      )
      # Wait for the page to finish loading (best effort).
      try:
          await self.page.wait_for_load_state("load", timeout=30000)
      except Exception:
          pass
      await asyncio.sleep(3)
      # Behind a proxy, wait a little longer before starting the upload.
      if using_proxy:
          print(
              f"[{self.platform_name}] 代理模式:等待 8 秒后开始上传...", flush=True
          )
          await asyncio.sleep(8)
      # Check whether we were redirected to the login page.
      current_url = self.page.url
      print(f"[{self.platform_name}] 当前页面: {current_url}")
      if "login" in current_url:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="Cookie 已过期,需要重新登录",
              need_captcha=True,
              captcha_type="login",
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status="need_captcha",
          )
      # AI-based captcha check.
      ai_captcha = await self.ai_check_captcha()
      if ai_captcha["has_captcha"]:
          print(
              f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}",
              flush=True,
          )
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=ai_captcha["captcha_type"],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status="need_captcha",
          )
      # Selector-based captcha check.
      captcha_result = await self.check_captcha()
      if captcha_result["need_captcha"]:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=captcha_result["captcha_type"],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status="need_captcha",
          )
      self.report_progress(15, "正在选择视频文件...")
      # Upload the video.
      # The publish page DOM differs a lot between accounts, regions and
      # gradual rollouts, and the upload widget may live inside an iframe.
      # So we click to trigger a file chooser and, as a fallback, walk
      # every frame and pick the best-matching video file input.
      upload_success = False
      if not self.page:
          raise Exception("Page not initialized")
      # Wait for the upload area to render so we do not probe too early.
      try:
          await self.page.wait_for_selector(
              "div.upload-content, input[type='file'], iframe", timeout=20000
          )
      except Exception:
          pass

      async def _try_set_files_in_frame(frame, frame_name: str) -> bool:
          """Try to hand the video file to the upload widget in one frame.

          Strategies, in order: the WEIXIN_UPLOAD_SELECTOR override, known
          clickable upload areas, a direct ``input[type=file]``, and finally
          AI-suggested selectors.  Sets ``upload_success`` and returns
          ``True`` on the first strategy that works, ``False`` otherwise.
          """
          nonlocal upload_success
          if upload_success:
              return True
          # Method 0: an explicitly configured selector is tried first.
          if WEIXIN_UPLOAD_SELECTOR:
              try:
                  el = frame.locator(WEIXIN_UPLOAD_SELECTOR).first
                  if await el.count() > 0 and await el.is_visible():
                      print(
                          f"[{self.platform_name}] [{frame_name}] 使用环境变量 WEIXIN_UPLOAD_SELECTOR: {WEIXIN_UPLOAD_SELECTOR}"
                      )
                      try:
                          async with self.page.expect_file_chooser(
                              timeout=5000
                          ) as fc_info:
                              await el.click()
                          chooser = await fc_info.value
                          await chooser.set_files(params.video_path)
                          upload_success = True
                          print(
                              f"[{self.platform_name}] [{frame_name}] 通过环境变量选择器上传成功"
                          )
                          return True
                      except Exception as e:
                          print(
                              f"[{self.platform_name}] [{frame_name}] 环境变量选择器点击失败,尝试直接 set_input_files: {e}"
                          )
                          try:
                              await el.set_input_files(params.video_path)
                              upload_success = True
                              print(
                                  f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 成功"
                              )
                              return True
                          except Exception as e2:
                              print(
                                  f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 仍失败: {e2}"
                              )
              except Exception as e:
                  print(
                      f"[{self.platform_name}] [{frame_name}] 使用环境变量选择器定位元素失败: {e}"
                  )
          # Method 1: click a known upload area to trigger a file chooser.
          click_selectors = [
              "div.upload-content",
              "div[class*='upload-content']",
              "div[class*='upload']",
              "div.add-wrap",
              "[class*='uploader']",
              "text=点击上传",
              "text=上传视频",
              "text=选择视频",
          ]
          for selector in click_selectors:
              try:
                  el = frame.locator(selector).first
                  if await el.count() > 0 and await el.is_visible():
                      print(
                          f"[{self.platform_name}] [{frame_name}] 找到可点击上传区域: {selector}"
                      )
                      try:
                          async with self.page.expect_file_chooser(
                              timeout=5000
                          ) as fc_info:
                              await el.click()
                          chooser = await fc_info.value
                          await chooser.set_files(params.video_path)
                          upload_success = True
                          print(
                              f"[{self.platform_name}] [{frame_name}] 通过 file chooser 上传成功"
                          )
                          return True
                      except Exception as e:
                          print(
                              f"[{self.platform_name}] [{frame_name}] 点击触发 chooser 失败: {e}"
                          )
              except Exception:
                  pass
          # Method 2: set input[type=file] directly (common for iframes and
          # hidden inputs).  Inputs are scored so a video input is preferred.
          try:
              inputs = frame.locator("input[type='file']")
              cnt = await inputs.count()
              if cnt > 0:
                  best_idx = 0
                  best_score = -1
                  for i in range(cnt):
                      try:
                          inp = inputs.nth(i)
                          accept = (await inp.get_attribute("accept")) or ""
                          multiple = (await inp.get_attribute("multiple")) or ""
                          score = 0
                          if "video" in accept:
                              score += 10
                          if "mp4" in accept:
                              score += 3
                          if multiple:
                              score += 1
                          if score > best_score:
                              best_score = score
                              best_idx = i
                      except Exception:
                          continue
                  target = inputs.nth(best_idx)
                  print(
                      f"[{self.platform_name}] [{frame_name}] 尝试对 input[{best_idx}] set_input_files (score={best_score})"
                  )
                  await target.set_input_files(params.video_path)
                  upload_success = True
                  print(
                      f"[{self.platform_name}] [{frame_name}] 通过 file input 上传成功"
                  )
                  return True
          except Exception as e:
              print(f"[{self.platform_name}] [{frame_name}] file input 上传失败: {e}")
              # Do not return here, so the AI fallback below still runs.
          # Method 4: last resort, let the AI guess the upload entry point
          # from the frame HTML.
          try:
              frame_url = getattr(frame, "url", "")
              html_full = await frame.content()
              html_for_ai = await self._extract_relevant_html_snippets(html_full)
              print(
                  f"[{self.platform_name}] [{frame_name}] frame_url={frame_url}, html_len={len(html_full)}, html_for_ai_len={len(html_for_ai)}"
              )
              ai_selector = await self.ai_find_upload_selector(
                  html_for_ai, frame_name=frame_name
              )
              if ai_selector:
                  try:
                      el = frame.locator(ai_selector).first
                      if await el.count() > 0:
                          print(
                              f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器点击上传入口: {ai_selector}"
                          )
                          try:
                              async with self.page.expect_file_chooser(
                                  timeout=5000
                              ) as fc_info:
                                  await el.click()
                              chooser = await fc_info.value
                              await chooser.set_files(params.video_path)
                              upload_success = True
                              print(
                                  f"[{self.platform_name}] [{frame_name}] 通过 AI 选择器上传成功"
                              )
                              return True
                          except Exception as e:
                              print(
                                  f"[{self.platform_name}] [{frame_name}] AI 选择器点击失败,改为直接 set_input_files: {e}"
                              )
                              try:
                                  await el.set_input_files(params.video_path)
                                  upload_success = True
                                  print(
                                      f"[{self.platform_name}] [{frame_name}] AI 选择器直接 set_input_files 成功"
                                  )
                                  return True
                              except Exception as e2:
                                  print(
                                      f"[{self.platform_name}] [{frame_name}] AI 选择器 set_input_files 仍失败: {e2}"
                                  )
                  except Exception as e:
                      print(
                          f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器定位元素失败: {e}"
                      )
              else:
                  # The AI could not infer a selector from the HTML: build a
                  # list of candidate elements and let the AI pick one.
                  try:
                      candidates = await frame.evaluate("""
                          () => {
                            function cssEscape(s) {
                              try { return CSS.escape(s); } catch (e) { return s.replace(/[^a-zA-Z0-9_-]/g, '\\\\$&'); }
                            }
                            function buildSelector(el) {
                              if (!el || el.nodeType !== 1) return '';
                              if (el.id) return `#${cssEscape(el.id)}`;
                              let parts = [];
                              let cur = el;
                              for (let depth = 0; cur && cur.nodeType === 1 && depth < 5; depth++) {
                                let part = cur.tagName.toLowerCase();
                                const role = cur.getAttribute('role');
                                const type = cur.getAttribute('type');
                                if (type) part += `[type="${type}"]`;
                                if (role) part += `[role="${role}"]`;
                                const cls = (cur.className || '').toString().trim().split(/\\s+/).filter(Boolean);
                                if (cls.length) part += '.' + cls.slice(0, 2).map(cssEscape).join('.');
                                // nth-of-type
                                let idx = 1;
                                let sib = cur;
                                while (sib && (sib = sib.previousElementSibling)) {
                                  if (sib.tagName === cur.tagName) idx++;
                                }
                                part += `:nth-of-type(${idx})`;
                                parts.unshift(part);
                                cur = cur.parentElement;
                              }
                              return parts.join(' > ');
                            }
                            const nodes = Array.from(document.querySelectorAll('input, button, a, div, span'))
                              .filter(el => {
                                const tag = el.tagName.toLowerCase();
                                const type = (el.getAttribute('type') || '').toLowerCase();
                                const role = (el.getAttribute('role') || '').toLowerCase();
                                const aria = (el.getAttribute('aria-label') || '').toLowerCase();
                                const txt = (el.innerText || '').trim().slice(0, 60);
                                const cls = (el.className || '').toString().toLowerCase();
                                const isFile = tag === 'input' && type === 'file';
                                const looksClickable =
                                  tag === 'button' || tag === 'a' || role === 'button' || el.onclick ||
                                  cls.includes('upload') || cls.includes('uploader') || cls.includes('drag') ||
                                  aria.includes('上传') || aria.includes('选择') || aria.includes('添加') ||
                                  txt.includes('上传') || txt.includes('选择') || txt.includes('添加') || txt.includes('点击上传');
                                if (!isFile && !looksClickable) return false;
                                const r = el.getBoundingClientRect();
                                const visible = r.width > 5 && r.height > 5;
                                return visible;
                              });
                            const limited = nodes.slice(0, 120).map(el => ({
                              css: buildSelector(el),
                              tag: el.tagName.toLowerCase(),
                              type: el.getAttribute('type') || '',
                              role: el.getAttribute('role') || '',
                              ariaLabel: el.getAttribute('aria-label') || '',
                              text: (el.innerText || '').trim().slice(0, 80),
                              id: el.id || '',
                              className: (el.className || '').toString().slice(0, 120),
                              accept: el.getAttribute('accept') || '',
                            }));
                            return limited;
                          }
                          """)
                      ai_selector2 = await self.ai_pick_selector_from_candidates(
                          candidates=candidates,
                          goal="上传视频入口",
                          frame_name=frame_name,
                      )
                      if ai_selector2:
                          el2 = frame.locator(ai_selector2).first
                          if await el2.count() > 0:
                              print(
                                  f"[{self.platform_name}] [{frame_name}] 使用 AI 候选选择器点击上传入口: {ai_selector2}"
                              )
                              try:
                                  async with self.page.expect_file_chooser(
                                      timeout=5000
                                  ) as fc_info:
                                      await el2.click()
                                  chooser2 = await fc_info.value
                                  await chooser2.set_files(params.video_path)
                                  upload_success = True
                                  print(
                                      f"[{self.platform_name}] [{frame_name}] 通过 AI 候选选择器上传成功"
                                  )
                                  return True
                              except Exception as e:
                                  print(
                                      f"[{self.platform_name}] [{frame_name}] AI 候选选择器点击失败,尝试 set_input_files: {e}"
                                  )
                                  try:
                                      await el2.set_input_files(params.video_path)
                                      upload_success = True
                                      print(
                                          f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 成功"
                                      )
                                      return True
                                  except Exception as e2:
                                      print(
                                          f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 仍失败: {e2}"
                                      )
                  except Exception as e:
                      print(
                          f"[{self.platform_name}] [{frame_name}] 构造候选并交给 AI 失败: {e}"
                      )
          except Exception as e:
              print(
                  f"[{self.platform_name}] [{frame_name}] AI 上传入口识别整体失败: {e}"
              )
          return False

      # Try the main frame first.
      try:
          await _try_set_files_in_frame(self.page.main_frame, "main")
      except Exception as e:
          print(f"[{self.platform_name}] main frame 上传尝试异常: {e}")
      # Then walk every child frame.
      if not upload_success:
          try:
              frames = self.page.frames
              print(f"[{self.platform_name}] 发现 frames: {len(frames)}")
              for idx, fr in enumerate(frames):
                  if upload_success:
                      break
                  # The main frame has already been tried.
                  if fr == self.page.main_frame:
                      continue
                  name = fr.name or f"frame-{idx}"
                  await _try_set_files_in_frame(fr, name)
          except Exception as e:
              print(f"[{self.platform_name}] 遍历 frames 异常: {e}")
      if not upload_success:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到上传入口(可能在 iframe 中或页面结构已变更)",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status="failed",
          )
      self.report_progress(20, "正在填充标题和话题...")
      # Fill in the title and tags.
      await self.add_title_tags(params)
      self.report_progress(30, "等待视频上传完成...")
      # Monitor network traffic and log upload-related domains and statuses.
      from urllib.parse import urlparse

      _upload_domains_seen = set()
      upload_url_keywords = ("upload", "cos.", "myqcloud", "finder", "video", "media")

      def _is_upload_url(url) -> bool:
          """Return True when the URL contains any upload-related keyword."""
          return any(kw in url for kw in upload_url_keywords)

      def _on_request(req):
          # Log the first request seen for each upload-related domain.
          url = req.url
          if _is_upload_url(url):
              domain = urlparse(url).netloc
              method = req.method
              if domain not in _upload_domains_seen:
                  _upload_domains_seen.add(domain)
                  print(f"[{self.platform_name}] ⭐ 上传相关请求: {method} {domain} ({url[:120]})", flush=True)

      def _on_response(resp):
          # Log every upload-related response, flagging status >= 400 or 0.
          url = resp.url
          if _is_upload_url(url):
              domain = urlparse(url).netloc
              status = resp.status
              if status >= 400 or status == 0:
                  print(f"[{self.platform_name}] ❌ 上传响应失败: {status} {domain} ({url[:120]})", flush=True)
              else:
                  print(f"[{self.platform_name}] ✅ 上传响应: {status} {domain}", flush=True)

      def _on_request_failed(req):
          # Log upload-related requests that failed at the network level.
          url = req.url
          if _is_upload_url(url):
              domain = urlparse(url).netloc
              failure = req.failure
              print(f"[{self.platform_name}] ❌ 上传请求失败: {domain} failure={failure} ({url[:120]})", flush=True)

      self.page.on("request", _on_request)
      self.page.on("response", _on_response)
      self.page.on("requestfailed", _on_request_failed)
      print(f"[{self.platform_name}] 已启用上传网络请求监控", flush=True)
      # Behind a proxy, allow more retries and a longer total wait to cope
      # with unstable "network error" states.
      max_upload_error_retries = 20 if using_proxy else 5
      loop_count = 300 if using_proxy else 200  # about 15 minutes with a proxy
      if using_proxy:
          print(
              f"[{self.platform_name}] 代理模式:上传重试上限 {max_upload_error_retries} 次,总等待约 15 分钟",
              flush=True,
          )
      upload_completed = False
      upload_error_retry_count = 0
      for i in range(loop_count):
          try:
              # Each iteration sleeps 3 seconds; bind this on every pass so
              # the screenshot filename below can always read it.
              elapsed_s = i * 3
              # Print progress every 30 seconds so the run does not look stuck.
              if i > 0 and i % 10 == 0:
                  print(
                      f"[{self.platform_name}] 仍在等待上传完成... ({elapsed_s}s)",
                      flush=True,
                  )
                  # Save a screenshot every 60 seconds to help debug stalls.
                  if i % 20 == 0:
                      try:
                          ss_path = await self.save_screenshot_to_file(
                              filename_prefix=f"weixin_upload_waiting_{elapsed_s}s"
                          )
                          if ss_path:
                              print(f"[{self.platform_name}] 等待中截图已保存: {ss_path}", flush=True)
                      except Exception as ss_err:
                          print(f"[{self.platform_name}] 等待中截图失败: {ss_err}", flush=True)
              # Try several selectors for the "发表" button (the page
              # structure may change).
              publish_btn = None
              for sel in [
                  'div.form-btns button:has-text("发表")',
                  'button:has-text("发表")',
                  'button:has-text("立即发表")',
                  '[role="button"]:has-text("发表")',
              ]:
                  try:
                      el = self.page.locator(sel).first
                      if await el.count() > 0 and await el.is_visible():
                          publish_btn = el
                          break
                  except Exception:
                      continue
              if publish_btn:
                  btn_class = await publish_btn.get_attribute("class") or ""
                  # An enabled publish button is treated as "upload finished".
                  if (
                      "weui-desktop-btn_disabled" not in btn_class
                      and "disabled" not in btn_class.lower()
                  ):
                      print(f"[{self.platform_name}] 视频上传完毕")
                      # Upload the cover.
                      self.report_progress(50, "正在上传封面...")
                      await self.upload_cover(params.cover_path)
                      upload_completed = True
                      break
              # Check for an upload error (div.status-msg.error, which
              # includes the "network error, please upload later" state).
              has_error = await self.page.locator("div.status-msg.error").count() > 0
              has_delete_btn = (
                  await self.page.locator(
                      'div.media-status-content div.tag-inner:has-text("删除")'
                  ).count()
                  > 0
              )
              if has_error and has_delete_btn:
                  upload_error_retry_count += 1
                  print(
                      f"[{self.platform_name}] 检测到上传错误,第 {upload_error_retry_count} 次重试",
                      flush=True,
                  )
                  if upload_error_retry_count >= max_upload_error_retries:
                      print(
                          f"[{self.platform_name}] 上传错误重试已达 {max_upload_error_retries} 次,放弃",
                          flush=True,
                      )
                      break
                  # Behind a proxy, on the 6th failure reload the whole
                  # page to rebuild the proxy connection.
                  if using_proxy and upload_error_retry_count == 6:
                      print(
                          f"[{self.platform_name}] 代理模式:尝试整页刷新以重建连接...",
                          flush=True,
                      )
                      try:
                          await self.page.reload(
                              wait_until="domcontentloaded", timeout=60000
                          )
                          await asyncio.sleep(8)
                          await self.page.wait_for_selector(
                              "div.upload-content, input[type='file']", timeout=20000
                          )
                          upload_el = self.page.locator("div.upload-content").first
                          if (
                              await upload_el.count() > 0
                              and await upload_el.is_visible()
                          ):
                              async with self.page.expect_file_chooser(
                                  timeout=10000
                              ) as fc:
                                  await upload_el.click()
                              chooser = await fc.value
                              await chooser.set_files(params.video_path)
                              print(
                                  f"[{self.platform_name}] 刷新后重新上传成功",
                                  flush=True,
                              )
                          else:
                              file_input = self.page.locator(
                                  'input[type="file"]'
                              ).first
                              if await file_input.count() > 0:
                                  await file_input.set_input_files(params.video_path)
                          await asyncio.sleep(2)
                          # The reload cleared the form, so fill it again.
                          await self.add_title_tags(params)
                          upload_error_retry_count = 0
                      except Exception as e:
                          print(
                              f"[{self.platform_name}] 整页刷新重传失败: {e}",
                              flush=True,
                          )
                          await self.handle_upload_error(params.video_path)
                  else:
                      await self.handle_upload_error(params.video_path)
              else:
                  upload_error_retry_count = 0  # reset when no error is shown
              await asyncio.sleep(3)
          except Exception as e:
              print(f"[{self.platform_name}] 等待上传时异常: {e}", flush=True)
              await asyncio.sleep(3)
      # The publish button never became enabled: treat the upload as failed
      # and return a failure result with a screenshot.
      if not upload_completed:
          try:
              screenshot_base64 = await self.capture_screenshot()
          except Exception as e:
              print(f"[{self.platform_name}] 截图失败: {e}", flush=True)
              screenshot_base64 = ""
          try:
              ts = int(time.time() * 1000)
              screenshot_dir = os.path.join(
                  os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                  "screenshots",
              )
              os.makedirs(screenshot_dir, exist_ok=True)
              err_path = os.path.join(
                  screenshot_dir, f"weixin_upload_timeout_{ts}.png"
              )
              await self.page.screenshot(path=err_path, full_page=True)
              print(
                  f"[{self.platform_name}] 超时/失败截图已保存: {err_path}",
                  flush=True,
              )
          except Exception as e:
              print(f"[{self.platform_name}] 保存失败截图到文件失败: {e}", flush=True)
          page_url = await self.get_page_url()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="视频上传失败,请查看截图",
              screenshot_base64=screenshot_base64,
              page_url=page_url,
              status="failed",
          )
      self.report_progress(60, "处理视频设置...")
      # Fill in the short title (best effort).
      try:
          short_title_el = (
              self.page.get_by_text("短标题", exact=True)
              .locator("..")
              .locator("xpath=following-sibling::div")
              .locator('span input[type="text"]')
          )
          if await short_title_el.count():
              short_title = format_short_title(params.title)
              await short_title_el.fill(short_title)
      except Exception:
          # Only Exception is swallowed, so cancellation still propagates.
          pass
      # Scheduled publishing.
      if params.publish_date:
          self.report_progress(70, "设置定时发布...")
          await self.set_schedule_time(params.publish_date)
      self.report_progress(80, "正在发布...")
      # Click publish, then wait for the redirect to the post list.
      for i in range(30):
          try:
              publish_btn = self.page.locator('div.form-btns button:has-text("发表")')
              if await publish_btn.count():
                  print(f"[{self.platform_name}] 点击发布按钮...")
                  await publish_btn.click()
              # Reaching the post list page means the publish succeeded.
              await self.page.wait_for_url(
                  "https://channels.weixin.qq.com/platform/post/list", timeout=10000
              )
              self.report_progress(100, "发布成功")
              print(f"[{self.platform_name}] 视频发布成功!")
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=True,
                  platform=self.platform_name,
                  message="发布成功",
                  screenshot_base64=screenshot_base64,
                  page_url=self.page.url,
                  status="success",
              )
          except Exception:
              # The wait may fail even though the redirect happened, so
              # re-check the URL before retrying.
              current_url = self.page.url
              if "https://channels.weixin.qq.com/platform/post/list" in current_url:
                  self.report_progress(100, "发布成功")
                  print(f"[{self.platform_name}] 视频发布成功!")
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=True,
                      platform=self.platform_name,
                      message="发布成功",
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status="success",
                  )
              else:
                  print(
                      f"[{self.platform_name}] 视频正在发布中... {i + 1}/30, URL: {current_url}"
                  )
                  await asyncio.sleep(1)
      # Publishing timed out.
      screenshot_base64 = await self.capture_screenshot()
      page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="发布超时,请检查发布状态",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status="need_action",
      )
  async def _get_works_fallback_dom(self, page_size: int) -> tuple:
      """Scrape the works list from the current page's DOM.

      Used when the API call fails (for new accounts or other entry points).

      Args:
          page_size: Maximum number of DOM items to parse.

      Returns:
          A 4-tuple ``(works, total, has_more, next_page)``.  ``total`` is
          the number of works parsed, ``has_more`` is ``True`` when the DOM
          holds more items than ``page_size``, and ``next_page`` is always
          ``""``.  On any failure the values collected so far are returned.
      """
      # Imported locally so this method does not depend on the file header.
      import hashlib

      def _stable_digest(text: str) -> str:
          """Return a short digest that is the same in every process.

          The built-in hash() of a str is salted per interpreter run, so it
          cannot be used to build identifiers that must survive a restart.
          """
          return hashlib.sha1(text.encode("utf-8")).hexdigest()[:12]

      works: List[WorkItem] = []
      total = 0
      has_more = False
      try:
          # Wait until any of the known list containers shows up.
          for selector in [
              "div.post-feed-item",
              "[class*='post-feed']",
              "[class*='feed-item']",
              "div[class*='post']",
          ]:
              try:
                  await self.page.wait_for_selector(selector, timeout=8000)
                  break
              except Exception:
                  continue
          post_items = self.page.locator("div.post-feed-item")
          item_count = await post_items.count()
          if item_count == 0:
              # Fall back to a looser class match.
              post_items = self.page.locator("[class*='post-feed']")
              item_count = await post_items.count()
          for i in range(min(item_count, page_size)):
              try:
                  item = post_items.nth(i)
                  # Cover: prefer the dedicated thumbnail, else any <img>.
                  cover_el = item.locator("div.media img.thumb").first
                  cover_url = (
                      await cover_el.get_attribute("src") or ""
                      if await cover_el.count() > 0
                      else ""
                  )
                  if not cover_url:
                      cover_el = item.locator("img").first
                      cover_url = (
                          await cover_el.get_attribute("src") or ""
                          if await cover_el.count() > 0
                          else ""
                      )
                  title_el = item.locator("div.post-title").first
                  title = (
                      (await title_el.text_content() or "").strip()
                      if await title_el.count() > 0
                      else ""
                  )
                  time_el = item.locator("div.post-time span").first
                  publish_time = (
                      (await time_el.text_content() or "").strip()
                      if await time_el.count() > 0
                      else ""
                  )
                  play_count = like_count = comment_count = share_count = (
                      collect_count
                  ) = 0
                  # Each data item is identified by the icon it contains.
                  data_items = item.locator("div.post-data div.data-item")
                  for j in range(await data_items.count()):
                      data_item = data_items.nth(j)
                      count_text = (
                          await data_item.locator("span.count").text_content() or "0"
                      ).strip()
                      if (
                          await data_item.locator(
                              "span.weui-icon-outlined-eyes-on"
                          ).count()
                          > 0
                      ):
                          play_count = self._parse_count(count_text)
                      elif (
                          await data_item.locator(
                              "span.weui-icon-outlined-like"
                          ).count()
                          > 0
                      ):
                          like_count = self._parse_count(count_text)
                      elif (
                          await data_item.locator(
                              "span.weui-icon-outlined-comment"
                          ).count()
                          > 0
                      ):
                          comment_count = self._parse_count(count_text)
                      elif (
                          await data_item.locator(
                              "use[xlink\\:href='#icon-share']"
                          ).count()
                          > 0
                      ):
                          share_count = self._parse_count(count_text)
                      elif (
                          await data_item.locator(
                              "use[xlink\\:href='#icon-thumb']"
                          ).count()
                          > 0
                      ):
                          collect_count = self._parse_count(count_text)
                  # The DOM exposes no real ID, so synthesize one from the
                  # position, title and publish time using a stable digest.
                  work_id = (
                      f"weixin_{i}_{_stable_digest(title)}_{_stable_digest(publish_time)}"
                  )
                  works.append(
                      WorkItem(
                          work_id=work_id,
                          title=title or "无标题",
                          cover_url=cover_url,
                          duration=0,
                          status="published",
                          publish_time=publish_time,
                          play_count=play_count,
                          like_count=like_count,
                          comment_count=comment_count,
                          share_count=share_count,
                          collect_count=collect_count,
                      )
                  )
              except Exception as e:
                  print(
                      f"[{self.platform_name}] DOM 解析作品 {i} 失败: {e}", flush=True
                  )
                  continue
          total = len(works)
          has_more = item_count > page_size
          print(f"[{self.platform_name}] DOM 回退获取 {len(works)} 条", flush=True)
      except Exception as e:
          print(f"[{self.platform_name}] DOM 回退失败: {e}", flush=True)
      return (works, total, has_more, "")
  1481. async def get_works(
  1482. self, cookies: str, page: int = 0, page_size: int = 20
  1483. ) -> WorksResult:
  1484. """获取视频号作品列表(调用 post_list 接口)
  1485. page: 页码从 0 开始,或上一页返回的 rawKeyBuff/lastBuff 字符串
  1486. """
  1487. # 分页:首页 currentPage=1/rawKeyBuff=null,下一页用 currentPage 递增或 rawKeyBuff
  1488. if page is None or page == "" or (isinstance(page, int) and page == 0):
  1489. current_page = 1
  1490. raw_key_buff = None
  1491. elif isinstance(page, int):
  1492. current_page = page + 1
  1493. raw_key_buff = None
  1494. else:
  1495. current_page = 1
  1496. raw_key_buff = str(page)
  1497. ts_ms = str(int(time.time() * 1000))
  1498. print(f"\n{'=' * 60}")
  1499. print(
  1500. f"[{self.platform_name}] 获取作品列表 currentPage={current_page}, pageSize={page_size}, rawKeyBuff={raw_key_buff[:40] if raw_key_buff else 'null'}..."
  1501. )
  1502. print(f"{'=' * 60}")
  1503. works: List[WorkItem] = []
  1504. total = 0
  1505. has_more = False
  1506. next_page = ""
  1507. try:
  1508. await self.init_browser()
  1509. cookie_list = self.parse_cookies(cookies)
  1510. await self.set_cookies(cookie_list)
  1511. if not self.page:
  1512. raise Exception("Page not initialized")
  1513. await self.page.goto(
  1514. "https://channels.weixin.qq.com/platform/post/list", timeout=30000
  1515. )
  1516. await asyncio.sleep(3)
  1517. current_url = self.page.url
  1518. if "login" in current_url:
  1519. raise Exception("Cookie 已过期,请重新登录")
  1520. api_url = "https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_list"
  1521. req_body = {
  1522. "pageSize": page_size,
  1523. "currentPage": current_page,
  1524. "userpageType": 11,
  1525. "stickyOrder": True,
  1526. "timestamp": ts_ms,
  1527. "_log_finder_uin": "",
  1528. "_log_finder_id": "",
  1529. "rawKeyBuff": raw_key_buff,
  1530. "pluginSessionId": None,
  1531. "scene": 7,
  1532. "reqScene": 7,
  1533. }
  1534. body_str = json.dumps(req_body)
  1535. response = await self.page.evaluate(
  1536. """
  1537. async ([url, bodyStr]) => {
  1538. try {
  1539. const resp = await fetch(url, {
  1540. method: 'POST',
  1541. credentials: 'include',
  1542. headers: {
  1543. 'Content-Type': 'application/json',
  1544. 'Accept': '*/*',
  1545. 'Referer': 'https://channels.weixin.qq.com/platform/post/list'
  1546. },
  1547. body: bodyStr
  1548. });
  1549. return await resp.json();
  1550. } catch (e) {
  1551. return { error: e.toString() };
  1552. }
  1553. }
  1554. """,
  1555. [api_url, body_str],
  1556. )
  1557. is_first_page = current_page == 1 and raw_key_buff is None
  1558. if response.get("error"):
  1559. print(
  1560. f"[{self.platform_name}] API 请求失败: {response.get('error')}",
  1561. flush=True,
  1562. )
  1563. if is_first_page:
  1564. (
  1565. works,
  1566. total,
  1567. has_more,
  1568. next_page,
  1569. ) = await self._get_works_fallback_dom(page_size)
  1570. if works:
  1571. return WorksResult(
  1572. success=True,
  1573. platform=self.platform_name,
  1574. works=works,
  1575. total=total,
  1576. has_more=has_more,
  1577. next_page=next_page,
  1578. )
  1579. return WorksResult(
  1580. success=False,
  1581. platform=self.platform_name,
  1582. error=response.get("error", "API 请求失败"),
  1583. )
  1584. err_code = response.get("errCode", -1)
  1585. if err_code != 0:
  1586. err_msg = response.get("errMsg", "unknown")
  1587. print(
  1588. f"[{self.platform_name}] API errCode={err_code}, errMsg={err_msg}, 完整响应(前800字): {json.dumps(response, ensure_ascii=False)[:800]}",
  1589. flush=True,
  1590. )
  1591. if is_first_page:
  1592. (
  1593. works,
  1594. total,
  1595. has_more,
  1596. next_page,
  1597. ) = await self._get_works_fallback_dom(page_size)
  1598. if works:
  1599. return WorksResult(
  1600. success=True,
  1601. platform=self.platform_name,
  1602. works=works,
  1603. total=total,
  1604. has_more=has_more,
  1605. next_page=next_page,
  1606. )
  1607. return WorksResult(
  1608. success=False,
  1609. platform=self.platform_name,
  1610. error=f"errCode={err_code}, errMsg={err_msg}",
  1611. )
  1612. data = response.get("data") or {}
  1613. raw_list = data.get("list") or []
  1614. total = int(data.get("totalCount") or 0)
  1615. has_more = bool(data.get("continueFlag", False))
  1616. next_page = (data.get("lastBuff") or "").strip()
  1617. print(
  1618. f"[{self.platform_name}] API 响应: list_len={len(raw_list)}, totalCount={total}, continueFlag={has_more}, lastBuff={next_page[:50] if next_page else ''}..."
  1619. )
  1620. if is_first_page and len(raw_list) == 0:
  1621. works_fb, total_fb, has_more_fb, _ = await self._get_works_fallback_dom(
  1622. page_size
  1623. )
  1624. if works_fb:
  1625. return WorksResult(
  1626. success=True,
  1627. platform=self.platform_name,
  1628. works=works_fb,
  1629. total=total_fb,
  1630. has_more=has_more_fb,
  1631. next_page="",
  1632. )
  1633. for item in raw_list:
  1634. try:
  1635. # 存 works.platform_video_id 统一用 post_list 接口回参中的 exportId(如 export/xxx)
  1636. work_id = str(
  1637. item.get("exportId")
  1638. or item.get("objectId")
  1639. or item.get("id")
  1640. or ""
  1641. ).strip()
  1642. if not work_id:
  1643. work_id = f"weixin_{hash(item.get('createTime', 0))}_{hash(item.get('desc', {}).get('description', ''))}"
  1644. desc = item.get("desc") or {}
  1645. title = (desc.get("description") or "").strip() or "无标题"
  1646. cover_url = ""
  1647. duration = 0
  1648. media_list = desc.get("media") or []
  1649. if media_list and isinstance(media_list[0], dict):
  1650. m = media_list[0]
  1651. cover_url = (
  1652. m.get("coverUrl") or m.get("thumbUrl") or ""
  1653. ).strip()
  1654. duration = int(m.get("videoPlayLen") or 0)
  1655. create_ts = item.get("createTime") or 0
  1656. if isinstance(create_ts, (int, float)) and create_ts:
  1657. publish_time = datetime.fromtimestamp(create_ts).strftime(
  1658. "%Y-%m-%d %H:%M:%S"
  1659. )
  1660. else:
  1661. publish_time = str(create_ts) if create_ts else ""
  1662. # likeCount=推荐, favCount=点赞
  1663. read_count = int(item.get("readCount") or 0)
  1664. like_count = int(item.get("favCount") or 0)
  1665. comment_count = int(item.get("commentCount") or 0)
  1666. forward_count = int(item.get("forwardCount") or 0)
  1667. works.append(
  1668. WorkItem(
  1669. work_id=work_id,
  1670. title=title,
  1671. cover_url=cover_url,
  1672. duration=duration,
  1673. status="published",
  1674. publish_time=publish_time,
  1675. play_count=read_count,
  1676. like_count=like_count,
  1677. comment_count=comment_count,
  1678. share_count=forward_count,
  1679. collect_count=0,
  1680. )
  1681. )
  1682. except Exception as e:
  1683. print(f"[{self.platform_name}] 解析作品项失败: {e}", flush=True)
  1684. continue
  1685. if total == 0 and works:
  1686. total = len(works)
  1687. print(
  1688. f"[{self.platform_name}] 本页获取 {len(works)} 条,totalCount={total}, next_page={bool(next_page)}"
  1689. )
  1690. except Exception as e:
  1691. import traceback
  1692. traceback.print_exc()
  1693. return WorksResult(success=False, platform=self.platform_name, error=str(e))
  1694. return WorksResult(
  1695. success=True,
  1696. platform=self.platform_name,
  1697. works=works,
  1698. total=total,
  1699. has_more=has_more,
  1700. next_page=next_page,
  1701. )
  1702. async def sync_work_daily_stats_via_browser(
  1703. self, cookies: str, work_id: int, platform_video_id: str
  1704. ) -> dict:
  1705. """
  1706. 通过浏览器自动化同步单个作品的每日数据到 work_day_statistics。
  1707. 流程:
  1708. 1. 打开 statistic/post 页,点击单篇视频 tab,点击近30天
  1709. 2. 监听 post_list 接口,根据 exportId 匹配 platform_video_id 得到 objectId
  1710. 3. 找到 data-row-key=objectId 的行,点击「查看」
  1711. 4. 进入详情页,点击数据详情的近30天,点击下载表格
  1712. 5. 解析 CSV 并返回 statistics 列表(供 Node 保存)
  1713. """
  1714. import csv
  1715. import tempfile
  1716. from pathlib import Path
  1717. result = {
  1718. "success": False,
  1719. "error": "",
  1720. "statistics": [],
  1721. "inserted": 0,
  1722. "updated": 0,
  1723. }
  1724. post_list_data = {"list": []}
  1725. async def handle_response(response):
  1726. try:
  1727. if (
  1728. "statistic/post_list" in response.url
  1729. and response.request.method == "POST"
  1730. ):
  1731. try:
  1732. body = await response.json()
  1733. if body.get("errCode") == 0 and body.get("data"):
  1734. post_list_data["list"] = body.get("data", {}).get(
  1735. "list", []
  1736. )
  1737. except Exception:
  1738. pass
  1739. except Exception:
  1740. pass
  1741. try:
  1742. await self.init_browser()
  1743. cookie_list = self.parse_cookies(cookies)
  1744. await self.set_cookies(cookie_list)
  1745. if not self.page:
  1746. raise Exception("Page not initialized")
  1747. self.page.on("response", handle_response)
  1748. # 1. 打开数据分析-作品数据页
  1749. print(f"[{self.platform_name}] 打开数据分析页...", flush=True)
  1750. await self.page.goto(
  1751. "https://channels.weixin.qq.com/platform/statistic/post", timeout=30000
  1752. )
  1753. if not self.headless:
  1754. print(
  1755. f"[{self.platform_name}] 浏览器已打开,请将窗口置于前台观看操作(等待 5 秒)...",
  1756. flush=True,
  1757. )
  1758. await asyncio.sleep(5)
  1759. else:
  1760. await asyncio.sleep(3)
  1761. if "login" in self.page.url:
  1762. raise Exception("Cookie 已过期,请重新登录")
  1763. # 2. 点击「单篇视频」tab
  1764. tab_sel = "div.weui-desktop-tab__navs ul li:nth-child(2) a"
  1765. try:
  1766. await self.page.wait_for_selector(tab_sel, timeout=8000)
  1767. await self.page.click(tab_sel)
  1768. except Exception:
  1769. tab_sel = "a:has-text('单篇视频')"
  1770. await self.page.click(tab_sel)
  1771. await asyncio.sleep(2)
  1772. # 3. 点击「近30天」(单篇视频页的日期范围筛选)
  1773. # 选择器优先级:精确匹配单篇视频区域内的日期范围 radio 组
  1774. radio_selectors = [
  1775. "div.post-single-wrap div.weui-desktop-radio-group.radio-group label:has-text('近30天')",
  1776. "div.post-single-wrap div.filter-wrap div.weui-desktop-radio-group label:nth-child(2)",
  1777. "div.post-single-wrap div.card-body div.filter-wrap div:nth-child(2) label:nth-child(2)",
  1778. "div.post-single-wrap label:has-text('近30天')",
  1779. "div.weui-desktop-radio-group label:has-text('近30天')",
  1780. "label:has-text('近30天')",
  1781. ]
  1782. clicked = False
  1783. for sel in radio_selectors:
  1784. try:
  1785. el = self.page.locator(sel).first
  1786. if await el.count() > 0:
  1787. await el.click()
  1788. clicked = True
  1789. print(
  1790. f"[{self.platform_name}] 已点击近30天按钮 (selector: {sel[:50]}...)",
  1791. flush=True,
  1792. )
  1793. break
  1794. except Exception as e:
  1795. continue
  1796. if not clicked:
  1797. print(
  1798. f"[{self.platform_name}] 警告: 未找到近30天按钮,继续尝试...",
  1799. flush=True,
  1800. )
  1801. await asyncio.sleep(3)
  1802. # 4. 从 post_list 响应中找 exportId -> objectId
  1803. export_id_to_object = {}
  1804. for item in post_list_data["list"]:
  1805. eid = (item.get("exportId") or "").strip()
  1806. oid = (item.get("objectId") or "").strip()
  1807. if eid and oid:
  1808. export_id_to_object[eid] = oid
  1809. object_id = export_id_to_object.get(
  1810. platform_video_id
  1811. ) or export_id_to_object.get(platform_video_id.strip())
  1812. if not object_id:
  1813. # 尝试宽松匹配(platform_video_id 可能带前缀)
  1814. for eid, oid in export_id_to_object.items():
  1815. if platform_video_id in eid or eid in platform_video_id:
  1816. object_id = oid
  1817. break
  1818. if not object_id:
  1819. result["error"] = (
  1820. f"未在 post_list 中匹配到 exportId={platform_video_id}"
  1821. )
  1822. print(f"[{self.platform_name}] {result['error']}", flush=True)
  1823. return result
  1824. # 5. 找到 data-row-key=objectId 的行,点击「查看」
  1825. view_btn = self.page.locator(
  1826. f'tr[data-row-key="{object_id}"] a.detail-wrap, tr[data-row-key="{object_id}"] a:has-text("查看")'
  1827. )
  1828. try:
  1829. await view_btn.first.wait_for(timeout=5000)
  1830. await view_btn.first.click()
  1831. except Exception as e:
  1832. view_btn = self.page.locator(f'tr[data-row-key="{object_id}"] a')
  1833. if await view_btn.count() > 0:
  1834. await view_btn.first.click()
  1835. else:
  1836. raise Exception(f"未找到 objectId={object_id} 的查看按钮: {e}")
  1837. await asyncio.sleep(3)
  1838. # 6. 详情页:点击数据详情的「近30天」,再点击「下载表格」
  1839. detail_radio = (
  1840. "div.post-statistic-common div.filter-wrap label:nth-child(2)"
  1841. )
  1842. for sel in [detail_radio, "div.main-body label:has-text('近30天')"]:
  1843. try:
  1844. el = self.page.locator(sel).first
  1845. if await el.count() > 0:
  1846. await el.click()
  1847. break
  1848. except Exception:
  1849. continue
  1850. await asyncio.sleep(2)
  1851. # 保存到 server/tmp 目录
  1852. download_dir = Path(__file__).resolve().parent.parent.parent / "tmp"
  1853. download_dir.mkdir(parents=True, exist_ok=True)
  1854. async with self.page.expect_download(timeout=15000) as download_info:
  1855. download_btn = self.page.locator(
  1856. "div.post-statistic-common div.filter-extra a, a:has-text('下载表格')"
  1857. )
  1858. if await download_btn.count() == 0:
  1859. raise Exception("未找到「下载表格」按钮")
  1860. await download_btn.first.click()
  1861. download = await download_info.value
  1862. save_path = download_dir / f"work_{work_id}_{int(time.time())}.csv"
  1863. await download.save_as(save_path)
  1864. # 7. 解析 CSV -> statistics
  1865. stats_list = []
  1866. with open(save_path, "r", encoding="utf-8-sig", errors="replace") as f:
  1867. reader = csv.DictReader(f)
  1868. rows = list(reader)
  1869. for row in rows:
  1870. date_val = (
  1871. row.get("日期")
  1872. or row.get("date")
  1873. or row.get("时间")
  1874. or row.get("时间周期", "")
  1875. ).strip()
  1876. if not date_val:
  1877. continue
  1878. dt = None
  1879. norm = (
  1880. date_val[:10]
  1881. .replace("年", "-")
  1882. .replace("月", "-")
  1883. .replace("日", "-")
  1884. .replace("/", "-")
  1885. )
  1886. if len(norm) >= 8 and norm.count("-") >= 2:
  1887. parts = norm.split("-")
  1888. if len(parts) == 3:
  1889. try:
  1890. y, m, d = int(parts[0]), int(parts[1]), int(parts[2])
  1891. if 2000 <= y <= 2100 and 1 <= m <= 12 and 1 <= d <= 31:
  1892. dt = datetime(y, m, d)
  1893. except (ValueError, IndexError):
  1894. pass
  1895. if not dt:
  1896. for fmt in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"]:
  1897. try:
  1898. dt = datetime.strptime(
  1899. (date_val.split()[0] if date_val else "")[:10], fmt
  1900. )
  1901. break
  1902. except (ValueError, IndexError):
  1903. dt = None
  1904. if not dt:
  1905. continue
  1906. rec_date = dt.strftime("%Y-%m-%d")
  1907. play = self._parse_count(
  1908. row.get("播放", "")
  1909. or row.get("播放量", "")
  1910. or row.get("play_count", "0")
  1911. )
  1912. like = self._parse_count(
  1913. row.get("点赞", "") or row.get("like_count", "0")
  1914. )
  1915. comment = self._parse_count(
  1916. row.get("评论", "") or row.get("comment_count", "0")
  1917. )
  1918. share = self._parse_count(
  1919. row.get("分享", "") or row.get("share_count", "0")
  1920. )
  1921. collect = self._parse_count(
  1922. row.get("收藏", "") or row.get("collect_count", "0")
  1923. )
  1924. comp_rate = (
  1925. row.get("完播率", "") or row.get("completion_rate", "0")
  1926. ).strip().rstrip("%") or "0"
  1927. avg_dur = (
  1928. row.get("平均播放时长", "") or row.get("avg_watch_duration", "0")
  1929. ).strip()
  1930. stats_list.append(
  1931. {
  1932. "work_id": work_id,
  1933. "record_date": rec_date,
  1934. "play_count": play,
  1935. "like_count": like,
  1936. "comment_count": comment,
  1937. "share_count": share,
  1938. "collect_count": collect,
  1939. "completion_rate": comp_rate,
  1940. "avg_watch_duration": avg_dur,
  1941. }
  1942. )
  1943. result["statistics"] = stats_list
  1944. result["success"] = True
  1945. try:
  1946. os.remove(save_path)
  1947. except Exception:
  1948. pass
  1949. except Exception as e:
  1950. import traceback
  1951. traceback.print_exc()
  1952. result["error"] = str(e)
  1953. finally:
  1954. try:
  1955. await self.close_browser()
  1956. except Exception:
  1957. pass
  1958. return result
  1959. async def sync_account_works_daily_stats_via_browser(
  1960. self,
  1961. cookies: str,
  1962. works: List[dict],
  1963. save_fn=None,
  1964. update_works_fn=None,
  1965. headless: bool = True,
  1966. ) -> dict:
  1967. """
  1968. 纯浏览器批量同步账号下所有作品(在库的)的每日数据到 work_day_statistics。
  1969. 流程:
  1970. 1. 打开 statistic/post → 点击单篇视频 → 点击近30天
  1971. 2. 【首次】监听 post_list 接口 → 解析响应更新 works 表 yesterday_* 字段
  1972. 3. 监听 post_list 获取 exportId->objectId 映射
  1973. 4. 遍历 post_list 的每一条:
  1974. - 若 exportId 在 works 的 platform_video_id 中无匹配 → 跳过
  1975. - 若匹配 → 找到 data-row-key=objectId 的行,点击「查看」
  1976. - 详情页:默认近7天,直接监听 feed_aggreagate_data_by_tab_type 接口
  1977. - 从「全部」tab 解析 browse/like/comment/forward/fav/follow,日期从昨天往前推
  1978. - 通过 save_fn 存入 work_day_statistics
  1979. - 返回列表页,继续下一条
  1980. works: [{"work_id": int, "platform_video_id": str}, ...]
  1981. save_fn: (stats_list: List[dict]) -> {inserted, updated},由调用方传入,用于调用 Node batch-dates
  1982. update_works_fn: (updates: List[dict]) -> {updated},由调用方传入,用于将 post_list 解析数据更新到 works 表(仅首次调用)
  1983. """
  1984. from pathlib import Path
  1985. from datetime import timedelta
  1986. result = {
  1987. "success": True,
  1988. "error": "",
  1989. "total_processed": 0,
  1990. "total_skipped": 0,
  1991. "inserted": 0,
  1992. "updated": 0,
  1993. "works_updated": 0,
  1994. }
  1995. # platform_video_id(exportId) -> work_id
  1996. export_id_to_work = {}
  1997. for w in works:
  1998. pvid = (
  1999. w.get("platform_video_id") or w.get("platformVideoId") or ""
  2000. ).strip()
  2001. wid = w.get("work_id") or w.get("workId")
  2002. if pvid and wid is not None:
  2003. export_id_to_work[pvid] = int(wid)
  2004. # 兼容可能带/不带前缀(如 export/xxx vs xxx)
  2005. if "/" in pvid:
  2006. export_id_to_work[pvid.split("/")[-1]] = int(wid)
  2007. post_list_data = {"list": []}
  2008. feed_aggreagate_data = {"body": None}
  2009. async def handle_response(response):
  2010. try:
  2011. url = response.url
  2012. if "statistic/post_list" in url:
  2013. try:
  2014. body = await response.json()
  2015. if body.get("errCode") == 0 and body.get("data"):
  2016. post_list_data["list"] = body.get("data", {}).get(
  2017. "list", []
  2018. )
  2019. except Exception:
  2020. pass
  2021. elif "feed_aggreagate_data_by_tab_type" in url:
  2022. try:
  2023. body = await response.json()
  2024. if body.get("errCode") == 0 and body.get("data"):
  2025. feed_aggreagate_data["body"] = body
  2026. except Exception:
  2027. pass
  2028. except Exception:
  2029. pass
  2030. try:
  2031. await self.init_browser()
  2032. cookie_list = self.parse_cookies(cookies)
  2033. await self.set_cookies(cookie_list)
  2034. if not self.page:
  2035. raise Exception("Page not initialized")
  2036. self.page.on("response", handle_response)
  2037. # 1. 打开数据分析-作品数据页
  2038. print(f"[{self.platform_name}] 打开数据分析页...", flush=True)
  2039. await self.page.goto(
  2040. "https://channels.weixin.qq.com/platform/statistic/post", timeout=30000
  2041. )
  2042. if not headless:
  2043. print(
  2044. f"[{self.platform_name}] 浏览器已打开,请将窗口置于前台观看操作(等待 5 秒)...",
  2045. flush=True,
  2046. )
  2047. await asyncio.sleep(5)
  2048. else:
  2049. await asyncio.sleep(3)
  2050. if "login" in self.page.url:
  2051. raise Exception("Cookie 已过期,请重新登录")
  2052. # 2. 点击「单篇视频」tab
  2053. tab_sel = "div.weui-desktop-tab__navs ul li:nth-child(2) a"
  2054. try:
  2055. await self.page.wait_for_selector(tab_sel, timeout=8000)
  2056. await self.page.click(tab_sel)
  2057. except Exception:
  2058. tab_sel = "a:has-text('单篇视频')"
  2059. await self.page.click(tab_sel)
  2060. await asyncio.sleep(2)
  2061. # 3. 点击「近30天」前清空 list,点击后等待 handler 捕获带 fullPlayRate 的 post_list
  2062. post_list_data["list"] = []
  2063. radio_selectors = [
  2064. "div.post-single-wrap div.weui-desktop-radio-group.radio-group label:has-text('近30天')",
  2065. "div.post-single-wrap div.filter-wrap div.weui-desktop-radio-group label:nth-child(2)",
  2066. "div.post-single-wrap label:has-text('近30天')",
  2067. "div.weui-desktop-radio-group label:has-text('近30天')",
  2068. "label:has-text('近30天')",
  2069. ]
  2070. clicked = False
  2071. for sel in radio_selectors:
  2072. try:
  2073. el = self.page.locator(sel).first
  2074. if await el.count() > 0:
  2075. await el.click()
  2076. clicked = True
  2077. print(
  2078. f"[{self.platform_name}] 已点击近30天 (selector: {sel[:40]}...)",
  2079. flush=True,
  2080. )
  2081. break
  2082. except Exception:
  2083. continue
  2084. if not clicked:
  2085. print(f"[{self.platform_name}] 警告: 未找到近30天按钮", flush=True)
  2086. await asyncio.sleep(5)
  2087. # 4. 从 post_list 获取列表
  2088. items = post_list_data["list"]
  2089. if not items:
  2090. result["error"] = "未监听到 post_list 或列表为空"
  2091. print(f"[{self.platform_name}] {result['error']}", flush=True)
  2092. return result
  2093. # 4.5 【仅首次】从 post_list 接口响应解析数据 → 更新 works 表(不再下载 CSV)
  2094. # post_list 返回字段映射: readCount->播放量, likeCount->点赞, commentCount->评论, forwardCount->分享,
  2095. # fullPlayRate->完播率(0-1小数), avgPlayTimeSec->平均播放时长(秒), exportId->匹配 work_id
  2096. if update_works_fn and items:
  2097. try:
  2098. updates = []
  2099. for it in items:
  2100. eid = (it.get("exportId") or "").strip()
  2101. if not eid:
  2102. continue
  2103. work_id = export_id_to_work.get(eid)
  2104. if work_id is None:
  2105. for k, v in export_id_to_work.items():
  2106. if eid in k or k in eid:
  2107. work_id = v
  2108. break
  2109. if work_id is None:
  2110. continue
  2111. # likeCount=推荐, favCount=点赞
  2112. read_count = int(it.get("readCount") or 0)
  2113. recommend_count = int(it.get("likeCount") or 0)
  2114. like_count = int(it.get("favCount") or 0)
  2115. comment_count = int(it.get("commentCount") or 0)
  2116. forward_count = int(it.get("forwardCount") or 0)
  2117. follow_count = int(it.get("followCount") or 0)
  2118. full_play_rate = it.get("fullPlayRate")
  2119. if full_play_rate is not None:
  2120. comp_rate = f"{float(full_play_rate) * 100:.2f}%"
  2121. else:
  2122. comp_rate = "0"
  2123. avg_sec = it.get("avgPlayTimeSec")
  2124. if avg_sec is not None:
  2125. avg_dur = f"{float(avg_sec):.2f}秒"
  2126. else:
  2127. avg_dur = "0"
  2128. updates.append(
  2129. {
  2130. "work_id": work_id,
  2131. "yesterday_play_count": read_count,
  2132. "yesterday_like_count": like_count,
  2133. "yesterday_recommend_count": recommend_count,
  2134. "yesterday_comment_count": comment_count,
  2135. "yesterday_share_count": forward_count,
  2136. "yesterday_follow_count": follow_count,
  2137. "yesterday_completion_rate": comp_rate,
  2138. "yesterday_avg_watch_duration": avg_dur,
  2139. }
  2140. )
  2141. if updates:
  2142. try:
  2143. save_result = update_works_fn(updates)
  2144. result["works_updated"] = save_result.get("updated", 0)
  2145. except Exception as api_err:
  2146. import traceback
  2147. traceback.print_exc()
  2148. except Exception as e:
  2149. import traceback
  2150. traceback.print_exc()
  2151. print(
  2152. f"[{self.platform_name}] 解析 post_list 更新 works 失败: {e}",
  2153. flush=True,
  2154. )
  2155. # 辅助:点击单篇视频 + 近30天,恢复列表视图(go_back 后会回到全部视频页)
  2156. async def ensure_single_video_near30():
  2157. tab_sel = "div.weui-desktop-tab__navs ul li:nth-child(2) a"
  2158. try:
  2159. await self.page.wait_for_selector(tab_sel, timeout=8000)
  2160. await self.page.click(tab_sel)
  2161. except Exception:
  2162. await self.page.click("a:has-text('单篇视频')")
  2163. await asyncio.sleep(2)
  2164. for sel in [
  2165. "div.post-single-wrap div.weui-desktop-radio-group.radio-group label:has-text('近30天')",
  2166. "div.post-single-wrap label:has-text('近30天')",
  2167. "div.weui-desktop-radio-group label:has-text('近30天')",
  2168. "label:has-text('近30天')",
  2169. ]:
  2170. try:
  2171. el = self.page.locator(sel).first
  2172. if await el.count() > 0:
  2173. await el.click()
  2174. break
  2175. except Exception:
  2176. continue
  2177. await asyncio.sleep(3)
  2178. # 5. 遍历每一条,按 exportId 匹配作品
  2179. processed_export_ids = set()
  2180. for idx, item in enumerate(items):
  2181. eid = (item.get("exportId") or "").strip()
  2182. oid = (item.get("objectId") or "").strip()
  2183. if not oid:
  2184. continue
  2185. # 已处理过的跳过(理论上循环顺序即处理顺序,此处做双重保险)
  2186. if eid in processed_export_ids:
  2187. print(
  2188. f"[{self.platform_name}] 跳过 [{idx + 1}] exportId={eid} (已处理)",
  2189. flush=True,
  2190. )
  2191. continue
  2192. # go_back 后回到全部视频页,需重新点击单篇视频+近30天
  2193. if idx > 0:
  2194. await ensure_single_video_near30()
  2195. # 匹配 work_id
  2196. work_id = export_id_to_work.get(eid)
  2197. if work_id is None:
  2198. for k, v in export_id_to_work.items():
  2199. if eid in k or k in eid:
  2200. work_id = v
  2201. break
  2202. if work_id is None:
  2203. result["total_skipped"] += 1
  2204. print(
  2205. f"[{self.platform_name}] 跳过 [{idx + 1}] exportId={eid} (库中无对应作品)",
  2206. flush=True,
  2207. )
  2208. continue
  2209. # 点击「查看」:Ant Design 表格 tr[data-row-key] > td > div.slot-wrap > a.detail-wrap
  2210. # 操作列可能在 ant-table-fixed-right 内,优先尝试
  2211. view_selectors = [
  2212. f'div.ant-table-fixed-right tr[data-row-key="{oid}"] a.detail-wrap',
  2213. f'tr[data-row-key="{oid}"] a.detail-wrap',
  2214. f'tr[data-row-key="{oid}"] td a.detail-wrap',
  2215. f'tr[data-row-key="{oid}"] a:has-text("查看")',
  2216. f'tr[data-row-key="{oid}"] a',
  2217. ]
  2218. clicked = False
  2219. for sel in view_selectors:
  2220. view_btn = self.page.locator(sel)
  2221. if await view_btn.count() > 0:
  2222. try:
  2223. await view_btn.first.wait_for(timeout=3000)
  2224. await view_btn.first.click()
  2225. clicked = True
  2226. print(
  2227. f"[{self.platform_name}] 已点击查看 (selector: {sel[:40]}...)",
  2228. flush=True,
  2229. )
  2230. break
  2231. except Exception as e:
  2232. continue
  2233. if not clicked:
  2234. print(
  2235. f"[{self.platform_name}] 未找到 objectId={oid} 的查看按钮",
  2236. flush=True,
  2237. )
  2238. result["total_skipped"] += 1
  2239. continue
  2240. await asyncio.sleep(3)
  2241. # 详情页:默认展示近7天,页面加载时自动请求 feed_aggreagate,不清空 body 避免覆盖已监听到的响应
  2242. await asyncio.sleep(4)
  2243. # 从 feed_aggreagate 响应解析「全部」数据
  2244. # 数据结构: data.dataByFanstype[].dataByTabtype[] 中 tabTypeName="全部" 或 tabType=999
  2245. # 日期:从昨天往前推 N 天(含昨天),数组从最早到最晚排列
  2246. body = feed_aggreagate_data.get("body")
  2247. if not body or not body.get("data"):
  2248. print(
  2249. f"[{self.platform_name}] work_id={work_id} 未监听到 feed_aggreagate 有效响应",
  2250. flush=True,
  2251. )
  2252. await self.page.go_back()
  2253. await asyncio.sleep(2)
  2254. continue
  2255. tab_all = None
  2256. for fan_item in body.get("data", {}).get("dataByFanstype", []):
  2257. for tab_item in fan_item.get("dataByTabtype", []):
  2258. if (
  2259. tab_item.get("tabTypeName") == "全部"
  2260. or tab_item.get("tabType") == 999
  2261. ):
  2262. tab_all = tab_item.get("data")
  2263. break
  2264. if tab_all is not None:
  2265. break
  2266. if not tab_all:
  2267. tab_all = (
  2268. body.get("data", {}).get("feedData", [{}])[0].get("totalData")
  2269. )
  2270. if not tab_all:
  2271. print(
  2272. f"[{self.platform_name}] work_id={work_id} 未找到「全部」数据",
  2273. flush=True,
  2274. )
  2275. await self.page.go_back()
  2276. await asyncio.sleep(2)
  2277. continue
  2278. browse = tab_all.get("browse", [])
  2279. n = len(browse)
  2280. if n == 0:
  2281. print(
  2282. f"[{self.platform_name}] work_id={work_id} browse 为空",
  2283. flush=True,
  2284. )
  2285. await self.page.go_back()
  2286. await asyncio.sleep(2)
  2287. continue
  2288. # 日期:昨天往前推 n 天,index 0 = 最早日
  2289. today = datetime.now().replace(
  2290. hour=0, minute=0, second=0, microsecond=0
  2291. )
  2292. yesterday = today - timedelta(days=1)
  2293. start_date = yesterday - timedelta(days=n - 1)
  2294. # like=推荐, fav=点赞
  2295. like_arr = tab_all.get("like", [])
  2296. comment_arr = tab_all.get("comment", [])
  2297. forward_arr = tab_all.get("forward", [])
  2298. fav_arr = tab_all.get("fav", [])
  2299. follow_arr = tab_all.get("follow", [])
  2300. stats_list = []
  2301. for i in range(n):
  2302. rec_dt = start_date + timedelta(days=i)
  2303. rec_date = rec_dt.strftime("%Y-%m-%d")
  2304. play = self._parse_count(browse[i] if i < len(browse) else "0")
  2305. recommend = self._parse_count(
  2306. like_arr[i] if i < len(like_arr) else "0"
  2307. )
  2308. like = self._parse_count(fav_arr[i] if i < len(fav_arr) else "0")
  2309. comment = self._parse_count(
  2310. comment_arr[i] if i < len(comment_arr) else "0"
  2311. )
  2312. share = self._parse_count(
  2313. forward_arr[i] if i < len(forward_arr) else "0"
  2314. )
  2315. follow = self._parse_count(
  2316. follow_arr[i] if i < len(follow_arr) else "0"
  2317. )
  2318. stats_list.append(
  2319. {
  2320. "work_id": work_id,
  2321. "record_date": rec_date,
  2322. "play_count": play,
  2323. "like_count": like,
  2324. "recommend_count": recommend,
  2325. "comment_count": comment,
  2326. "share_count": share,
  2327. "collect_count": 0,
  2328. "follow_count": follow,
  2329. "completion_rate": "0",
  2330. "avg_watch_duration": "0",
  2331. }
  2332. )
  2333. print(
  2334. f"[{self.platform_name}] work_id={work_id} 从 feed_aggreagate 解析得到 {len(stats_list)} 条日统计",
  2335. flush=True,
  2336. )
  2337. # 存入 work_day_statistics(通过 save_fn 调用 Node)
  2338. if save_fn and stats_list:
  2339. try:
  2340. save_result = save_fn(stats_list)
  2341. result["inserted"] += save_result.get("inserted", 0)
  2342. result["updated"] += save_result.get("updated", 0)
  2343. except Exception as e:
  2344. print(
  2345. f"[{self.platform_name}] work_id={work_id} 保存失败: {e}",
  2346. flush=True,
  2347. )
  2348. result["total_processed"] += 1
  2349. processed_export_ids.add(eid)
  2350. # 返回列表页,继续下一条(会回到全部视频页,下次循环会重新点击单篇视频+近30天)
  2351. await self.page.go_back()
  2352. await asyncio.sleep(2)
  2353. print(
  2354. f"[{self.platform_name}] 批量同步完成: 处理 {result['total_processed']} 个作品, 跳过 {result['total_skipped']} 个",
  2355. flush=True,
  2356. )
  2357. except Exception as e:
  2358. import traceback
  2359. traceback.print_exc()
  2360. result["success"] = False
  2361. result["error"] = str(e)
  2362. finally:
  2363. try:
  2364. await self.close_browser()
  2365. except Exception:
  2366. pass
  2367. return result
  2368. async def get_comments(
  2369. self, cookies: str, work_id: str, cursor: str = ""
  2370. ) -> CommentsResult:
  2371. """
  2372. 获取视频号作品评论(完全参考 get_weixin_work_comments.py 的接口监听逻辑)
  2373. 支持递归提取二级评论,正确处理 parent_comment_id
  2374. """
  2375. print(f"\n{'=' * 60}")
  2376. print(f"[{self.platform_name}] 获取作品评论")
  2377. print(f"[{self.platform_name}] work_id={work_id}")
  2378. print(f"{'=' * 60}")
  2379. comments: List[CommentItem] = []
  2380. total = 0
  2381. has_more = False
  2382. try:
  2383. await self.init_browser()
  2384. cookie_list = self.parse_cookies(cookies)
  2385. await self.set_cookies(cookie_list)
  2386. if not self.page:
  2387. raise Exception("Page not initialized")
  2388. # 访问评论管理页面
  2389. print(f"[{self.platform_name}] 正在打开评论页面...")
  2390. await self.page.goto(
  2391. "https://channels.weixin.qq.com/platform/interaction/comment",
  2392. timeout=30000,
  2393. )
  2394. await asyncio.sleep(2)
  2395. # 检查登录状态
  2396. current_url = self.page.url
  2397. if "login" in current_url:
  2398. raise Exception("Cookie 已过期,请重新登录")
  2399. # === 步骤1: 监听 post_list 接口获取作品列表 ===
  2400. posts = []
  2401. try:
  2402. async with self.page.expect_response(
  2403. lambda res: "/post/post_list" in res.url, timeout=20000
  2404. ) as post_resp_info:
  2405. await self.page.wait_for_selector(
  2406. ".scroll-list .comment-feed-wrap", timeout=15000
  2407. )
  2408. post_resp = await post_resp_info.value
  2409. post_data = await post_resp.json()
  2410. if post_data.get("errCode") == 0:
  2411. posts = post_data.get("data", {}).get("list", [])
  2412. print(f"[{self.platform_name}] ✅ 获取 {len(posts)} 个作品")
  2413. else:
  2414. err_msg = post_data.get("errMsg", "未知错误")
  2415. print(f"[{self.platform_name}] ❌ post_list 业务错误: {err_msg}")
  2416. return CommentsResult(
  2417. success=False,
  2418. platform=self.platform_name,
  2419. work_id=work_id,
  2420. error=f"post_list 业务错误: {err_msg}",
  2421. )
  2422. except Exception as e:
  2423. print(f"[{self.platform_name}] ❌ 获取 post_list 失败: {e}")
  2424. return CommentsResult(
  2425. success=False,
  2426. platform=self.platform_name,
  2427. work_id=work_id,
  2428. error=f"获取 post_list 失败: {e}",
  2429. )
  2430. # === 步骤2: 在 DOM 中查找目标作品 ===
  2431. feed_wraps = await self.page.query_selector_all(
  2432. ".scroll-list .comment-feed-wrap"
  2433. )
  2434. target_feed = None
  2435. target_post = None
  2436. target_index = -1
  2437. for i, feed in enumerate(feed_wraps):
  2438. if i >= len(posts):
  2439. break
  2440. post = posts[i]
  2441. object_nonce = post.get("objectNonce", "")
  2442. post_work_id = post.get("objectId", "") or object_nonce
  2443. # 匹配 work_id(支持 objectId 或 objectNonce 匹配)
  2444. if (
  2445. work_id in [post_work_id, object_nonce]
  2446. or post_work_id in work_id
  2447. or object_nonce in work_id
  2448. ):
  2449. target_feed = feed
  2450. target_post = post
  2451. target_index = i
  2452. work_title = post.get("desc", {}).get("description", "无标题")
  2453. print(f"[{self.platform_name}] ✅ 找到目标作品: {work_title}")
  2454. continue
  2455. if not target_feed or not target_post:
  2456. print(f"[{self.platform_name}] ❌ 未找到 work_id={work_id} 对应的作品")
  2457. return CommentsResult(
  2458. success=True,
  2459. platform=self.platform_name,
  2460. work_id=work_id,
  2461. comments=[],
  2462. total=0,
  2463. has_more=False,
  2464. )
  2465. # 准备作品信息(用于递归函数)
  2466. object_nonce = target_post.get("objectNonce", f"nonce_{target_index}")
  2467. work_title = target_post.get("desc", {}).get(
  2468. "description", f"作品{target_index + 1}"
  2469. )
  2470. work_info = {"work_id": object_nonce, "work_title": work_title}
  2471. # === 步骤3: 点击作品触发 comment_list 接口 ===
  2472. content_wrap = (
  2473. await target_feed.query_selector(".feed-content") or target_feed
  2474. )
  2475. try:
  2476. async with self.page.expect_response(
  2477. lambda res: "/comment/comment_list" in res.url, timeout=15000
  2478. ) as comment_resp_info:
  2479. await content_wrap.click()
  2480. await asyncio.sleep(0.8)
  2481. comment_resp = await comment_resp_info.value
  2482. comment_data = await comment_resp.json()
  2483. if comment_data.get("errCode") != 0:
  2484. err_msg = comment_data.get("errMsg", "未知错误")
  2485. print(f"[{self.platform_name}] ❌ 评论接口错误: {err_msg}")
  2486. return CommentsResult(
  2487. success=False,
  2488. platform=self.platform_name,
  2489. work_id=work_id,
  2490. error=f"评论接口错误: {err_msg}",
  2491. )
  2492. raw_comments = comment_data.get("data", {}).get("comment", [])
  2493. total = comment_data.get("data", {}).get(
  2494. "totalCount", len(raw_comments)
  2495. )
  2496. print(
  2497. f"[{self.platform_name}] 📊 原始评论数: {len(raw_comments)}, 总数: {total}"
  2498. )
  2499. # === 步骤4: 递归提取所有评论(含子评论)===
  2500. extracted = self._extract_comments(
  2501. raw_comments, parent_id="", work_info=work_info
  2502. )
  2503. # === 步骤5: 转换为 CommentItem 列表(保留 weixin.py 的数据结构)===
  2504. for c in extracted:
  2505. # 使用接口返回的 comment_id
  2506. comment_id = c.get("comment_id", "")
  2507. parent_comment_id = c.get("parent_comment_id", "")
  2508. # 构建 CommentItem(保留原有数据结构用于数据库入库)
  2509. comment_item = CommentItem(
  2510. comment_id=comment_id,
  2511. parent_comment_id=parent_comment_id,
  2512. work_id=work_id,
  2513. content=c.get("content", ""),
  2514. author_id=c.get("username", ""), # 使用 username 作为 author_id
  2515. author_name=c.get("nickname", ""),
  2516. author_avatar=c.get("avatar", ""),
  2517. like_count=c.get("like_count", 0),
  2518. reply_count=0,
  2519. create_time=c.get("create_time", ""),
  2520. )
  2521. # 添加扩展字段(用于数据库存储和后续处理)
  2522. # comment_item.parent_comment_id = c.get("parent_comment_id", "")
  2523. comment_item.is_author = c.get("is_author", False)
  2524. comment_item.create_time_unix = c.get("create_time_unix", 0)
  2525. comment_item.work_title = c.get("work_title", "")
  2526. print(comment_item)
  2527. comments.append(comment_item)
  2528. # 打印日志
  2529. author_tag = " 👤(作者)" if c.get("is_author") else ""
  2530. parent_tag = (
  2531. f" [回复: {c.get('parent_comment_id', '')}]"
  2532. if c.get("parent_comment_id")
  2533. else ""
  2534. )
  2535. print(
  2536. f"[{self.platform_name}] - [{c.get('nickname', '')}] {c.get('content', '')[:30]}... "
  2537. f"({c.get('create_time', '')}){author_tag}{parent_tag}"
  2538. )
  2539. # 判断是否还有更多(优先使用接口返回的 continueFlag,否则根据数量判断)
  2540. has_more = (
  2541. comment_data.get("data", {}).get("continueFlag", False)
  2542. or len(extracted) < total
  2543. )
  2544. print(
  2545. f"[{self.platform_name}] ✅ 共提取 {len(comments)} 条评论(含子评论)"
  2546. )
  2547. except Exception as e:
  2548. print(f"[{self.platform_name}] ❌ 获取评论失败: {e}")
  2549. import traceback
  2550. traceback.print_exc()
  2551. return CommentsResult(
  2552. success=False,
  2553. platform=self.platform_name,
  2554. work_id=work_id,
  2555. error=f"获取评论失败: {e}",
  2556. )
  2557. except Exception as e:
  2558. import traceback
  2559. traceback.print_exc()
  2560. return CommentsResult(
  2561. success=False,
  2562. platform=self.platform_name,
  2563. work_id=work_id,
  2564. error=str(e),
  2565. )
  2566. return CommentsResult(
  2567. success=True,
  2568. platform=self.platform_name,
  2569. work_id=work_id,
  2570. comments=comments,
  2571. total=total,
  2572. has_more=has_more,
  2573. )
  def _extract_comments(
      self, comment_list: list, parent_id: str = "", work_info: dict = None
  ) -> list:
      """Flatten first- and second-level comments into one list of dicts.

      Each raw comment becomes one entry; replies found under its
      ``levelTwoComment`` key are emitted right after it (recursively), with
      ``parent_comment_id`` set to the parent's ``commentId``.

      Args:
          comment_list: Raw comment dicts from the comment_list response.
              ``None`` or an empty list yields ``[]``.
          parent_id: ID of the parent comment; "" for first-level comments.
          work_info: Optional dict whose "work_id" / "work_title" values are
              copied onto every entry.

      Returns:
          list: Flat list of dicts with the keys work_id, work_title,
          comment_id, parent_comment_id, username, nickname, avatar, content,
          create_time_unix, create_time, is_author and like_count.
      """
      if not comment_list:
          return []

      work_id = work_info.get("work_id", "") if work_info else ""
      work_title = work_info.get("work_title", "") if work_info else ""

      # Username of the logged-in account, used to flag the author's own
      # comments: instance attribute first, then the environment variable.
      my_username = getattr(self, "my_username", "") or os.environ.get(
          "WEIXIN_MY_USERNAME", ""
      )

      result = []
      for cmt in comment_list:
          # A malformed timestamp must not abort the whole extraction.
          try:
              create_ts = int(cmt.get("commentCreatetime", 0) or 0)
          except (TypeError, ValueError):
              create_ts = 0

          readable_time = ""
          if create_ts > 0:
              try:
                  readable_time = datetime.fromtimestamp(create_ts).strftime(
                      "%Y-%m-%d %H:%M:%S"
                  )
              except (OverflowError, OSError, ValueError):
                  # Out-of-range value for the platform's time functions.
                  readable_time = ""

          username = cmt.get("username", "") or ""
          is_author = bool(my_username) and username == my_username

          # Normalise a missing ID to "" so the entry's own comment_id and the
          # parent_comment_id handed to its replies always agree.
          comment_id = cmt.get("commentId") or ""

          result.append(
              {
                  "work_id": work_id,
                  "work_title": work_title,
                  "comment_id": comment_id,
                  # "" for first-level comments, the parent's ID for replies.
                  "parent_comment_id": parent_id,
                  "username": username,
                  "nickname": cmt.get("commentNickname", ""),
                  "avatar": cmt.get("commentHeadurl", ""),
                  "content": cmt.get("commentContent", ""),
                  "create_time_unix": create_ts,
                  "create_time": readable_time,
                  "is_author": is_author,
                  "like_count": cmt.get("commentLikeCount", 0) or 0,
              }
          )

          # Replies are nested under levelTwoComment; their parent is the
          # comment that was just emitted.
          level_two = cmt.get("levelTwoComment", []) or []
          if level_two and isinstance(level_two, list):
              result.extend(
                  self._extract_comments(
                      level_two, parent_id=comment_id, work_info=work_info
                  )
              )

      return result
  async def auto_reply_private_messages(self, cookies: str) -> dict:
      """Open the private-message page and auto-reply in both of its tabs.

      Args:
          cookies: Cookie data passed to ``self.parse_cookies``.

      Returns:
          dict: ``{"success": True, "platform", "replied_count", "message"}``
          when the run completes, or ``{"success": False, "platform",
          "error"}`` when any step raises.
      """
      import time
      import traceback

      print(f"\n{'=' * 60}")
      print(f"[{self.platform_name}] 开始自动回复私信")
      print(f"{'=' * 60}")

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)

          if not self.page:
              raise Exception("Page not initialized")

          # Open the private-message page.
          await self.page.goto(
              "https://channels.weixin.qq.com/platform/private_msg", timeout=30000
          )
          await asyncio.sleep(3)

          # A URL containing "login" is treated as an expired session.
          current_url = self.page.url
          print(f"[{self.platform_name}] 当前 URL: {current_url}")
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Wait for the page, trying a second selector before giving up.
          # `except Exception` (not a bare `except`) so task cancellation and
          # KeyboardInterrupt are not swallowed by the fallback logic.
          try:
              await self.page.wait_for_selector(
                  ".private-msg-list-header", timeout=15000
              )
          except Exception:
              try:
                  await self.page.wait_for_selector(
                      ".weui-desktop-tab__navs__inner", timeout=10000
                  )
                  print(f"[{self.platform_name}] 使用备用选择器加载成功")
              except Exception:
                  # Best-effort debug screenshot; a failure here must not
                  # replace the timeout error reported to the caller.
                  try:
                      screenshot_dir = os.path.join(
                          os.path.dirname(
                              os.path.dirname(os.path.abspath(__file__))
                          ),
                          "screenshots",
                      )
                      os.makedirs(screenshot_dir, exist_ok=True)
                      # Wall-clock timestamp: the event-loop clock is
                      # monotonic, so its values can repeat across runs and
                      # overwrite earlier screenshots.
                      screenshot_path = os.path.join(
                          screenshot_dir,
                          f"weixin_private_msg_{int(time.time())}.png",
                      )
                      await self.page.screenshot(path=screenshot_path)
                      print(
                          f"[{self.platform_name}] 页面加载失败,截图: {screenshot_path}"
                      )
                  except Exception as shot_err:
                      print(f"[{self.platform_name}] 截图失败: {shot_err}")
                  raise Exception(f"私信页面加载超时,当前 URL: {current_url}")

          print(f"[{self.platform_name}] 私信页面加载完成")

          # Process both tabs and add up the replies sent.
          total_replied = 0
          for tab_name in ["打招呼消息", "私信"]:
              total_replied += await self._process_tab_sessions(tab_name)

          print(f"[{self.platform_name}] 自动回复完成,共回复 {total_replied} 条消息")
          return {
              "success": True,
              "platform": self.platform_name,
              "replied_count": total_replied,
              "message": f"成功回复 {total_replied} 条私信",
          }
      except Exception as e:
          traceback.print_exc()
          return {"success": False, "platform": self.platform_name, "error": str(e)}
  async def _process_tab_sessions(self, tab_name: str) -> int:
      """Reply to the conversations listed under one tab of the message page.

      For each conversation the chat history is read; a reply is generated
      and sent only when the history is empty or its last message was not
      sent by the account owner.

      Args:
          tab_name: "私信" (first tab) or "打招呼消息" (second tab). Any other
              value is ignored.

      Returns:
          int: Number of conversations in which a reply was sent. Errors are
          logged and never raised.
      """
      print(f"\n🔄 正在处理「{tab_name}」中的所有会话...")
      if not self.page:
          return 0

      replied_count = 0
      try:
          # Select the tab link by its position in the tab bar.
          if tab_name == "私信":
              tab_link = self.page.locator(
                  ".weui-desktop-tab__navs__inner li"
              ).first.locator("a")
          elif tab_name == "打招呼消息":
              tab_link = (
                  self.page.locator(".weui-desktop-tab__navs__inner li")
                  .nth(1)
                  .locator("a")
              )
          else:
              return 0

          if not await tab_link.is_visible():
              print(f" ❌ 「{tab_name}」tab 不可见")
              return 0
          await tab_link.click()
          print(f" ➤ 已点击「{tab_name}」tab")

          # Wait until either a session entry or the empty-state text exists.
          # `except Exception` rather than a bare `except`, so cancellation
          # and KeyboardInterrupt still propagate.
          try:
              await self.page.wait_for_function(
                  """
                  () => {
                      const hasSession = document.querySelectorAll('.session-wrap').length > 0;
                      const hasEmpty = !!document.querySelector('.empty-text');
                      return hasSession || hasEmpty;
                  }
                  """,
                  timeout=8000,
              )
              print(" ✅ 会话列表区域已加载")
          except Exception:
              print(" ⚠️ 等待会话列表超时,继续尝试读取...")

          session_count = await self.page.locator(".session-wrap").count()
          print(f" 💬 共找到 {session_count} 个会话")
          if session_count == 0:
              return 0

          for idx in range(session_count):
              try:
                  # Re-query on every pass and stop if the list got shorter.
                  current_sessions = self.page.locator(".session-wrap")
                  if idx >= await current_sessions.count():
                      break

                  session = current_sessions.nth(idx)
                  user_name = await session.locator(".name").inner_text()
                  last_preview = await session.locator(".feed-info").inner_text()
                  print(
                      f"\n ➤ [{idx + 1}/{session_count}] 正在处理: {user_name} | 最后消息: {last_preview}"
                  )

                  await session.click()
                  await asyncio.sleep(2)

                  history = await self._extract_chat_history()
                  need_reply = (not history) or (not history[-1]["is_author"])
                  if not need_reply:
                      print(" ➤ 最后一条是我发的,跳过回复")
                      continue

                  # AI reply first, rule-based reply if it came back empty.
                  reply_text = await self._generate_reply_with_ai(history)
                  if not reply_text:
                      reply_text = self._generate_reply(history)
                  if not reply_text:
                      print(" ➤ 无需回复")
                      continue

                  print(f" 📝 回复内容: {reply_text}")
                  try:
                      textarea = self.page.locator(".edit_area").first
                      send_btn = self.page.locator('button:has-text("发送")').first
                      if (
                          await textarea.is_visible()
                          and await send_btn.is_visible()
                      ):
                          await textarea.fill(reply_text)
                          await asyncio.sleep(0.5)
                          await send_btn.click()
                          print(" ✅ 已发送")
                          replied_count += 1
                          await asyncio.sleep(1.5)
                      else:
                          print(" ❌ 输入框或发送按钮不可见")
                  except Exception as e:
                      print(f" ❌ 发送失败: {e}")
              except Exception as e:
                  # One broken conversation must not stop the others.
                  print(f" ❌ 处理会话 {idx + 1} 时出错: {e}")
                  continue
      except Exception as e:
          print(f"❌ 处理「{tab_name}」失败: {e}")

      return replied_count
  async def _extract_chat_history(self) -> list:
      """Read the text messages of the currently open conversation.

      A message wrapper counts as sent by the account owner when it contains
      a ``.content-right`` element and as sent by the other party when it
      contains ``.content-left``. Wrappers with neither, or without
      non-empty ``pre.message-plain`` text, are skipped.

      Returns:
          list: One dict per message, in page order, with the keys
          "author", "content", "is_author" and "avatar". Empty when
          ``self.page`` is not set.
      """
      page = self.page
      if not page:
          return []

      records = []
      bubbles = page.locator(
          ".session-content-wrapper > div:not(.footer) > .text-wrapper"
      )
      total = await bubbles.count()

      for pos in range(total):
          try:
              bubble = bubbles.nth(pos)

              # Which side the bubble sits on decides who sent it.
              on_right = (await bubble.locator(".content-right").count()) > 0
              on_left = (await bubble.locator(".content-left").count()) > 0
              if not on_left and not on_right:
                  continue

              # Message text.
              text_node = bubble.locator("pre.message-plain")
              text = ""
              if await text_node.count() > 0:
                  text = await text_node.inner_text()
              text = text.strip()
              if not text:
                  continue

              # Avatar URL, "" when absent.
              avatar_node = bubble.locator(".avatar").first
              avatar_url = ""
              if await avatar_node.count() > 0:
                  avatar_url = (await avatar_node.get_attribute("src")) or ""

              # Display name: the profile name for left-side messages
              # (default "用户"), "我" otherwise.
              sender = "我"
              if on_left:
                  sender = "用户"
                  name_node = bubble.locator(".profile .name")
                  if await name_node.count() > 0:
                      sender = await name_node.inner_text()

              records.append(
                  {
                      "author": sender,
                      "content": text,
                      "is_author": on_right,
                      "avatar": avatar_url,
                  }
              )
          except Exception as e:
              print(f" ⚠️ 解析第 {pos + 1} 条消息失败: {e}")
              continue

      return records
  async def _generate_reply_with_ai(self, chat_history: list) -> str:
      """Generate a reply through a chat-completions HTTP API.

      Configuration is read from the environment: ``DASHSCOPE_API_KEY``
      (required), ``DASHSCOPE_BASE_URL`` and ``AI_MODEL``. Whenever the AI
      path is unavailable or fails (no key, ``requests`` not importable,
      non-200 status, empty answer, any exception) the result of
      ``self._generate_reply(chat_history)`` is returned instead.

      Args:
          chat_history: Message dicts with "is_author" and "content" keys.

      Returns:
          str: The AI reply, or the rule-based reply on fallback.
      """
      try:
          ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
          ai_base_url = os.environ.get(
              "DASHSCOPE_BASE_URL",
              "https://dashscope.aliyuncs.com/compatible-mode/v1",
          )
          ai_model = os.environ.get("AI_MODEL", "qwen-plus")

          if not ai_api_key:
              print("⚠️ 未配置 AI API Key,使用规则回复")
              return self._generate_reply(chat_history)

          # Imported inside the try so a missing `requests` package degrades
          # to the rule-based reply instead of raising to the caller.
          import functools

          import requests

          # Conversation context: the account owner's messages are sent as
          # "assistant", everything else as "user".
          messages = [
              {
                  "role": "system",
                  "content": "你是一个友好的微信视频号创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。",
              }
          ]
          for msg in chat_history:
              role = "assistant" if msg["is_author"] else "user"
              messages.append({"role": role, "content": msg["content"]})

          headers = {
              "Authorization": f"Bearer {ai_api_key}",
              "Content-Type": "application/json",
          }
          payload = {
              "model": ai_model,
              "messages": messages,
              "max_tokens": 150,
              "temperature": 0.8,
          }
          # Tolerate a trailing slash in the configured base URL.
          endpoint = ai_base_url.rstrip("/") + "/chat/completions"

          print(" 🤖 正在调用 AI 生成回复...")
          # requests is blocking; run it in the default executor so the
          # event loop is not stalled for up to the 30 s timeout.
          loop = asyncio.get_running_loop()
          response = await loop.run_in_executor(
              None,
              functools.partial(
                  requests.post,
                  endpoint,
                  headers=headers,
                  json=payload,
                  timeout=30,
              ),
          )

          if response.status_code != 200:
              print(f" ⚠️ AI API 返回错误 {response.status_code},使用规则回复")
              return self._generate_reply(chat_history)

          result = response.json()
          # An empty "choices" list or a null "content" counts as an empty answer.
          choices = result.get("choices") or [{}]
          message = choices[0].get("message") or {}
          ai_reply = (message.get("content") or "").strip()

          if ai_reply:
              print(f" ✅ AI 生成回复: {ai_reply}")
              return ai_reply

          print(" ⚠️ AI 返回空内容,使用规则回复")
          return self._generate_reply(chat_history)
      except Exception as e:
          print(f" ⚠️ AI 回复生成失败: {e},使用规则回复")
          return self._generate_reply(chat_history)
  def _generate_reply(self, chat_history: list) -> str:
      """Pick a canned reply from keyword rules applied to the last message.

      Args:
          chat_history: Message dicts with "is_author" and "content" keys.

      Returns:
          str: A greeting for an empty history, "" when the last message was
          sent by the account owner (no reply needed), otherwise the reply
          of the first matching keyword group or a generic acknowledgement.
      """
      if not chat_history:
          return "你好!感谢联系~"

      latest = chat_history[-1]
      if latest["is_author"]:
          # The account owner spoke last; nothing to answer.
          return ""

      text = latest["content"]

      # Checked in order; the first group with a keyword in the text wins.
      keyword_rules = (
          (("谢谢", "感谢"), "不客气!欢迎常来交流~"),
          (("你好", "在吗"), "你好!请问有什么可以帮您的?"),
          (("视频", "怎么拍"), "视频是用手机拍摄的,注意光线和稳定哦!"),
      )
      for keywords, reply in keyword_rules:
          if any(word in text for word in keywords):
              return reply

      return "收到!我会认真阅读您的留言~"