baijiahao.py 157 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191
  1. # -*- coding: utf-8 -*-
  2. """
  3. 百家号视频发布器
  4. """
  5. import asyncio
  6. import json
  7. from typing import List
  8. from datetime import datetime
  9. from .base import (
  10. BasePublisher, PublishParams, PublishResult,
  11. WorkItem, WorksResult, CommentItem, CommentsResult
  12. )
class BaijiahaoPublisher(BasePublisher):
    """
    Baijiahao video publisher.

    Automates the Baijiahao creator center with Playwright.
    """
    # Platform identifier; the methods of this class use it as the log prefix.
    platform_name = "baijiahao"
    login_url = "https://baijiahao.baidu.com/"
    publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=video"
    cookie_domain = ".baidu.com"
    # Login-detection configuration.
    # NOTE(review): presumably consumed by BasePublisher's login check
    # (URL fragments and page selectors that indicate a logged-out state);
    # confirm against the base class.
    login_check_url = "https://baijiahao.baidu.com/builder/rc/home"
    login_indicators = ["passport.baidu.com", "/login", "wappass.baidu.com"]
    login_selectors = ['text="登录"', 'text="请登录"', '[class*="login-btn"]']
  26. async def get_account_info(self, cookies: str) -> dict:
  27. """
  28. 获取百家号账号信息
  29. 使用直接 HTTP API 调用,不使用浏览器
  30. """
  31. import aiohttp
  32. print(f"\n{'='*60}")
  33. print(f"[{self.platform_name}] 获取账号信息 (使用 API)")
  34. print(f"{'='*60}")
  35. try:
  36. # 解析 cookies
  37. cookie_list = self.parse_cookies(cookies)
  38. cookie_dict = {c['name']: c['value'] for c in cookie_list}
  39. # 重要:百家号需要先访问主页建立会话上下文
  40. print(f"[{self.platform_name}] 第一步:访问主页建立会话...")
  41. session_headers = {
  42. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  43. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  44. # Cookie 由 session 管理,不手动设置
  45. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  46. 'Accept-Encoding': 'gzip, deflate, br',
  47. 'Connection': 'keep-alive',
  48. 'Upgrade-Insecure-Requests': '1',
  49. 'Sec-Fetch-Dest': 'document',
  50. 'Sec-Fetch-Mode': 'navigate',
  51. 'Sec-Fetch-Site': 'none',
  52. 'Sec-Fetch-User': '?1',
  53. 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
  54. 'sec-ch-ua-mobile': '?0',
  55. 'sec-ch-ua-platform': '"Windows"'
  56. }
  57. headers = {
  58. 'Accept': 'application/json, text/plain, */*',
  59. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  60. # Cookie 由 session 管理,不手动设置
  61. 'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
  62. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  63. 'Accept-Encoding': 'gzip, deflate, br',
  64. 'Connection': 'keep-alive',
  65. 'Sec-Fetch-Dest': 'empty',
  66. 'Sec-Fetch-Mode': 'cors',
  67. 'Sec-Fetch-Site': 'same-origin',
  68. 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
  69. 'sec-ch-ua-mobile': '?0',
  70. 'sec-ch-ua-platform': '"Windows"'
  71. }
  72. # 使用 cookies 参数初始化 session,让 aiohttp 自动管理 cookie 更新
  73. async with aiohttp.ClientSession(cookies=cookie_dict) as session:
  74. # 步骤 0: 先访问主页建立会话上下文(关键步骤!)
  75. print(f"[{self.platform_name}] [0/4] 访问主页建立会话上下文...")
  76. async with session.get(
  77. 'https://baijiahao.baidu.com/builder/rc/home',
  78. headers=session_headers,
  79. timeout=aiohttp.ClientTimeout(total=30)
  80. ) as home_response:
  81. home_status = home_response.status
  82. print(f"[{self.platform_name}] 主页访问状态: {home_status}")
  83. # 获取响应头中的新cookies(如果有)
  84. if 'Set-Cookie' in home_response.headers:
  85. new_cookies = home_response.headers['Set-Cookie']
  86. print(f"[{self.platform_name}] 获取到新的会话Cookie")
  87. # 这里可以处理新的cookies,但暂时跳过复杂处理
  88. # 短暂等待确保会话建立
  89. await asyncio.sleep(1)
  90. # 步骤 1: 获取账号基本信息
  91. print(f"[{self.platform_name}] [1/4] 调用 appinfo API...")
  92. async with session.get(
  93. 'https://baijiahao.baidu.com/builder/app/appinfo',
  94. headers=headers,
  95. timeout=aiohttp.ClientTimeout(total=30)
  96. ) as response:
  97. appinfo_result = await response.json()
  98. print(f"[{self.platform_name}] appinfo API 完整响应: {json.dumps(appinfo_result, ensure_ascii=False)[:500]}")
  99. print(f"[{self.platform_name}] appinfo API 响应: errno={appinfo_result.get('errno')}")
  100. # 检查登录状态
  101. if appinfo_result.get('errno') != 0:
  102. error_msg = appinfo_result.get('errmsg', '未知错误')
  103. errno = appinfo_result.get('errno')
  104. print(f"[{self.platform_name}] API 返回错误: errno={errno}, msg={error_msg}")
  105. # errno 110 表示未登录
  106. if errno == 110:
  107. return {
  108. "success": False,
  109. "error": "Cookie 已失效,需要重新登录",
  110. "need_login": True
  111. }
  112. # errno 10001402 表示分散认证问题,尝试重新访问主页后重试
  113. if errno == 10001402:
  114. print(f"[{self.platform_name}] 检测到分散认证问题,尝试重新访问主页...")
  115. await asyncio.sleep(2)
  116. # 重新访问主页
  117. async with session.get(
  118. 'https://baijiahao.baidu.com/builder/rc/home',
  119. headers=session_headers,
  120. timeout=aiohttp.ClientTimeout(total=30)
  121. ) as retry_home_response:
  122. print(f"[{self.platform_name}] 重新访问主页状态: {retry_home_response.status}")
  123. await asyncio.sleep(1)
  124. # 重试 API 调用
  125. async with session.get(
  126. 'https://baijiahao.baidu.com/builder/app/appinfo',
  127. headers=headers,
  128. timeout=aiohttp.ClientTimeout(total=30)
  129. ) as retry_response:
  130. retry_result = await retry_response.json()
  131. if retry_result.get('errno') == 0:
  132. print(f"[{self.platform_name}] 分散认证问题已解决")
  133. # 使用重试成功的结果继续处理
  134. appinfo_result = retry_result
  135. else:
  136. print(f"[{self.platform_name}] 重试仍然失败")
  137. return {
  138. "success": False,
  139. "error": f"分散认证问题: {error_msg}",
  140. "need_login": True
  141. }
  142. return {
  143. "success": False,
  144. "error": error_msg,
  145. "need_login": True
  146. }
  147. # 获取用户数据
  148. user_data = appinfo_result.get('data', {}).get('user', {})
  149. if not user_data:
  150. return {
  151. "success": False,
  152. "error": "无法获取用户信息",
  153. "need_login": True
  154. }
  155. # 检查账号状态
  156. status = user_data.get('status', '')
  157. # 有效的账号状态:audit(审核中), pass(已通过), normal(正常), newbie(新手)
  158. valid_statuses = ['audit', 'pass', 'normal', 'newbie']
  159. if status not in valid_statuses:
  160. print(f"[{self.platform_name}] 账号状态异常: {status}")
  161. # 提取基本信息
  162. account_name = user_data.get('name') or user_data.get('uname') or '百家号账号'
  163. app_id = user_data.get('app_id') or user_data.get('id', 0)
  164. account_id = str(app_id) if app_id else f"baijiahao_{int(datetime.now().timestamp() * 1000)}"
  165. # 处理头像 URL
  166. avatar_url = user_data.get('avatar') or user_data.get('avatar_unify', '')
  167. if avatar_url and avatar_url.startswith('//'):
  168. avatar_url = 'https:' + avatar_url
  169. print(f"[{self.platform_name}] 账号名称: {account_name}, ID: {account_id}")
  170. # 步骤 2: 获取粉丝数(非关键,失败不影响整体)
  171. fans_count = 0
  172. try:
  173. print(f"[{self.platform_name}] [2/3] 调用 growth/get_info API 获取粉丝数...")
  174. async with session.get(
  175. 'https://baijiahao.baidu.com/cms-ui/rights/growth/get_info',
  176. headers=headers,
  177. timeout=aiohttp.ClientTimeout(total=10)
  178. ) as response:
  179. growth_result = await response.json()
  180. if growth_result.get('errno') == 0:
  181. growth_data = growth_result.get('data', {})
  182. fans_count = int(growth_data.get('fans_num', 0))
  183. print(f"[{self.platform_name}] 粉丝数: {fans_count}")
  184. else:
  185. print(f"[{self.platform_name}] 获取粉丝数失败: {growth_result.get('errmsg')}")
  186. except Exception as e:
  187. print(f"[{self.platform_name}] 获取粉丝数异常(非关键): {e}")
  188. # 步骤 3: 获取作品数量(使用与 Node 端一致的 API)
  189. works_count = 0
  190. try:
  191. print(f"[{self.platform_name}] [3/3] 调用 article/lists API 获取作品数...")
  192. # 使用与 Node 端一致的 API 参数
  193. list_url = 'https://baijiahao.baidu.com/pcui/article/lists?currentPage=1&pageSize=20&search=&type=&collection=&startDate=&endDate=&clearBeforeFetch=false&dynamic=0'
  194. async with session.get(
  195. list_url,
  196. headers={
  197. 'accept': '*/*',
  198. 'user-agent': 'PostmanRuntime/7.51.0',
  199. # cookie 由 session 管理
  200. 'referer': 'https://baijiahao.baidu.com/builder/rc/content',
  201. 'connection': 'keep-alive',
  202. 'accept-encoding': 'gzip, deflate, br',
  203. },
  204. timeout=aiohttp.ClientTimeout(total=30)
  205. ) as response:
  206. response_text = await response.text()
  207. print(f"[{self.platform_name}] ========== Works API Response ==========")
  208. print(f"[{self.platform_name}] Full response: {response_text[:1000]}...") # 只打印前1000字符
  209. print(f"[{self.platform_name}] =========================================")
  210. works_result = json.loads(response_text)
  211. # 处理分散认证问题 (errno=10001402),重试一次
  212. if works_result.get('errno') == 10001402:
  213. print(f"[{self.platform_name}] 分散认证问题 (errno=10001402),3秒后重试...")
  214. await asyncio.sleep(3)
  215. # 重试一次,使用更完整的请求头
  216. retry_headers = headers.copy()
  217. retry_headers.update({
  218. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  219. 'Cache-Control': 'max-age=0',
  220. 'Upgrade-Insecure-Requests': '1',
  221. })
  222. async with session.get(
  223. list_url,
  224. headers=retry_headers,
  225. timeout=aiohttp.ClientTimeout(total=30)
  226. ) as retry_response:
  227. retry_text = await retry_response.text()
  228. print(f"[{self.platform_name}] ========== Works API Retry Response ==========")
  229. print(f"[{self.platform_name}] Full retry response: {retry_text[:1000]}...")
  230. print(f"[{self.platform_name}] ===============================================")
  231. works_result = json.loads(retry_text)
  232. if works_result.get('errno') == 10001402:
  233. print(f"[{self.platform_name}] 重试仍然失败,返回已获取的账号信息")
  234. works_result = None
  235. if works_result and works_result.get('errno') == 0:
  236. works_data = works_result.get('data', {})
  237. # 优先使用 data.page.totalCount,如果没有则使用 data.total(兼容旧格式)
  238. page_info = works_data.get('page', {})
  239. works_count = int(page_info.get('totalCount', works_data.get('total', 0)))
  240. print(f"[{self.platform_name}] 作品数: {works_count} (from page.totalCount: {page_info.get('totalCount')}, from total: {works_data.get('total')})")
  241. else:
  242. errno = works_result.get('errno') if works_result else 'unknown'
  243. errmsg = works_result.get('errmsg', 'unknown error') if works_result else 'no response'
  244. print(f"[{self.platform_name}] 获取作品数失败: errno={errno}, errmsg={errmsg}")
  245. except Exception as e:
  246. import traceback
  247. print(f"[{self.platform_name}] 获取作品数异常(非关键): {e}")
  248. traceback.print_exc()
  249. # 返回账号信息
  250. account_info = {
  251. "success": True,
  252. "account_id": account_id,
  253. "account_name": account_name,
  254. "avatar_url": avatar_url,
  255. "fans_count": fans_count,
  256. "works_count": works_count,
  257. }
  258. print(f"[{self.platform_name}] ✓ 获取成功: {account_name} (粉丝: {fans_count}, 作品: {works_count})")
  259. return account_info
  260. except Exception as e:
  261. import traceback
  262. traceback.print_exc()
  263. return {
  264. "success": False,
  265. "error": str(e)
  266. }
  267. async def check_captcha(self) -> dict:
  268. """检查页面是否需要验证码"""
  269. if not self.page:
  270. return {'need_captcha': False, 'captcha_type': ''}
  271. try:
  272. # 检查各种验证码
  273. captcha_selectors = [
  274. 'text="请输入验证码"',
  275. 'text="滑动验证"',
  276. '[class*="captcha"]',
  277. '[class*="verify"]',
  278. ]
  279. for selector in captcha_selectors:
  280. try:
  281. if await self.page.locator(selector).count() > 0:
  282. print(f"[{self.platform_name}] 检测到验证码: {selector}")
  283. return {'need_captcha': True, 'captcha_type': 'image'}
  284. except:
  285. pass
  286. # 检查登录弹窗
  287. login_selectors = [
  288. 'text="请登录"',
  289. 'text="登录后继续"',
  290. '[class*="login-dialog"]',
  291. ]
  292. for selector in login_selectors:
  293. try:
  294. if await self.page.locator(selector).count() > 0:
  295. print(f"[{self.platform_name}] 检测到需要登录: {selector}")
  296. return {'need_captcha': True, 'captcha_type': 'login'}
  297. except:
  298. pass
  299. except Exception as e:
  300. print(f"[{self.platform_name}] 验证码检测异常: {e}")
  301. return {'need_captcha': False, 'captcha_type': ''}
  302. async def _ai_analyze_upload_state(self, screenshot_base64: str = None) -> dict:
  303. """
  304. 使用 AI 识别当前上传状态,返回:
  305. {
  306. status: completed|uploading|failed|unknown,
  307. progress: int|None,
  308. confidence: int,
  309. reason: str,
  310. should_enter_publish_form: bool
  311. }
  312. """
  313. import os
  314. import ast
  315. import re
  316. import requests
  317. result = {
  318. "status": "unknown",
  319. "progress": None,
  320. "confidence": 0,
  321. "reason": "",
  322. "should_enter_publish_form": False,
  323. }
  324. try:
  325. if not screenshot_base64:
  326. screenshot_base64 = await self.capture_screenshot()
  327. if not screenshot_base64:
  328. result["reason"] = "no-screenshot"
  329. return result
  330. ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
  331. ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
  332. ai_vision_model = os.environ.get('AI_VISION_MODEL', 'qwen-vl-plus')
  333. if not ai_api_key:
  334. result["reason"] = "no-ai-key"
  335. return result
  336. prompt = """请分析这张“百家号视频发布页”截图,判断视频上传状态。
  337. 请只返回 JSON:
  338. {
  339. "status": "completed|uploading|failed|unknown",
  340. "progress": 0-100 或 null,
  341. "confidence": 0-100,
  342. "reason": "一句话证据",
  343. "should_enter_publish_form": true/false
  344. }
  345. 判定规则:
  346. 1) status=completed:
  347. - 出现“上传完成/处理完成/可发布/可填写标题描述/发布按钮可用”等信号
  348. - 或者明显已进入可填写发布信息的阶段
  349. 2) status=uploading:
  350. - 出现“上传中/处理中/转码中/xx%/请稍候”等
  351. 3) status=failed:
  352. - 出现“上传失败/处理失败/格式不支持/文件异常”等明确失败文案
  353. 4) should_enter_publish_form=true:
  354. - 画面显示“去发布/下一步/继续/完成编辑”等入口,且看起来应点击进入正式发布表单
  355. """
  356. headers = {
  357. 'Authorization': f'Bearer {ai_api_key}',
  358. 'Content-Type': 'application/json'
  359. }
  360. payload = {
  361. "model": ai_vision_model,
  362. "messages": [
  363. {
  364. "role": "user",
  365. "content": [
  366. {
  367. "type": "image_url",
  368. "image_url": {
  369. "url": f"data:image/jpeg;base64,{screenshot_base64}"
  370. }
  371. },
  372. {
  373. "type": "text",
  374. "text": prompt
  375. }
  376. ]
  377. }
  378. ],
  379. "max_tokens": 400
  380. }
  381. response = requests.post(
  382. f"{ai_base_url}/chat/completions",
  383. headers=headers,
  384. json=payload,
  385. timeout=30
  386. )
  387. if response.status_code != 200:
  388. result["reason"] = f"ai-http-{response.status_code}"
  389. return result
  390. response_json = response.json()
  391. ai_response = response_json.get('choices', [{}])[0].get('message', {}).get('content', '')
  392. json_match = re.search(r'```json\\s*([\\s\\S]*?)\\s*```', ai_response)
  393. if json_match:
  394. json_str = json_match.group(1)
  395. else:
  396. json_match = re.search(r'\\{[\\s\\S]*\\}', ai_response)
  397. json_str = json_match.group(0) if json_match else '{}'
  398. try:
  399. data = json.loads(json_str)
  400. except Exception:
  401. try:
  402. data = ast.literal_eval(json_str) if json_str and json_str != '{}' else {}
  403. if not isinstance(data, dict):
  404. data = {}
  405. except Exception:
  406. data = {}
  407. # 兼容中文 key / 非标准结构
  408. status_hint = str(
  409. data.get("status")
  410. or data.get("状态")
  411. or ""
  412. ).strip()
  413. status_raw = status_hint.lower()
  414. if (
  415. status_raw in ["complete", "completed", "success", "done", "finished", "ready"]
  416. or any(k in status_hint for k in ["完成", "成功", "可发布", "已上传"])
  417. ):
  418. status = "completed"
  419. elif (
  420. status_raw in ["uploading", "processing", "in_progress", "progress", "running"]
  421. or any(k in status_hint for k in ["上传中", "处理中", "转码", "进行中", "上传"])
  422. ):
  423. status = "uploading"
  424. elif (
  425. status_raw in ["failed", "error", "fail"]
  426. or any(k in status_hint for k in ["失败", "错误", "异常"])
  427. ):
  428. status = "failed"
  429. else:
  430. status = "unknown"
  431. progress = data.get("progress", data.get("进度", None))
  432. parsed_progress = None
  433. try:
  434. if progress is not None and str(progress).strip() != "":
  435. parsed_progress = max(0, min(100, int(float(progress))))
  436. except Exception:
  437. parsed_progress = None
  438. if parsed_progress is None:
  439. try:
  440. p_match = re.search(r'(\d{1,3})\s*%', ai_response or '')
  441. if p_match:
  442. parsed_progress = max(0, min(100, int(p_match.group(1))))
  443. except Exception:
  444. parsed_progress = None
  445. confidence = 0
  446. try:
  447. confidence = max(0, min(100, int(float(data.get("confidence", data.get("置信度", 0)) or 0))))
  448. except Exception:
  449. confidence = 0
  450. reason = str(data.get("reason", data.get("原因", "")) or "").strip()
  451. should_enter_raw = data.get(
  452. "should_enter_publish_form",
  453. data.get("是否进入发布表单", False)
  454. )
  455. if isinstance(should_enter_raw, bool):
  456. should_enter = should_enter_raw
  457. else:
  458. should_enter_text = str(should_enter_raw or "").strip().lower()
  459. should_enter = should_enter_text in ["true", "1", "yes", "y", "是"]
  460. # 当 AI 响应不是严格 JSON 时,按全文关键词推断
  461. response_text = str(ai_response or "")
  462. response_lower = response_text.lower()
  463. if status == "unknown":
  464. if any(k in response_text for k in ["上传完成", "处理完成", "上传成功", "可发布", "已完成"]):
  465. status = "completed"
  466. elif any(k in response_text for k in ["上传失败", "处理失败", "格式不支持", "文件异常", "失败"]):
  467. status = "failed"
  468. elif any(k in response_text for k in ["上传中", "处理中", "转码中", "请稍候"]) or re.search(r'(\d{1,3})\s*%', response_text):
  469. status = "uploading"
  470. if not should_enter and any(k in response_text for k in ["去发布", "下一步", "继续", "完成编辑"]):
  471. should_enter = True
  472. if not reason and response_text:
  473. reason = response_text.replace("\n", " ").strip()[:120]
  474. if confidence <= 0 and status != "unknown":
  475. confidence = 60
  476. # 二次语义修正
  477. if status == "uploading" and parsed_progress is not None and parsed_progress >= 100:
  478. status = "completed"
  479. should_enter = True
  480. # AI 有时会把 99/100 仍写成 uploading,这里做语义修正
  481. if status == "uploading" and parsed_progress is not None and parsed_progress >= 99 and confidence >= 60:
  482. status = "completed"
  483. should_enter = True
  484. return {
  485. "status": status,
  486. "progress": parsed_progress,
  487. "confidence": confidence,
  488. "reason": reason,
  489. "should_enter_publish_form": should_enter,
  490. }
  491. except Exception as e:
  492. result["reason"] = f"ai-exception:{e}"
  493. return result
  async def _extract_bjh_token(self) -> str:
      """Extract the Baijiahao API token from the current page.

      Two best-effort strategies are tried in order:

      1. Run a script inside the page that looks for a JWT-shaped string in
         ``localStorage``, ``sessionStorage``, a ``<meta name="token">`` or
         ``<meta name="bjh-token">`` tag, and the ``token`` field of a few
         global state objects.
      2. Scan the serialized page HTML for a JWT-shaped string.

      "JWT-shaped" means: at least 60 characters long and exactly three
      dot-separated segments, each longer than 10 characters and made only of
      ``[A-Za-z0-9_-]``. The HTML scan applies the same rule as the in-page
      script so that short dotted identifiers in the markup are not returned
      as a token.

      Returns:
          The token string, or ``""`` when there is no page or nothing
          JWT-shaped was found. Errors from either strategy are swallowed.
      """
      import re

      if not self.page:
          return ""

      # Strategy 1: inspect storage / meta tags / global state in the page.
      try:
          token = await self.page.evaluate(
              """
              () => {
                  const isJwtLike = (v) => {
                      if (!v || typeof v !== 'string') return false;
                      const s = v.trim();
                      if (s.length < 60) return false;
                      const parts = s.split('.');
                      if (parts.length !== 3) return false;
                      return parts.every(p => /^[A-Za-z0-9_-]+$/.test(p) && p.length > 10);
                  };
                  const pickFromStorage = (storage) => {
                      try {
                          const keys = Object.keys(storage || {});
                          for (const k of keys) {
                              const v = storage.getItem(k);
                              if (isJwtLike(v)) return v;
                          }
                      } catch {}
                      return "";
                  };
                  let t = pickFromStorage(window.localStorage);
                  if (t) return t;
                  t = pickFromStorage(window.sessionStorage);
                  if (t) return t;
                  const meta = document.querySelector('meta[name="token"], meta[name="bjh-token"]');
                  const metaToken = meta && meta.getAttribute('content');
                  if (isJwtLike(metaToken)) return metaToken;
                  const candidates = [
                      (window.__INITIAL_STATE__ && window.__INITIAL_STATE__.token) || "",
                      (window.__PRELOADED_STATE__ && window.__PRELOADED_STATE__.token) || "",
                      (window.__NUXT__ && window.__NUXT__.state && window.__NUXT__.state.token) || "",
                  ];
                  for (const c of candidates) {
                      if (isJwtLike(c)) return c;
                  }
                  return "";
              }
              """
          )
          if token:
              return str(token)
      except Exception:
          # Deliberately best-effort: fall through to the HTML scan.
          pass

      # Strategy 2: scan the raw HTML. The lookarounds make sure the match is
      # a complete three-segment run (not a slice of a longer dotted string),
      # and {11,} mirrors the script's "segment longer than 10" rule.
      try:
          html = await self.page.content()
          jwt_like = re.compile(
              r'(?<![A-Za-z0-9_.-])'
              r'([A-Za-z0-9_-]{11,}\.[A-Za-z0-9_-]{11,}\.[A-Za-z0-9_-]{11,})'
              r'(?![A-Za-z0-9_.-])'
          )
          for found in jwt_like.finditer(html):
              candidate = found.group(1)
              # Same minimum overall length as the in-page check.
              if len(candidate) >= 60:
                  return candidate
      except Exception:
          # Deliberately best-effort: no token is a valid outcome.
          pass
      return ""
  async def _verify_publish_from_content_page(self, expected_title: str, page_size: int = 20) -> bool:
      """Confirm a publish by looking the title up in the content list.

      Opens the content-management page, calls the article list API from
      inside the page (``credentials: "include"``, plus the extracted token
      as a header when one was found) and reports whether any listed title
      contains the expected title.

      Args:
          expected_title: Title the published item should carry. A blank
              title can never be confirmed, so ``False`` is returned without
              navigating anywhere.
          page_size: Number of list entries to request (first page only).

      Returns:
          ``True`` when a listed title contains the expected title or its
          12- or 20-character prefix; ``False`` otherwise, including on any
          HTTP error, API error or unexpected exception (these are printed,
          not raised).
      """
      if not self.page:
          return False
      try:
          expected = (expected_title or "").strip()
          if not expected:
              # Check before navigating: there is nothing to match against,
              # so the page load and API call would be wasted work.
              return False

          size = int(page_size)
          content_url = (
              "https://baijiahao.baidu.com/builder/rc/content"
              f"?currentPage=1&pageSize={size}"
              "&search=&type=&collection=&startDate=&endDate="
          )
          await self.page.goto(content_url, wait_until="domcontentloaded", timeout=60000)
          await asyncio.sleep(2)
          token = await self._extract_bjh_token()

          fetch_result = await self.page.evaluate(
              """
              async ({ token, pageSize }) => {
                  const url =
                      "https://baijiahao.baidu.com/pcui/article/lists" +
                      "?currentPage=1" +
                      `&pageSize=${pageSize}` +
                      "&search=&type=&collection=&startDate=&endDate=" +
                      "&clearBeforeFetch=false&dynamic=1";
                  const r = await fetch(url, {
                      method: "GET",
                      credentials: "include",
                      headers: {
                          "accept": "application/json, text/plain, */*",
                          ...(token ? { token } : {}),
                      },
                  });
                  const text = await r.text();
                  return { ok: r.ok, status: r.status, text };
              }
              """,
              {"token": token, "pageSize": size}
          )
          # A non-dict result is reported as status "unknown" instead of
          # raising on .get().
          if not isinstance(fetch_result, dict) or not fetch_result.get("ok"):
              status = fetch_result.get("status") if isinstance(fetch_result, dict) else "unknown"
              print(f"[{self.platform_name}] 内容页校验接口失败: HTTP {status}")
              return False

          data = json.loads(fetch_result.get("text") or "{}")
          if data.get("errno") != 0:
              print(f"[{self.platform_name}] 内容页校验接口错误: errno={data.get('errno')}, msg={data.get('errmsg')}")
              return False

          items = ((data.get("data") or {}).get("list") or [])
          if not isinstance(items, list) or not items:
              print(f"[{self.platform_name}] 内容页校验:当前列表为空")
              return False

          # Match on the full title and on its 12-/20-character prefixes, to
          # tolerate titles that were truncated on the platform side.
          needles = {expected}
          needles.update(expected[:cut] for cut in (12, 20) if len(expected) > cut)

          for item in items:
              # Skip malformed entries rather than aborting the whole scan.
              if not isinstance(item, dict):
                  continue
              title = str(item.get("title") or "").strip()
              if title and any(needle in title for needle in needles):
                  print(f"[{self.platform_name}] 内容页校验命中标题: {title}")
                  return True

          print(f"[{self.platform_name}] 内容页校验未命中标题,expected={expected}")
          return False
      except Exception as e:
          print(f"[{self.platform_name}] 内容页校验异常: {e}")
          return False
  624. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  625. """发布视频到百家号"""
  626. import os
  627. import re
  628. import shutil
  629. print(f"\n{'='*60}")
  630. print(f"[{self.platform_name}] 开始发布视频")
  631. print(f"[{self.platform_name}] 视频路径: {params.video_path}")
  632. print(f"[{self.platform_name}] 标题: {params.title}")
  633. print(f"[{self.platform_name}] 描述: {(params.description or '')[:120]}")
  634. print(f"[{self.platform_name}] Headless: {self.headless}")
  635. print(f"{'='*60}")
  636. self.report_progress(5, "正在初始化浏览器...")
  637. # 初始化浏览器
  638. await self.init_browser()
  639. print(f"[{self.platform_name}] 浏览器初始化完成")
  640. # 解析并设置 cookies
  641. cookie_list = self.parse_cookies(cookies)
  642. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
  643. await self.set_cookies(cookie_list)
  644. if not self.page:
  645. raise Exception("Page not initialized")
  646. # 检查视频文件
  647. if not os.path.exists(params.video_path):
  648. raise Exception(f"视频文件不存在: {params.video_path}")
  649. print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
  650. # 关键兜底:百家号在标题框不可编辑时会将“文件名主干”作为默认标题。
  651. # 因此上传前为视频创建“标题别名文件”(优先硬链接,失败再复制),确保默认标题可控。
  652. upload_video_path = params.video_path
  653. try:
  654. raw_title = (params.title or "").strip()
  655. if raw_title:
  656. safe_title = re.sub(r'[<>:"/\\\\|?*\\x00-\\x1F]', '', raw_title)
  657. safe_title = re.sub(r'\\s+', ' ', safe_title).strip().rstrip('.')
  658. if not safe_title:
  659. safe_title = "video"
  660. safe_title = safe_title[:30]
  661. src_ext = os.path.splitext(params.video_path)[1] or ".mp4"
  662. alias_dir = os.path.join(os.path.dirname(params.video_path), "_bjh_upload_alias")
  663. os.makedirs(alias_dir, exist_ok=True)
  664. # 轻量清理:删除 24h 前的旧别名文件,避免长期累积
  665. try:
  666. now_ts = datetime.now().timestamp()
  667. for fn in os.listdir(alias_dir):
  668. full = os.path.join(alias_dir, fn)
  669. if not os.path.isfile(full):
  670. continue
  671. if now_ts - os.path.getmtime(full) > 24 * 3600:
  672. try:
  673. os.remove(full)
  674. except Exception:
  675. pass
  676. except Exception:
  677. pass
  678. alias_name = f"{safe_title}{src_ext}"
  679. alias_path = os.path.join(alias_dir, alias_name)
  680. if os.path.abspath(alias_path) != os.path.abspath(params.video_path):
  681. if os.path.exists(alias_path):
  682. try:
  683. os.remove(alias_path)
  684. except Exception:
  685. pass
  686. try:
  687. os.link(params.video_path, alias_path)
  688. upload_video_path = alias_path
  689. print(f"[{self.platform_name}] 上传别名已创建(硬链接): {upload_video_path}")
  690. except Exception:
  691. shutil.copy2(params.video_path, alias_path)
  692. upload_video_path = alias_path
  693. print(f"[{self.platform_name}] 上传别名已创建(复制): {upload_video_path}")
  694. except Exception as e:
  695. upload_video_path = params.video_path
  696. print(f"[{self.platform_name}] 创建上传别名失败,回退原文件: {e}")
  697. self.report_progress(10, "正在打开上传页面...")
  698. # 访问视频发布页面(使用新视频发布界面)
  699. video_publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=videoV2&is_from_cms=1"
  700. await self.page.goto(video_publish_url, wait_until="domcontentloaded", timeout=60000)
  701. await asyncio.sleep(3)
  702. # 检查是否跳转到登录页
  703. current_url = self.page.url
  704. print(f"[{self.platform_name}] 当前页面: {current_url}")
  705. for indicator in self.login_indicators:
  706. if indicator in current_url:
  707. screenshot_base64 = await self.capture_screenshot()
  708. return PublishResult(
  709. success=False,
  710. platform=self.platform_name,
  711. error="Cookie 已过期,需要重新登录",
  712. need_captcha=True,
  713. captcha_type='login',
  714. screenshot_base64=screenshot_base64,
  715. page_url=current_url,
  716. status='need_captcha'
  717. )
  718. # 使用 AI 检查验证码
  719. ai_captcha = await self.ai_check_captcha()
  720. if ai_captcha['has_captcha']:
  721. print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
  722. screenshot_base64 = await self.capture_screenshot()
  723. return PublishResult(
  724. success=False,
  725. platform=self.platform_name,
  726. error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
  727. need_captcha=True,
  728. captcha_type=ai_captcha['captcha_type'],
  729. screenshot_base64=screenshot_base64,
  730. page_url=current_url,
  731. status='need_captcha'
  732. )
  733. # 传统方式检查验证码
  734. captcha_result = await self.check_captcha()
  735. if captcha_result['need_captcha']:
  736. screenshot_base64 = await self.capture_screenshot()
  737. return PublishResult(
  738. success=False,
  739. platform=self.platform_name,
  740. error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
  741. need_captcha=True,
  742. captcha_type=captcha_result['captcha_type'],
  743. screenshot_base64=screenshot_base64,
  744. page_url=current_url,
  745. status='need_captcha'
  746. )
  747. self.report_progress(15, "正在选择视频文件...")
  748. # 等待页面加载完成
  749. await asyncio.sleep(2)
  750. # 关闭可能的弹窗
  751. try:
  752. close_buttons = [
  753. 'button:has-text("我知道了")',
  754. 'button:has-text("知道了")',
  755. '[class*="close"]',
  756. '[class*="modal-close"]',
  757. ]
  758. for btn_selector in close_buttons:
  759. try:
  760. btn = self.page.locator(btn_selector).first
  761. if await btn.count() > 0 and await btn.is_visible():
  762. await btn.click()
  763. await asyncio.sleep(0.5)
  764. except:
  765. pass
  766. except:
  767. pass
  768. # 上传视频 - 尝试多种方式
  769. upload_triggered = False
  770. # 方法1: 直接通过 file input 上传
  771. try:
  772. file_inputs = await self.page.query_selector_all('input[type="file"]')
  773. print(f"[{self.platform_name}] 找到 {len(file_inputs)} 个文件输入")
  774. for file_input in file_inputs:
  775. try:
  776. await file_input.set_input_files(upload_video_path)
  777. upload_triggered = True
  778. print(f"[{self.platform_name}] 通过 file input 上传成功")
  779. break
  780. except Exception as e:
  781. print(f"[{self.platform_name}] file input 上传失败: {e}")
  782. except Exception as e:
  783. print(f"[{self.platform_name}] 查找 file input 失败: {e}")
  784. # 方法2: 点击上传区域
  785. if not upload_triggered:
  786. upload_selectors = [
  787. 'div[class*="upload-box"]',
  788. 'div[class*="drag-upload"]',
  789. 'div[class*="uploader"]',
  790. 'div:has-text("点击上传")',
  791. 'div:has-text("选择文件")',
  792. '[class*="upload-area"]',
  793. ]
  794. for selector in upload_selectors:
  795. if upload_triggered:
  796. break
  797. try:
  798. upload_area = self.page.locator(selector).first
  799. if await upload_area.count() > 0:
  800. print(f"[{self.platform_name}] 尝试点击上传区域: {selector}")
  801. async with self.page.expect_file_chooser(timeout=10000) as fc_info:
  802. await upload_area.click()
  803. file_chooser = await fc_info.value
  804. await file_chooser.set_files(upload_video_path)
  805. upload_triggered = True
  806. print(f"[{self.platform_name}] 通过点击上传区域成功")
  807. break
  808. except Exception as e:
  809. print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")
  810. if not upload_triggered:
  811. screenshot_base64 = await self.capture_screenshot()
  812. return PublishResult(
  813. success=False,
  814. platform=self.platform_name,
  815. error="未找到上传入口",
  816. screenshot_base64=screenshot_base64,
  817. page_url=await self.get_page_url(),
  818. status='failed'
  819. )
  820. self.report_progress(20, "等待视频上传...")
  821. # 等待视频上传完成(百家号大文件+处理可能较慢)
  822. upload_timeout = 900
  823. start_time = asyncio.get_event_loop().time()
  824. last_heartbeat_time = start_time
  825. last_signal_time = start_time
  826. last_stall_log_time = start_time
  827. last_ai_upload_check_time = start_time - 60
  828. ai_upload_check_interval = 20
  829. ai_upload_poll_count = 0
  830. ai_upload_unknown_streak = 0
  831. last_pct = -1
  832. forced_continue_after = 180 # 无进度信号时,3 分钟后执行兜底继续
  833. processing_since = None
  834. processing_selector_hit = ""
  835. processing_stale_continue_after = 300 # 处理态持续 5 分钟仍无明确变化,执行兜底继续
  836. has_progress_signal = False
  837. progress_signal_lost_continue_after = 90 # 已看到进度后,若信号中断 90s,直接进入下一步
  838. hard_cutover_signal_gap_after = 120 # 已出现过进度后,信号中断超过该值则硬切下一阶段
  839. hard_cutover_elapsed_after = 210 # 上传总耗时超过该值时,硬切下一阶段
  840. async def _attempt_enter_publish_form_from_upload(stage: str) -> bool:
  841. enter_selectors = [
  842. 'button:has-text("去发布")',
  843. '[role="button"]:has-text("去发布")',
  844. 'button:has-text("发布视频")',
  845. '[role="button"]:has-text("发布视频")',
  846. 'button:has-text("下一步")',
  847. '[role="button"]:has-text("下一步")',
  848. 'button:has-text("继续")',
  849. '[role="button"]:has-text("继续")',
  850. 'button:has-text("完成编辑")',
  851. '[role="button"]:has-text("完成编辑")',
  852. '[class*="next"] button',
  853. '[class*="step"] button',
  854. ]
  855. blocked_exact = {"发布", "定时发布", "立即发布", "取消", "返回", "关闭"}
  856. blocked_contains = ["定时发布", "立即发布", "取消", "返回", "关闭", "删除", "重传", "重新上传", "清空"]
  857. for selector in enter_selectors:
  858. try:
  859. btns = self.page.locator(selector)
  860. count = await btns.count()
  861. for idx in range(min(count, 6)):
  862. btn = btns.nth(idx)
  863. if not await btn.is_visible():
  864. continue
  865. text = (await btn.text_content() or "").strip()
  866. compact = re.sub(r"\s+", "", text)
  867. if compact in blocked_exact or any(w in compact for w in blocked_contains):
  868. continue
  869. disabled_attr = await btn.get_attribute('disabled')
  870. aria_disabled = (await btn.get_attribute('aria-disabled') or '').lower()
  871. if disabled_attr is not None or aria_disabled == 'true':
  872. continue
  873. try:
  874. await btn.scroll_into_view_if_needed(timeout=1200)
  875. except Exception:
  876. pass
  877. try:
  878. await btn.click(timeout=2500)
  879. except Exception:
  880. await btn.click(force=True, timeout=2500)
  881. print(f"[{self.platform_name}] 上传阶段尝试切换到发布表单: stage={stage}, selector={selector}, text={compact or text}, idx={idx}")
  882. await asyncio.sleep(1)
  883. return True
  884. except Exception:
  885. pass
  886. # 深层 DOM 兜底(含 shadowRoot),应对常规选择器无法命中
  887. try:
  888. deep_clicked = await self.page.evaluate(
  889. """
  890. () => {
  891. const wanted = ['去发布', '发布视频', '下一步', '继续', '完成编辑'];
  892. const blockedExact = new Set(['发布', '定时发布', '立即发布', '取消', '返回', '关闭']);
  893. const blockedContains = ['定时发布', '立即发布', '取消', '返回', '关闭', '删除', '重传', '重新上传', '清空'];
  894. const roots = [document];
  895. const visited = new Set();
  896. const allNodes = [];
  897. while (roots.length) {
  898. const root = roots.pop();
  899. if (!root || visited.has(root)) continue;
  900. visited.add(root);
  901. const nodes = root.querySelectorAll('*');
  902. for (const n of nodes) {
  903. allNodes.push(n);
  904. if (n && n.shadowRoot) roots.push(n.shadowRoot);
  905. }
  906. }
  907. const isVisible = (el) => {
  908. try {
  909. const style = window.getComputedStyle(el);
  910. if (style.display === 'none' || style.visibility === 'hidden' || style.pointerEvents === 'none') return false;
  911. const rect = el.getBoundingClientRect();
  912. return !!rect && rect.width > 8 && rect.height > 8;
  913. } catch {
  914. return false;
  915. }
  916. };
  917. for (const el of allNodes) {
  918. const text = String(el.innerText || el.textContent || '').replace(/\\s+/g, '').trim();
  919. if (!text) continue;
  920. if (blockedExact.has(text)) continue;
  921. if (blockedContains.some(x => text.includes(x))) continue;
  922. if (!wanted.some(x => text.includes(x))) continue;
  923. if (!isVisible(el)) continue;
  924. const tag = String(el.tagName || '').toLowerCase();
  925. const role = String(el.getAttribute && el.getAttribute('role') || '').toLowerCase();
  926. const cls = String(el.className || '').toLowerCase();
  927. const clickable = tag === 'button' || tag === 'a' || role === 'button' || /btn|button|next|step/.test(cls);
  928. if (!clickable) continue;
  929. try {
  930. el.click();
  931. return { ok: true, text };
  932. } catch {}
  933. }
  934. return { ok: false, text: '' };
  935. }
  936. """
  937. )
  938. if deep_clicked and deep_clicked.get("ok"):
  939. clicked_text = str(deep_clicked.get("text") or "").strip()
  940. print(f"[{self.platform_name}] 上传阶段深层DOM切换发布表单成功: stage={stage}, text={clicked_text}")
  941. await asyncio.sleep(1.2)
  942. return True
  943. except Exception:
  944. pass
  945. return False
  946. while asyncio.get_event_loop().time() - start_time < upload_timeout:
  947. now = asyncio.get_event_loop().time()
  948. elapsed = int(now - start_time)
  949. status_parts = []
  950. # 检查上传进度
  951. pct = None
  952. try:
  953. progress_nodes = self.page.locator('[class*="progress"], [class*="percent"], div:has-text("%"), span:has-text("%")')
  954. node_count = await progress_nodes.count()
  955. for idx in range(min(node_count, 6)):
  956. text = await progress_nodes.nth(idx).text_content()
  957. if not text:
  958. continue
  959. match = re.search(r'(\d{1,3})\s*%', text)
  960. if match:
  961. pct = max(0, min(100, int(match.group(1))))
  962. break
  963. except Exception:
  964. pass
  965. if pct is not None:
  966. status_parts.append(f"progress={pct}%")
  967. last_signal_time = now
  968. has_progress_signal = True
  969. if pct != last_pct:
  970. self.report_progress(20 + min(35, int(pct * 0.35)), f"视频上传中 {pct}%...")
  971. last_pct = pct
  972. if pct >= 100:
  973. print(f"[{self.platform_name}] 上传完成(进度达到 100%)")
  974. break
  975. # 明确的上传完成提示
  976. upload_done = False
  977. upload_done_selectors = [
  978. 'div:has-text("上传完成")',
  979. 'div:has-text("处理完成")',
  980. 'div:has-text("上传成功")',
  981. 'span:has-text("上传完成")',
  982. '[class*="upload-success"]',
  983. ]
  984. try:
  985. for selector in upload_done_selectors:
  986. loc = self.page.locator(selector).first
  987. if await loc.count() > 0 and await loc.is_visible():
  988. upload_done = True
  989. print(f"[{self.platform_name}] 检测到上传完成提示: {selector}")
  990. break
  991. except Exception:
  992. pass
  993. if upload_done:
  994. last_signal_time = now
  995. break
  996. # 检查处理态
  997. is_processing = False
  998. processing_selectors = [
  999. 'div:has-text("上传中")',
  1000. 'span:has-text("上传中")',
  1001. 'div:has-text("处理中")',
  1002. 'span:has-text("处理中")',
  1003. 'div:has-text("转码中")',
  1004. 'span:has-text("转码中")',
  1005. 'div:has-text("请稍候")',
  1006. 'span:has-text("请稍候")',
  1007. 'div:has-text("正在上传")',
  1008. 'div:has-text("正在处理")',
  1009. 'text="上传中"',
  1010. 'text="处理中"',
  1011. ]
  1012. try:
  1013. for selector in processing_selectors:
  1014. loc = self.page.locator(selector).first
  1015. if await loc.count() > 0 and await loc.is_visible():
  1016. is_processing = True
  1017. processing_selector_hit = selector
  1018. break
  1019. except Exception:
  1020. pass
  1021. if is_processing:
  1022. if processing_since is None:
  1023. processing_since = now
  1024. processing_elapsed = int(now - processing_since)
  1025. status_parts.append(f"processing={processing_elapsed}s")
  1026. if processing_selector_hit:
  1027. status_parts.append(f"by={processing_selector_hit}")
  1028. # 处理态短时间内视为有效信号;超过阈值后不再持续刷新 signal_gap,避免卡死
  1029. if processing_elapsed <= 180:
  1030. last_signal_time = now
  1031. else:
  1032. processing_since = None
  1033. processing_selector_hit = ""
  1034. # 检查是否出现标题输入框(部分页面会在上传阶段就显示,需结合时间/处理态判断)
  1035. title_input_visible = False
  1036. try:
  1037. title_input = self.page.locator('input[placeholder*="标题"], textarea[placeholder*="标题"], [class*="title-input"] input').first
  1038. title_input_visible = await title_input.count() > 0 and await title_input.is_visible()
  1039. except Exception:
  1040. title_input_visible = False
  1041. if title_input_visible and (
  1042. (not is_processing and elapsed >= 45) or
  1043. (processing_since is not None and (now - processing_since) >= 180) or
  1044. elapsed >= 360
  1045. ):
  1046. print(f"[{self.platform_name}] 检测到可编辑标题,继续后续步骤")
  1047. break
  1048. # 检查是否有错误提示
  1049. error_text = ''
  1050. try:
  1051. error_nodes = self.page.locator('[class*="error"], [class*="fail"], div:has-text("上传失败"), div:has-text("处理失败")')
  1052. err_count = await error_nodes.count()
  1053. for idx in range(min(err_count, 6)):
  1054. txt = (await error_nodes.nth(idx).text_content() or '').strip()
  1055. if txt and any(k in txt for k in ['失败', '错误', '异常', '中断']):
  1056. error_text = txt
  1057. break
  1058. except Exception:
  1059. error_text = ''
  1060. if error_text:
  1061. screenshot_base64 = await self.capture_screenshot()
  1062. return PublishResult(
  1063. success=False,
  1064. platform=self.platform_name,
  1065. error=f"上传失败: {error_text}",
  1066. screenshot_base64=screenshot_base64,
  1067. page_url=await self.get_page_url(),
  1068. status='failed'
  1069. )
  1070. # AI 上传状态判定(节流),用于弥补 DOM/文案信号缺失
  1071. should_run_ai_upload_check = (now - last_ai_upload_check_time) >= ai_upload_check_interval
  1072. if should_run_ai_upload_check:
  1073. ai_upload_poll_count += 1
  1074. ai_upload_state = await self._ai_analyze_upload_state()
  1075. last_ai_upload_check_time = now
  1076. ai_status = str(ai_upload_state.get("status") or "unknown").strip().lower()
  1077. ai_progress = ai_upload_state.get("progress")
  1078. ai_confidence = int(ai_upload_state.get("confidence") or 0)
  1079. ai_reason = str(ai_upload_state.get("reason") or "").strip()
  1080. ai_should_enter_form = bool(ai_upload_state.get("should_enter_publish_form"))
  1081. print(
  1082. f"[{self.platform_name}] AI上传轮询#{ai_upload_poll_count}: elapsed={elapsed}s, "
  1083. f"status={ai_status}, progress={ai_progress}, confidence={ai_confidence}, "
  1084. f"enter_form={ai_should_enter_form}, reason={ai_reason or '-'}"
  1085. )
  1086. if ai_status == "unknown":
  1087. ai_upload_unknown_streak += 1
  1088. else:
  1089. ai_upload_unknown_streak = 0
  1090. if ai_status == "failed":
  1091. screenshot_base64 = await self.capture_screenshot()
  1092. return PublishResult(
  1093. success=False,
  1094. platform=self.platform_name,
  1095. error=f"上传失败(AI判定): {ai_reason or '检测到上传失败信号'}",
  1096. screenshot_base64=screenshot_base64,
  1097. page_url=await self.get_page_url(),
  1098. status='failed'
  1099. )
  1100. if ai_status == "completed":
  1101. if ai_should_enter_form:
  1102. await _attempt_enter_publish_form_from_upload("ai-completed")
  1103. print(f"[{self.platform_name}] AI判定上传已完成,进入下一阶段")
  1104. last_signal_time = now
  1105. break
  1106. if ai_status == "uploading":
  1107. has_progress_signal = True
  1108. last_signal_time = now
  1109. if isinstance(ai_progress, (int, float)):
  1110. ai_pct = max(0, min(100, int(ai_progress)))
  1111. status_parts.append(f"ai-progress={ai_pct}%")
  1112. if ai_pct != last_pct and ai_pct > 0:
  1113. self.report_progress(20 + min(35, int(ai_pct * 0.35)), f"视频上传中 {ai_pct}%...")
  1114. last_pct = ai_pct
  1115. if ai_pct >= 99 and ai_confidence >= 60:
  1116. if ai_should_enter_form:
  1117. await _attempt_enter_publish_form_from_upload("ai-upload-99")
  1118. print(f"[{self.platform_name}] AI判定上传接近完成,进入下一阶段")
  1119. break
  1120. else:
  1121. status_parts.append("ai=uploading")
  1122. if ai_should_enter_form and elapsed >= 60:
  1123. await _attempt_enter_publish_form_from_upload("ai-uploading-enter-form")
  1124. elif ai_status == "unknown" and ai_should_enter_form and elapsed >= 60:
  1125. await _attempt_enter_publish_form_from_upload("ai-unknown-enter-form")
  1126. elif ai_status == "unknown" and ai_upload_unknown_streak >= 3 and elapsed >= 90:
  1127. await _attempt_enter_publish_form_from_upload("ai-unknown-streak")
  1128. # 心跳日志,便于定位“卡住”
  1129. if now - last_heartbeat_time >= 15:
  1130. signal_gap = int(now - last_signal_time)
  1131. extra = ", ".join(status_parts) if status_parts else "no-visible-signal"
  1132. print(f"[{self.platform_name}] 上传等待中: elapsed={elapsed}s, signal_gap={signal_gap}s, {extra}")
  1133. last_heartbeat_time = now
  1134. # 已经出现过进度后,如果进度信号中断较久,进入下一步兜底
  1135. dynamic_signal_lost_after = progress_signal_lost_continue_after
  1136. if last_pct >= 95:
  1137. # 95%+ 阶段可能有短暂静默,适度放宽
  1138. dynamic_signal_lost_after = max(progress_signal_lost_continue_after, 150)
  1139. elif last_pct >= 80:
  1140. # 中后段进度(80%+)可能进入转码/校验静默期,但不应无限等待
  1141. dynamic_signal_lost_after = max(progress_signal_lost_continue_after, 150)
  1142. elif last_pct >= 60:
  1143. dynamic_signal_lost_after = max(progress_signal_lost_continue_after, 120)
  1144. if has_progress_signal and (now - last_signal_time) >= dynamic_signal_lost_after:
  1145. signal_gap = int(now - last_signal_time)
  1146. if last_pct >= 95 or title_input_visible or elapsed >= max(780, upload_timeout - 60):
  1147. print(f"[{self.platform_name}] 上传进度信号中断过久({signal_gap}s>={dynamic_signal_lost_after}s),继续后续步骤(兜底)")
  1148. break
  1149. if (last_pct >= 70 and signal_gap >= hard_cutover_signal_gap_after) or elapsed >= hard_cutover_elapsed_after:
  1150. await _attempt_enter_publish_form_from_upload("hard-cutover-signal")
  1151. print(f"[{self.platform_name}] 上传长时间无新信号,执行硬切换到标题阶段: elapsed={elapsed}s, signal_gap={signal_gap}s, last_pct={last_pct}")
  1152. break
  1153. if now - last_stall_log_time >= 30:
  1154. print(f"[{self.platform_name}] 上传信号中断({signal_gap}s)但进度不足/标题未就绪,继续等待上传完成...")
  1155. last_stall_log_time = now
  1156. # 额外硬切策略:出现过中后段进度但长时间无新增信号时,不再继续卡住
  1157. if has_progress_signal and last_pct >= 70 and (now - last_signal_time) >= hard_cutover_signal_gap_after:
  1158. signal_gap = int(now - last_signal_time)
  1159. await _attempt_enter_publish_form_from_upload("hard-cutover-progress")
  1160. print(f"[{self.platform_name}] 中后段上传信号停滞,强制切换到标题阶段: elapsed={elapsed}s, signal_gap={signal_gap}s, last_pct={last_pct}")
  1161. break
  1162. # 从未出现可见进度信号时,不再长时间卡在 20%
  1163. if (not has_progress_signal) and elapsed >= forced_continue_after and (now - last_signal_time) >= 120:
  1164. if title_input_visible or elapsed >= max(600, upload_timeout - 90):
  1165. print(f"[{self.platform_name}] 上传阶段长时间无可见进度信号,继续后续步骤(兜底)")
  1166. break
  1167. if elapsed >= 480:
  1168. await _attempt_enter_publish_form_from_upload("hard-cutover-no-signal")
  1169. print(f"[{self.platform_name}] 上传持续无可见信号,执行硬切换到标题阶段: elapsed={elapsed}s")
  1170. break
  1171. if now - last_stall_log_time >= 30:
  1172. print(f"[{self.platform_name}] 上传暂无可见信号且标题未就绪,继续等待...")
  1173. last_stall_log_time = now
  1174. # 处理态持续过久时兜底继续,避免固定 DOM 文案导致无限等待
  1175. if processing_since is not None and (now - processing_since) >= processing_stale_continue_after:
  1176. if last_pct >= 95 or title_input_visible or elapsed >= max(780, upload_timeout - 60):
  1177. print(f"[{self.platform_name}] 上传阶段处理态持续过久,继续后续步骤(兜底)")
  1178. break
  1179. if elapsed >= hard_cutover_elapsed_after:
  1180. await _attempt_enter_publish_form_from_upload("hard-cutover-processing")
  1181. print(f"[{self.platform_name}] 处理态持续过久且总耗时较长,执行硬切换到标题阶段: elapsed={elapsed}s")
  1182. break
  1183. if now - last_stall_log_time >= 30:
  1184. print(f"[{self.platform_name}] 处理态持续较久但标题未就绪,继续等待上传收尾...")
  1185. last_stall_log_time = now
  1186. await asyncio.sleep(3)
  1187. self.report_progress(60, "正在填写标题...")
  1188. await asyncio.sleep(2)
  1189. # 填写标题(严格校验写入结果,避免填错输入框)
  1190. desired_title = (params.title or "").strip()[:30] # 百家号标题限制 30 字
  1191. video_stem = os.path.splitext(os.path.basename(params.video_path or ""))[0].strip().lower()
  1192. def _normalize_title_for_match(value: str) -> str:
  1193. v = re.sub(r"\s+", "", str(value or "")).strip().lower()
  1194. v = re.sub(r"[`~!@#$%^&*()_+=\[\]{}\\|;:'\",.<>/?,。!?;:、()【】《》\-\u3000]", "", v)
  1195. return v
  1196. def _looks_like_non_title_value(value: str) -> bool:
  1197. raw = str(value or "").strip()
  1198. if not raw:
  1199. return True
  1200. compact = raw.lower()
  1201. # 典型 UUID(平台内部资源ID/文件名)
  1202. if re.fullmatch(r"[0-9a-f]{8}-[0-9a-f]{4}-[1-5]?[0-9a-f]{3}-[89ab]?[0-9a-f]{3}-[0-9a-f]{12}", compact):
  1203. return True
  1204. # 纯英文数字/连接符且较长,通常是资源ID而不是标题
  1205. if len(compact) >= 24 and re.fullmatch(r"[a-z0-9_-]+", compact):
  1206. return True
  1207. # 与视频文件名主干一致时,视为误填
  1208. if video_stem and compact == video_stem:
  1209. return True
  1210. # 文件路径或带扩展名文本,视为误填
  1211. if "\\" in raw or "/" in raw:
  1212. return True
  1213. if re.search(r"\.(mp4|mov|avi|mkv|wmv|flv|m4v)$", compact):
  1214. return True
  1215. return False
  1216. def _title_matches_expected(current_value: str) -> bool:
  1217. if not desired_title:
  1218. return False
  1219. current = str(current_value or "").strip()
  1220. if not current:
  1221. return False
  1222. if _looks_like_non_title_value(current):
  1223. return False
  1224. expected_norm = _normalize_title_for_match(desired_title)
  1225. current_norm = _normalize_title_for_match(current)
  1226. if not expected_norm or not current_norm:
  1227. return False
  1228. if expected_norm == current_norm:
  1229. return True
  1230. if len(expected_norm) >= 4 and (expected_norm in current_norm or current_norm in expected_norm):
  1231. return True
  1232. prefix_len = min(8, len(expected_norm))
  1233. if prefix_len >= 4 and expected_norm[:prefix_len] in current_norm:
  1234. return True
  1235. return False
  1236. title_filled = False
  1237. title_verified_value = ""
  1238. title_failure_reason = ""
  1239. title_selectors = [
  1240. 'input[placeholder*="标题"]',
  1241. 'textarea[placeholder*="标题"]',
  1242. 'input[aria-label*="标题"]',
  1243. 'textarea[aria-label*="标题"]',
  1244. 'input[data-placeholder*="标题"]',
  1245. 'textarea[data-placeholder*="标题"]',
  1246. 'input[name*="title"]',
  1247. 'textarea[name*="title"]',
  1248. 'input[id*="title"]',
  1249. 'textarea[id*="title"]',
  1250. '[class*="title-input"] input',
  1251. '[class*="title"] textarea',
  1252. '[class*="title"] input',
  1253. '[class*="headline"] input',
  1254. '[class*="headline"] textarea',
  1255. '[class*="name"] input',
  1256. '[contenteditable="true"][placeholder*="标题"]',
  1257. '[contenteditable="true"][aria-label*="标题"]',
  1258. '[contenteditable="plaintext-only"][placeholder*="标题"]',
  1259. '[data-placeholder*="标题"][contenteditable="true"]',
  1260. '[class*="title"] [contenteditable="true"]',
  1261. '[role="textbox"][aria-label*="标题"]',
  1262. '[role="textbox"][placeholder*="标题"]',
  1263. ]
  1264. async def _has_editable_title_input() -> bool:
  1265. for frame in self.page.frames:
  1266. for selector in title_selectors:
  1267. try:
  1268. nodes = frame.locator(selector)
  1269. count = await nodes.count()
  1270. for idx in range(min(count, 10)):
  1271. node = nodes.nth(idx)
  1272. if not await node.is_visible():
  1273. continue
  1274. node_type = (await node.get_attribute('type') or '').strip().lower()
  1275. if node_type in ['file', 'hidden', 'checkbox', 'radio', 'button', 'submit']:
  1276. continue
  1277. try:
  1278. if await node.is_disabled():
  1279. continue
  1280. except Exception:
  1281. pass
  1282. return True
  1283. except Exception:
  1284. pass
  1285. # 深层 DOM 检查(含 shadowRoot)
  1286. for frame in self.page.frames:
  1287. try:
  1288. deep_found = await frame.evaluate(
  1289. """
  1290. () => {
  1291. const roots = [document];
  1292. const visited = new Set();
  1293. while (roots.length) {
  1294. const root = roots.pop();
  1295. if (!root || visited.has(root)) continue;
  1296. visited.add(root);
  1297. const nodes = root.querySelectorAll('*');
  1298. for (const n of nodes) {
  1299. if (n && n.shadowRoot) roots.push(n.shadowRoot);
  1300. const tag = String(n.tagName || '').toLowerCase();
  1301. if (!['input', 'textarea'].includes(tag) && String(n.getAttribute && n.getAttribute('contenteditable') || '').toLowerCase() !== 'true' && String(n.getAttribute && n.getAttribute('role') || '').toLowerCase() !== 'textbox') {
  1302. continue;
  1303. }
  1304. const type = String(n.getAttribute && n.getAttribute('type') || '').toLowerCase();
  1305. if (tag === 'input' && ['file', 'hidden', 'checkbox', 'radio', 'button', 'submit'].includes(type)) continue;
  1306. if (n.disabled || n.readOnly) continue;
  1307. const style = window.getComputedStyle(n);
  1308. if (style.display === 'none' || style.visibility === 'hidden') continue;
  1309. const rect = n.getBoundingClientRect();
  1310. if (!rect || rect.width < 8 || rect.height < 8) continue;
  1311. return true;
  1312. }
  1313. }
  1314. return false;
  1315. }
  1316. """
  1317. )
  1318. if deep_found:
  1319. return True
  1320. except Exception:
  1321. pass
  1322. return False
  async def _try_enter_publish_form(stage: str) -> bool:
      """Click a "go to publish / next step" style control to reach the publish form.

      Two strategies are tried in order:

      1. In every frame of ``self.page``, look up a fixed list of Playwright
         selectors and click the visible, enabled candidates (at most six
         per selector) whose text is not on the block lists.
      2. Evaluate a JavaScript snippet via ``self.page.evaluate`` that walks
         the DOM, including open shadow roots, and clicks the first
         visible, clickable-looking element with a wanted label.

      After each click the coroutine sleeps 1.2 seconds and then asks
      ``_has_editable_title_input()`` whether an editable title field exists.

      Args:
          stage: Label of the calling phase; only used in log messages.

      Returns:
          True once a click was followed by a positive
          ``_has_editable_title_input()`` check, otherwise False.
      """
      # Each label is probed as a <button> first, then as a role="button" element.
      entry_labels = ("去发布", "发布视频", "下一步", "继续", "完成编辑")
      action_selectors = [
          f'{tag}:has-text("{label}")'
          for label in entry_labels
          for tag in ("button", '[role="button"]')
      ]
      action_selectors.extend(['[class*="next"] button', '[class*="step"] button'])

      # Button texts that must never be clicked here (final publish, cancel,
      # delete, re-upload, ...): exact matches and substring matches.
      forbidden_exact = frozenset(("发布", "定时发布", "立即发布", "取消", "返回", "关闭"))
      forbidden_parts = ("定时发布", "立即发布", "取消", "返回", "关闭", "删除", "重传", "重新上传", "清空")

      # Fallback script: collects every element (descending into shadow roots)
      # and clicks the first visible, clickable-looking one with a wanted label.
      deep_click_script = """
          () => {
              const wanted = ['去发布', '发布视频', '下一步', '继续', '完成编辑'];
              const blockedExact = new Set(['发布', '定时发布', '立即发布', '取消', '返回', '关闭']);
              const blockedContains = ['定时发布', '立即发布', '取消', '返回', '关闭', '删除', '重传', '重新上传', '清空'];
              const roots = [document];
              const visited = new Set();
              const allNodes = [];
              while (roots.length) {
                  const root = roots.pop();
                  if (!root || visited.has(root)) continue;
                  visited.add(root);
                  const nodes = root.querySelectorAll('*');
                  for (const n of nodes) {
                      allNodes.push(n);
                      if (n && n.shadowRoot) roots.push(n.shadowRoot);
                  }
              }
              const isVisible = (el) => {
                  try {
                      const style = window.getComputedStyle(el);
                      if (style.display === 'none' || style.visibility === 'hidden' || style.pointerEvents === 'none') return false;
                      const rect = el.getBoundingClientRect();
                      return !!rect && rect.width > 8 && rect.height > 8;
                  } catch {
                      return false;
                  }
              };
              for (const el of allNodes) {
                  const text = String(el.innerText || el.textContent || '').replace(/\\s+/g, '').trim();
                  if (!text) continue;
                  if (blockedExact.has(text)) continue;
                  if (blockedContains.some(x => text.includes(x))) continue;
                  if (!wanted.some(x => text.includes(x))) continue;
                  if (!isVisible(el)) continue;
                  const tag = String(el.tagName || '').toLowerCase();
                  const role = String(el.getAttribute && el.getAttribute('role') || '').toLowerCase();
                  const cls = String(el.className || '').toLowerCase();
                  const clickable = tag === 'button' || tag === 'a' || role === 'button' || /btn|button|next|step/.test(cls);
                  if (!clickable) continue;
                  try {
                      el.click();
                      return { ok: true, text };
                  } catch {}
              }
              return { ok: false, text: '' };
          }
          """

      # Strategy 1: selector-driven clicks in every frame.
      for frame in self.page.frames:
          frame_label = frame.url or "about:blank"
          for selector in action_selectors:
              # Any error while handling one selector abandons the rest of
              # its candidates and moves on to the next selector.
              try:
                  matches = frame.locator(selector)
                  total = await matches.count()
                  for position in range(min(total, 6)):
                      candidate = matches.nth(position)
                      if not await candidate.is_visible():
                          continue

                      raw_text = (await candidate.text_content() or "").strip()
                      squeezed = re.sub(r"\s+", "", raw_text)
                      if squeezed in forbidden_exact:
                          continue
                      if any(part in squeezed for part in forbidden_parts):
                          continue

                      disabled_marker = await candidate.get_attribute('disabled')
                      aria_state = (await candidate.get_attribute('aria-disabled') or '').lower()
                      if aria_state == 'true' or disabled_marker is not None:
                          continue

                      # Scrolling is best effort; the click is still attempted.
                      try:
                          await candidate.scroll_into_view_if_needed(timeout=1500)
                      except Exception:
                          pass

                      # Retry with force=True when the regular click fails.
                      try:
                          await candidate.click(timeout=3000)
                      except Exception:
                          await candidate.click(force=True, timeout=3000)

                      print(f"[{self.platform_name}] 尝试进入发布表单: stage={stage}, frame={frame_label}, selector={selector}, text={squeezed or raw_text}, idx={position}")
                      await asyncio.sleep(1.2)
                      if await _has_editable_title_input():
                          print(f"[{self.platform_name}] 已进入可编辑发布表单: stage={stage}")
                          return True
              except Exception:
                  pass

      # Strategy 2: deep DOM fallback (including shadow roots).
      try:
          outcome = await self.page.evaluate(deep_click_script)
          if outcome and outcome.get("ok"):
              clicked_text = str(outcome.get('text') or '').strip()
              print(f"[{self.platform_name}] 深层DOM进入发布表单成功: stage={stage}, text={clicked_text}")
              await asyncio.sleep(1.2)
              if await _has_editable_title_input():
                  print(f"[{self.platform_name}] 已进入可编辑发布表单(深层DOM): stage={stage}")
                  return True
      except Exception:
          pass
      return False
  1434. # 先等待可编辑标题框出现,避免上传兜底后立即进入导致误命中 file input
  1435. await _try_enter_publish_form("pre-title")
  1436. title_ready = False
  1437. title_wait_deadline = asyncio.get_event_loop().time() + 180
  1438. last_title_wait_log = 0.0
  1439. last_enter_publish_try = 0.0
  1440. while asyncio.get_event_loop().time() < title_wait_deadline and not title_ready:
  1441. try:
  1442. if await _has_editable_title_input():
  1443. title_ready = True
  1444. break
  1445. except Exception:
  1446. pass
  1447. for frame in self.page.frames:
  1448. if title_ready:
  1449. break
  1450. for selector in title_selectors:
  1451. if title_ready:
  1452. break
  1453. try:
  1454. title_nodes = frame.locator(selector)
  1455. node_count = await title_nodes.count()
  1456. for idx in range(min(node_count, 8)):
  1457. node = title_nodes.nth(idx)
  1458. if not await node.is_visible():
  1459. continue
  1460. node_type = (await node.get_attribute('type') or '').strip().lower()
  1461. if node_type in ['file', 'hidden', 'checkbox', 'radio', 'button', 'submit']:
  1462. continue
  1463. try:
  1464. if await node.is_disabled():
  1465. continue
  1466. except Exception:
  1467. pass
  1468. title_ready = True
  1469. break
  1470. except Exception:
  1471. pass
  1472. if title_ready:
  1473. break
  1474. now_wait = asyncio.get_event_loop().time()
  1475. if now_wait - last_title_wait_log >= 10:
  1476. print(f"[{self.platform_name}] 等待可编辑标题输入框... frames={len(self.page.frames)}")
  1477. last_title_wait_log = now_wait
  1478. if now_wait - last_enter_publish_try >= 15:
  1479. await _try_enter_publish_form("title-wait")
  1480. last_enter_publish_try = now_wait
  1481. await asyncio.sleep(2)
  1482. if not title_ready:
  1483. title_failure_reason = "title-not-ready"
  1484. print(f"[{self.platform_name}] 未检测到明确标题输入框,进入兜底识别模式")
  1485. for frame in self.page.frames:
  1486. if title_filled:
  1487. break
  1488. frame_url = frame.url or "about:blank"
  1489. for selector in title_selectors:
  1490. if title_filled:
  1491. break
  1492. try:
  1493. title_nodes = frame.locator(selector)
  1494. node_count = await title_nodes.count()
  1495. for idx in range(min(node_count, 8)):
  1496. node = title_nodes.nth(idx)
  1497. if not await node.is_visible():
  1498. continue
  1499. node_type = (await node.get_attribute('type') or '').strip().lower()
  1500. if node_type in ['file', 'hidden', 'checkbox', 'radio', 'button', 'submit']:
  1501. continue
  1502. try:
  1503. if await node.is_disabled():
  1504. continue
  1505. except Exception:
  1506. pass
  1507. node_tag = ""
  1508. try:
  1509. node_tag = ((await node.evaluate("el => (el.tagName || '').toLowerCase()")) or "").strip()
  1510. except Exception:
  1511. node_tag = ""
  1512. contenteditable_attr = (await node.get_attribute('contenteditable') or '').strip().lower()
  1513. role_attr = (await node.get_attribute('role') or '').strip().lower()
  1514. is_text_input = node_tag in ['input', 'textarea']
  1515. is_editable_block = contenteditable_attr == 'true' or role_attr == 'textbox'
  1516. try:
  1517. await node.click(timeout=2000)
  1518. except Exception:
  1519. pass
  1520. if is_text_input:
  1521. try:
  1522. await node.fill(desired_title, timeout=5000)
  1523. except Exception:
  1524. try:
  1525. await self.page.keyboard.press("Control+KeyA")
  1526. await self.page.keyboard.press("Backspace")
  1527. await self.page.keyboard.type(desired_title)
  1528. except Exception:
  1529. continue
  1530. elif is_editable_block:
  1531. try:
  1532. await self.page.keyboard.press("Control+KeyA")
  1533. await self.page.keyboard.press("Backspace")
  1534. await self.page.keyboard.type(desired_title)
  1535. except Exception:
  1536. try:
  1537. await node.evaluate(
  1538. """
  1539. (el, title) => {
  1540. el.focus();
  1541. el.textContent = title;
  1542. el.dispatchEvent(new Event('input', { bubbles: true }));
  1543. el.dispatchEvent(new Event('change', { bubbles: true }));
  1544. }
  1545. """,
  1546. desired_title
  1547. )
  1548. except Exception:
  1549. continue
  1550. else:
  1551. continue
  1552. await asyncio.sleep(0.2)
  1553. current_value = ""
  1554. if is_text_input:
  1555. try:
  1556. current_value = (await node.input_value() or "").strip()
  1557. except Exception:
  1558. current_value = ""
  1559. else:
  1560. try:
  1561. current_value = ((await node.evaluate("el => (el.innerText || el.textContent || '')")) or "").strip()
  1562. except Exception:
  1563. current_value = ""
  1564. if _title_matches_expected(current_value):
  1565. title_filled = True
  1566. title_verified_value = current_value
  1567. print(f"[{self.platform_name}] 标题填写成功: frame={frame_url}, selector={selector}, idx={idx}, value={current_value}")
  1568. break
  1569. elif current_value:
  1570. title_failure_reason = "candidate-mismatch"
  1571. # 对同一节点再做一次 JS 强制赋值,处理键盘输入未生效的情况
  1572. forced_value = ""
  1573. try:
  1574. forced_value = (
  1575. (await node.evaluate(
  1576. """
  1577. (el, title) => {
  1578. const tag = String(el.tagName || '').toLowerCase();
  1579. const type = String((el.getAttribute('type') || '')).toLowerCase();
  1580. if (tag === 'input' && ['file', 'hidden', 'checkbox', 'radio', 'button', 'submit'].includes(type)) return '';
  1581. const ce = String(el.getAttribute('contenteditable') || '').toLowerCase();
  1582. const role = String(el.getAttribute('role') || '').toLowerCase();
  1583. const isTextInput = tag === 'input' || tag === 'textarea';
  1584. const isEditableBlock = ce === 'true' || role === 'textbox';
  1585. const emit = () => {
  1586. el.dispatchEvent(new Event('input', { bubbles: true }));
  1587. el.dispatchEvent(new Event('change', { bubbles: true }));
  1588. };
  1589. try { el.focus(); } catch {}
  1590. if (isTextInput) {
  1591. try {
  1592. const proto = tag === 'textarea' ? window.HTMLTextAreaElement.prototype : window.HTMLInputElement.prototype;
  1593. const setter = Object.getOwnPropertyDescriptor(proto, 'value')?.set;
  1594. if (setter) {
  1595. setter.call(el, '');
  1596. emit();
  1597. setter.call(el, title);
  1598. emit();
  1599. } else {
  1600. el.value = '';
  1601. emit();
  1602. el.value = title;
  1603. emit();
  1604. }
  1605. } catch {
  1606. el.value = title;
  1607. emit();
  1608. }
  1609. return String(el.value || '').trim();
  1610. }
  1611. if (isEditableBlock) {
  1612. el.textContent = '';
  1613. emit();
  1614. el.textContent = title;
  1615. emit();
  1616. return String(el.innerText || el.textContent || '').trim();
  1617. }
  1618. return '';
  1619. }
  1620. """,
  1621. desired_title
  1622. )) or ""
  1623. ).strip()
  1624. except Exception:
  1625. forced_value = ""
  1626. if _title_matches_expected(forced_value):
  1627. title_filled = True
  1628. title_verified_value = forced_value
  1629. print(f"[{self.platform_name}] 标题强制写入成功: frame={frame_url}, selector={selector}, idx={idx}, value={forced_value}")
  1630. break
  1631. print(f"[{self.platform_name}] 标题候选值不匹配,已忽略: frame={frame_url}, selector={selector}, idx={idx}, value={current_value}")
  1632. except Exception as e:
  1633. print(f"[{self.platform_name}] 标题选择器失败: frame={frame_url}, selector={selector}, err={e}")
  1634. # 深层 DOM 兜底(含 shadowRoot)
  1635. if not title_filled and desired_title:
  1636. for frame in self.page.frames:
  1637. if title_filled:
  1638. break
  1639. frame_url = frame.url or "about:blank"
  1640. try:
  1641. deep_result = await frame.evaluate(
  1642. """
  1643. (title) => {
  1644. const roots = [document];
  1645. const visited = new Set();
  1646. const candidates = [];
  1647. while (roots.length) {
  1648. const root = roots.pop();
  1649. if (!root || visited.has(root)) continue;
  1650. visited.add(root);
  1651. const nodes = root.querySelectorAll('*');
  1652. for (const n of nodes) {
  1653. if (n && n.shadowRoot) roots.push(n.shadowRoot);
  1654. const tag = String(n.tagName || '').toLowerCase();
  1655. const type = String(n.getAttribute && n.getAttribute('type') || '').toLowerCase();
  1656. const ce = String(n.getAttribute && n.getAttribute('contenteditable') || '').toLowerCase();
  1657. const role = String(n.getAttribute && n.getAttribute('role') || '').toLowerCase();
  1658. const isTextInput = tag === 'input' || tag === 'textarea';
  1659. const isEditableBlock = ce === 'true' || role === 'textbox';
  1660. if (!isTextInput && !isEditableBlock) continue;
  1661. if (tag === 'input' && ['file', 'hidden', 'checkbox', 'radio', 'button', 'submit'].includes(type)) continue;
  1662. if (n.disabled || n.readOnly) continue;
  1663. const style = window.getComputedStyle(n);
  1664. if (style.display === 'none' || style.visibility === 'hidden') continue;
  1665. const rect = n.getBoundingClientRect();
  1666. if (!rect || rect.width < 8 || rect.height < 8) continue;
  1667. const ph = String(n.getAttribute && n.getAttribute('placeholder') || '');
  1668. const aria = String(n.getAttribute && n.getAttribute('aria-label') || '');
  1669. const name = String(n.getAttribute && n.getAttribute('name') || '');
  1670. const id = String(n.getAttribute && n.getAttribute('id') || '');
  1671. const cls = String(n.className || '');
  1672. const maxLen = parseInt(String(n.getAttribute && n.getAttribute('maxlength') || '0'), 10) || 0;
  1673. const container = n.closest && n.closest('label, [class*="form"], [class*="item"], [class*="field"], [class*="title"]');
  1674. const ctx = String((container && container.innerText) || '').slice(0, 80);
  1675. let score = 0;
  1676. if (/标题|title/i.test(ph)) score += 7;
  1677. if (/标题|title/i.test(aria)) score += 6;
  1678. if (/标题|title/i.test(name)) score += 5;
  1679. if (/标题|title/i.test(id)) score += 5;
  1680. if (/title|标题/i.test(cls)) score += 4;
  1681. if (/标题|title/i.test(ctx)) score += 5;
  1682. if (maxLen > 0 && maxLen <= 40) score += 3;
  1683. if (isTextInput) score += 2;
  1684. if (isEditableBlock) score += 1;
  1685. candidates.push({ n, score, isTextInput, isEditableBlock });
  1686. }
  1687. }
  1688. candidates.sort((a, b) => b.score - a.score);
  1689. if (!candidates.length) return { ok: false, value: '', reason: 'no-candidate' };
  1690. const emit = (el) => {
  1691. el.dispatchEvent(new Event('input', { bubbles: true }));
  1692. el.dispatchEvent(new Event('change', { bubbles: true }));
  1693. };
  1694. let lastError = '';
  1695. for (const item of candidates.slice(0, 12)) {
  1696. const el = item.n;
  1697. try {
  1698. el.focus();
  1699. if (item.isTextInput) {
  1700. const tag = String(el.tagName || '').toLowerCase();
  1701. const proto = tag === 'textarea' ? window.HTMLTextAreaElement.prototype : window.HTMLInputElement.prototype;
  1702. const setter = Object.getOwnPropertyDescriptor(proto, 'value')?.set;
  1703. if (setter) {
  1704. setter.call(el, '');
  1705. emit(el);
  1706. setter.call(el, title);
  1707. emit(el);
  1708. } else {
  1709. el.value = '';
  1710. emit(el);
  1711. el.value = title;
  1712. emit(el);
  1713. }
  1714. const v = String(el.value || '').trim();
  1715. if (v) return { ok: true, value: v, score: item.score };
  1716. } else if (item.isEditableBlock) {
  1717. el.textContent = '';
  1718. emit(el);
  1719. el.textContent = title;
  1720. emit(el);
  1721. const v = String(el.innerText || el.textContent || '').trim();
  1722. if (v) return { ok: true, value: v, score: item.score };
  1723. }
  1724. } catch (e) {
  1725. lastError = String(e || '');
  1726. }
  1727. }
  1728. return { ok: false, value: '', reason: lastError || 'set-value-failed' };
  1729. }
  1730. """,
  1731. desired_title
  1732. )
  1733. if deep_result and deep_result.get('ok'):
  1734. deep_written = str(deep_result.get('value') or '').strip()
  1735. if _title_matches_expected(deep_written):
  1736. title_filled = True
  1737. title_verified_value = deep_written
  1738. print(f"[{self.platform_name}] 标题深层DOM填写成功: frame={frame_url}, value={deep_written}")
  1739. break
  1740. elif deep_written:
  1741. title_failure_reason = "deep-dom-mismatch"
  1742. print(f"[{self.platform_name}] 标题深层DOM命中但值不匹配: frame={frame_url}, value={deep_written}")
  1743. except Exception:
  1744. pass
  1745. # JS 兜底写入标题
  1746. if not title_filled and desired_title:
  1747. fallback_reason = ""
  1748. for frame in self.page.frames:
  1749. if title_filled:
  1750. break
  1751. frame_url = frame.url or "about:blank"
  1752. try:
  1753. fallback = await frame.evaluate(
  1754. """
  1755. (title) => {
  1756. const nodes = Array.from(document.querySelectorAll(
  1757. 'input:not([type="file"]):not([type="hidden"]), textarea, [contenteditable="true"], [role="textbox"]'
  1758. ));
  1759. const scored = nodes
  1760. .map((el) => {
  1761. const tag = String(el.tagName || '').toLowerCase();
  1762. const type = String((el.getAttribute('type') || '')).toLowerCase();
  1763. if (tag === 'input' && ['file', 'hidden', 'checkbox', 'radio', 'button', 'submit'].includes(type)) return null;
  1764. if (el.disabled || el.readOnly) return null;
  1765. const style = window.getComputedStyle(el);
  1766. if (style.display === 'none' || style.visibility === 'hidden') return null;
  1767. const rect = el.getBoundingClientRect();
  1768. if (!rect || rect.width < 8 || rect.height < 8) return null;
  1769. const ph = String(el.getAttribute('placeholder') || '');
  1770. const aria = String(el.getAttribute('aria-label') || '');
  1771. const name = String(el.getAttribute('name') || '');
  1772. const id = String(el.getAttribute('id') || '');
  1773. const cls = String(el.className || '');
  1774. const ce = String(el.getAttribute('contenteditable') || '').toLowerCase();
  1775. const role = String(el.getAttribute('role') || '').toLowerCase();
  1776. const maxLen = parseInt(String(el.getAttribute('maxlength') || '0'), 10) || 0;
  1777. const container = el.closest('label, [class*="form"], [class*="item"], [class*="field"], [class*="title"]');
  1778. const ctx = String((container && container.innerText) || '').slice(0, 80);
  1779. let score = 0;
  1780. if (ph.includes('标题')) score += 6;
  1781. if (aria.includes('标题')) score += 5;
  1782. if (/title|标题/i.test(name)) score += 4;
  1783. if (/title|标题/i.test(id)) score += 4;
  1784. if (/title|标题/i.test(cls)) score += 3;
  1785. if (/标题|title/i.test(ctx)) score += 4;
  1786. if (maxLen > 0 && maxLen <= 40) score += 3;
  1787. if (tag === 'input' || tag === 'textarea') score += 1;
  1788. if (ce === 'true' || role === 'textbox') score += 2;
  1789. return { el, score, maxLen };
  1790. })
  1791. .filter(x => x && x.score > 0)
  1792. .sort((a, b) => b.score - a.score);
  1793. // 没有明显标题线索时,回退到短输入框(常见标题长度限制)
  1794. const candidates = scored.length
  1795. ? scored
  1796. : nodes
  1797. .map((el) => {
  1798. const tag = String(el.tagName || '').toLowerCase();
  1799. const type = String((el.getAttribute('type') || '')).toLowerCase();
  1800. if (tag === 'input' && ['file', 'hidden', 'checkbox', 'radio', 'button', 'submit'].includes(type)) return null;
  1801. if (el.disabled || el.readOnly) return null;
  1802. const style = window.getComputedStyle(el);
  1803. if (style.display === 'none' || style.visibility === 'hidden') return null;
  1804. const rect = el.getBoundingClientRect();
  1805. if (!rect || rect.width < 8 || rect.height < 8) return null;
  1806. const maxLen = parseInt(String(el.getAttribute('maxlength') || '0'), 10) || 0;
  1807. const score = (maxLen > 0 && maxLen <= 40 ? 3 : 0) + (tag === 'input' || tag === 'textarea' ? 1 : 0);
  1808. return score > 0 ? { el, score, maxLen } : null;
  1809. })
  1810. .filter(Boolean)
  1811. .sort((a, b) => b.score - a.score);
  1812. if (!candidates.length) return { ok: false, value: '', reason: 'no-scored-input' };
  1813. let lastError = '';
  1814. for (const item of candidates.slice(0, 10)) {
  1815. const target = item.el;
  1816. const tag = String(target.tagName || '').toLowerCase();
  1817. const ce = String(target.getAttribute('contenteditable') || '').toLowerCase();
  1818. const role = String(target.getAttribute('role') || '').toLowerCase();
  1819. const isTextInput = tag === 'input' || tag === 'textarea';
  1820. const isEditableBlock = ce === 'true' || role === 'textbox';
  1821. try {
  1822. target.focus();
  1823. if (isTextInput) {
  1824. target.value = '';
  1825. target.dispatchEvent(new Event('input', { bubbles: true }));
  1826. target.value = title;
  1827. target.dispatchEvent(new Event('input', { bubbles: true }));
  1828. target.dispatchEvent(new Event('change', { bubbles: true }));
  1829. const v = String(target.value || '').trim();
  1830. if (v) return { ok: true, value: v, score: item.score || 0 };
  1831. } else if (isEditableBlock) {
  1832. target.textContent = '';
  1833. target.dispatchEvent(new Event('input', { bubbles: true }));
  1834. target.textContent = title;
  1835. target.dispatchEvent(new Event('input', { bubbles: true }));
  1836. target.dispatchEvent(new Event('change', { bubbles: true }));
  1837. const v = String(target.innerText || target.textContent || '').trim();
  1838. if (v) return { ok: true, value: v, score: item.score || 0 };
  1839. }
  1840. } catch (e) {
  1841. lastError = String(e || '');
  1842. }
  1843. }
  1844. return { ok: false, value: '', reason: lastError || 'set-value-failed' };
  1845. }
  1846. """,
  1847. desired_title
  1848. )
  1849. if fallback and fallback.get('ok'):
  1850. written = str(fallback.get('value') or '').strip()
  1851. if _title_matches_expected(written):
  1852. title_filled = True
  1853. title_verified_value = written
  1854. print(f"[{self.platform_name}] 标题 JS 兜底填写成功: frame={frame_url}, value={written}")
  1855. break
  1856. elif written:
  1857. fallback_reason = f"fallback-value-not-match:{written}"
  1858. title_failure_reason = fallback_reason
  1859. print(f"[{self.platform_name}] 标题 JS 兜底命中疑似错误字段,已忽略: frame={frame_url}, value={written}")
  1860. elif fallback:
  1861. fallback_reason = str(fallback.get('reason') or '')
  1862. if fallback_reason:
  1863. title_failure_reason = fallback_reason
  1864. except Exception as e:
  1865. fallback_reason = str(e)
  1866. if fallback_reason:
  1867. title_failure_reason = fallback_reason
  1868. if not title_filled:
  1869. print(f"[{self.platform_name}] 标题 JS 兜底未命中: reason={fallback_reason or 'unknown'}")
  1870. # 强化重试:标题框可能在上传收尾阶段延迟可编辑,循环尝试写入一段时间
  1871. if not title_filled and desired_title:
  1872. print(f"[{self.platform_name}] 标题常规填写未命中,进入强化重试...")
  1873. # 百家号在上传 80%+ 后可能经历较长静默处理期,给更长窗口等待标题输入框真正可编辑
  1874. strong_retry_deadline = asyncio.get_event_loop().time() + 240
  1875. strong_retry_round = 0
  1876. last_retry_log = 0.0
  1877. while asyncio.get_event_loop().time() < strong_retry_deadline and not title_filled:
  1878. strong_retry_round += 1
  1879. retry_reason = ""
  1880. if strong_retry_round == 1 or strong_retry_round % 5 == 0:
  1881. await _try_enter_publish_form(f"title-retry-{strong_retry_round}")
  1882. for frame in self.page.frames:
  1883. if title_filled:
  1884. break
  1885. frame_url = frame.url or "about:blank"
  1886. try:
  1887. retry_result = await frame.evaluate(
  1888. """
  1889. (title) => {
  1890. const nodes = Array.from(document.querySelectorAll(
  1891. 'input:not([type="file"]):not([type="hidden"]), textarea, [contenteditable="true"], [role="textbox"]'
  1892. ));
  1893. const candidates = nodes
  1894. .map((el) => {
  1895. const tag = String(el.tagName || '').toLowerCase();
  1896. const type = String((el.getAttribute('type') || '')).toLowerCase();
  1897. if (tag === 'input' && ['file', 'hidden', 'checkbox', 'radio', 'button', 'submit'].includes(type)) return null;
  1898. if (el.disabled || el.readOnly) return null;
  1899. const style = window.getComputedStyle(el);
  1900. if (style.display === 'none' || style.visibility === 'hidden') return null;
  1901. const rect = el.getBoundingClientRect();
  1902. if (!rect || rect.width < 8 || rect.height < 8) return null;
  1903. const ph = String(el.getAttribute('placeholder') || '');
  1904. const aria = String(el.getAttribute('aria-label') || '');
  1905. const name = String(el.getAttribute('name') || '');
  1906. const id = String(el.getAttribute('id') || '');
  1907. const cls = String(el.className || '');
  1908. const ce = String(el.getAttribute('contenteditable') || '').toLowerCase();
  1909. const role = String(el.getAttribute('role') || '').toLowerCase();
  1910. const maxLen = parseInt(String(el.getAttribute('maxlength') || '0'), 10) || 0;
  1911. const container = el.closest('label, [class*="form"], [class*="item"], [class*="field"], [class*="title"]');
  1912. const ctx = String((container && container.innerText) || '').slice(0, 80);
  1913. let score = 0;
  1914. if (/标题|title/i.test(ph)) score += 7;
  1915. if (/标题|title/i.test(aria)) score += 6;
  1916. if (/标题|title/i.test(name)) score += 5;
  1917. if (/标题|title/i.test(id)) score += 5;
  1918. if (/title|标题/i.test(cls)) score += 4;
  1919. if (/标题|title/i.test(ctx)) score += 5;
  1920. if (maxLen > 0 && maxLen <= 40) score += 3;
  1921. if (tag === 'input' || tag === 'textarea') score += 2;
  1922. if (ce === 'true' || role === 'textbox') score += 1;
  1923. return { el, score };
  1924. })
  1925. .filter(Boolean)
  1926. .sort((a, b) => b.score - a.score);
  1927. if (!candidates.length) {
  1928. return { ok: false, value: '', score: -1, reason: 'no-candidate' };
  1929. }
  1930. let lastError = '';
  1931. for (const item of candidates.slice(0, 12)) {
  1932. const target = item.el;
  1933. const tag = String(target.tagName || '').toLowerCase();
  1934. const ce = String(target.getAttribute('contenteditable') || '').toLowerCase();
  1935. const role = String(target.getAttribute('role') || '').toLowerCase();
  1936. const isTextInput = tag === 'input' || tag === 'textarea';
  1937. const isEditableBlock = ce === 'true' || role === 'textbox';
  1938. const emit = () => {
  1939. target.dispatchEvent(new Event('input', { bubbles: true }));
  1940. target.dispatchEvent(new Event('change', { bubbles: true }));
  1941. };
  1942. try {
  1943. target.focus();
  1944. if (isTextInput) {
  1945. try {
  1946. const proto = tag === 'textarea' ? window.HTMLTextAreaElement.prototype : window.HTMLInputElement.prototype;
  1947. const setter = Object.getOwnPropertyDescriptor(proto, 'value')?.set;
  1948. if (setter) {
  1949. setter.call(target, '');
  1950. emit();
  1951. setter.call(target, title);
  1952. emit();
  1953. } else {
  1954. target.value = '';
  1955. emit();
  1956. target.value = title;
  1957. emit();
  1958. }
  1959. } catch {
  1960. target.value = title;
  1961. emit();
  1962. }
  1963. const v = String(target.value || '').trim();
  1964. if (v) return { ok: true, value: v, score: item.score || 0, reason: '' };
  1965. } else if (isEditableBlock) {
  1966. target.textContent = '';
  1967. emit();
  1968. target.textContent = title;
  1969. emit();
  1970. const v = String(target.innerText || target.textContent || '').trim();
  1971. if (v) return { ok: true, value: v, score: item.score || 0, reason: '' };
  1972. }
  1973. } catch (e) {
  1974. lastError = String(e || '');
  1975. }
  1976. }
  1977. return { ok: false, value: '', score: -1, reason: lastError || 'set-value-failed' };
  1978. }
  1979. """,
  1980. desired_title
  1981. )
  1982. if retry_result and retry_result.get('ok'):
  1983. written = str(retry_result.get('value') or '').strip()
  1984. score = int(retry_result.get('score') or 0)
  1985. # 强化重试仍要求“像标题”且可匹配,避免误写到其他文本框
  1986. if score >= 3 and _title_matches_expected(written):
  1987. title_filled = True
  1988. title_verified_value = written
  1989. print(f"[{self.platform_name}] 标题强化重试成功: round={strong_retry_round}, frame={frame_url}, score={score}, value={written}")
  1990. break
  1991. elif written:
  1992. retry_reason = f"value-not-match:{written},score={score}"
  1993. elif retry_result:
  1994. retry_reason = str(retry_result.get('reason') or '')
  1995. except Exception as e:
  1996. retry_reason = str(e)
  1997. if title_filled:
  1998. break
  1999. now_retry = asyncio.get_event_loop().time()
  2000. if retry_reason in ("no-candidate", "no-scored-input"):
  2001. has_title_input = await _has_editable_title_input()
  2002. if not has_title_input:
  2003. retry_reason = "no-candidate-and-form-not-ready"
  2004. if now_retry - last_retry_log >= 10:
  2005. print(f"[{self.platform_name}] 标题强化重试中: round={strong_retry_round}, reason={retry_reason or 'pending'}")
  2006. last_retry_log = now_retry
  2007. if retry_reason:
  2008. title_failure_reason = retry_reason
  2009. await asyncio.sleep(3)
  2010. # AI 兜底:页面结构变化时,通过视觉识别返回可用 selector
  2011. if not title_filled and desired_title:
  2012. print(f"[{self.platform_name}] 标题强化重试仍未命中,尝试 AI selector 兜底...")
  2013. try:
  2014. ai_goal = "找到页面中用于填写视频标题的输入框或可编辑区域,返回一个可直接输入标题的 Playwright selector"
  2015. ai_selector = await self.ai_suggest_playwright_selector(ai_goal)
  2016. if ai_selector.get("has_selector"):
  2017. selector = str(ai_selector.get("selector") or "").strip()
  2018. confidence = int(ai_selector.get("confidence") or 0)
  2019. print(f"[{self.platform_name}] AI 标题 selector: {selector}, confidence={confidence}")
  2020. for frame in self.page.frames:
  2021. if title_filled:
  2022. break
  2023. frame_url = frame.url or "about:blank"
  2024. try:
  2025. loc = frame.locator(selector).first
  2026. if await loc.count() <= 0 or not await loc.is_visible():
  2027. continue
  2028. try:
  2029. await loc.click(timeout=2500)
  2030. except Exception:
  2031. pass
  2032. node_tag = ""
  2033. try:
  2034. node_tag = ((await loc.evaluate("el => (el.tagName || '').toLowerCase()")) or "").strip()
  2035. except Exception:
  2036. node_tag = ""
  2037. is_text_input = node_tag in ["input", "textarea"]
  2038. if is_text_input:
  2039. try:
  2040. await loc.fill(desired_title, timeout=5000)
  2041. except Exception:
  2042. await self.page.keyboard.press("Control+KeyA")
  2043. await self.page.keyboard.press("Backspace")
  2044. await self.page.keyboard.type(desired_title)
  2045. else:
  2046. try:
  2047. await self.page.keyboard.press("Control+KeyA")
  2048. await self.page.keyboard.press("Backspace")
  2049. await self.page.keyboard.type(desired_title)
  2050. except Exception:
  2051. await loc.evaluate(
  2052. """
  2053. (el, title) => {
  2054. el.focus();
  2055. const tag = String(el.tagName || '').toLowerCase();
  2056. if (tag === 'input' || tag === 'textarea') {
  2057. el.value = title;
  2058. } else {
  2059. el.textContent = title;
  2060. }
  2061. el.dispatchEvent(new Event('input', { bubbles: true }));
  2062. el.dispatchEvent(new Event('change', { bubbles: true }));
  2063. }
  2064. """,
  2065. desired_title
  2066. )
  2067. await asyncio.sleep(0.3)
  2068. current_value = ""
  2069. try:
  2070. if is_text_input:
  2071. current_value = (await loc.input_value() or "").strip()
  2072. else:
  2073. current_value = ((await loc.evaluate("el => (el.innerText || el.textContent || '')")) or "").strip()
  2074. except Exception:
  2075. current_value = ""
  2076. if _title_matches_expected(current_value):
  2077. title_filled = True
  2078. title_verified_value = current_value
  2079. print(f"[{self.platform_name}] AI selector 标题填写成功: frame={frame_url}, value={current_value}")
  2080. break
  2081. else:
  2082. print(f"[{self.platform_name}] AI selector 命中但值不匹配: frame={frame_url}, value={current_value}")
  2083. except Exception as e:
  2084. print(f"[{self.platform_name}] AI selector 执行失败: frame={frame_url}, err={e}")
  2085. else:
  2086. print(f"[{self.platform_name}] AI 未返回可用标题 selector: {ai_selector.get('notes') or 'no-notes'}")
  2087. title_failure_reason = "ai-no-selector"
  2088. except Exception as e:
  2089. print(f"[{self.platform_name}] AI 标题兜底异常: {e}")
  2090. title_failure_reason = f"ai-exception:{e}"
  2091. if not title_filled:
  2092. # 某些版本页面在上传后长期不暴露可编辑标题框;不中断流程,尝试继续发布。
  2093. if any(k in (title_failure_reason or "") for k in ["no-candidate", "form-not-ready", "title-not-ready", "ai-no-selector"]):
  2094. print(f"[{self.platform_name}] 标题输入框未就绪({title_failure_reason}),继续后续发布流程(使用页面现有标题)")
  2095. else:
  2096. screenshot_base64 = await self.capture_screenshot()
  2097. return PublishResult(
  2098. success=False,
  2099. platform=self.platform_name,
  2100. error=f"标题填写失败,已终止发布: {title_failure_reason or 'unknown'}",
  2101. screenshot_base64=screenshot_base64,
  2102. page_url=await self.get_page_url(),
  2103. status='failed'
  2104. )
  2105. # 填写描述
  2106. if params.description:
  2107. self.report_progress(65, "正在填写描述...")
  2108. try:
  2109. desc_selectors = [
  2110. 'textarea[placeholder*="描述"]',
  2111. 'textarea[placeholder*="简介"]',
  2112. '[class*="desc"] textarea',
  2113. '[class*="description"] textarea',
  2114. ]
  2115. for selector in desc_selectors:
  2116. try:
  2117. desc_input = self.page.locator(selector).first
  2118. if await desc_input.count() > 0 and await desc_input.is_visible():
  2119. await desc_input.click()
  2120. await self.page.keyboard.type(params.description[:200])
  2121. print(f"[{self.platform_name}] 描述填写成功")
  2122. break
  2123. except:
  2124. pass
  2125. except Exception as e:
  2126. print(f"[{self.platform_name}] 描述填写失败: {e}")
  2127. self.report_progress(70, "正在发布...")
  2128. await asyncio.sleep(1.5)
  2129. # 点击发布按钮(等待按钮可点击,避免上传完成后直接误判失败)
  2130. publish_selectors = [
  2131. 'button:has-text("立即发布")',
  2132. '[role="button"]:has-text("立即发布")',
  2133. 'button:has-text("确认发布")',
  2134. '[role="button"]:has-text("确认发布")',
  2135. 'button:has-text("发布")',
  2136. '[role="button"]:has-text("发布")',
  2137. 'button:has-text("发表")',
  2138. 'button:has-text("提交")',
  2139. '[class*="publish"] button',
  2140. '[class*="submit"] button',
  2141. ]
  2142. publish_blocked_keywords = [
  2143. "定时发布",
  2144. "预约发布",
  2145. "存草稿",
  2146. "草稿",
  2147. "取消",
  2148. "返回",
  2149. "预览",
  2150. ]
  2151. publish_processing_indicators = [
  2152. 'div:has-text("发布中")',
  2153. 'div:has-text("提交中")',
  2154. 'span:has-text("发布中")',
  2155. 'span:has-text("提交中")',
  2156. 'div:has-text("正在上传")',
  2157. 'div:has-text("正在处理")',
  2158. 'span:has-text("正在上传")',
  2159. 'span:has-text("正在处理")',
  2160. 'div:has-text("请稍候")',
  2161. 'span:has-text("请稍候")',
  2162. 'div:has-text("审核中")',
  2163. 'span:has-text("审核中")',
  2164. ]
  2165. def _compact_btn_text(text: str) -> str:
  2166. return re.sub(r"\s+", "", str(text or "")).strip()
  def _score_publish_button(btn_text_compact: str, prefer_confirm: bool = False) -> int:
      """Rank a button label by how likely it is the real "publish" action.

      Args:
          btn_text_compact: Button label with whitespace already removed.
          prefer_confirm: When true, labels containing "确认发布" or "立即发布"
              receive a +20 bonus.

      Returns:
          A positive score (higher is better), or ``-1`` when the label is
          empty, contains a blocked keyword, shows an in-progress state, or
          matches none of the known publish wordings.
      """
      label = btn_text_compact
      if not label:
          return -1

      # Labels such as "schedule", "draft", "cancel" must never be clicked.
      is_blocked = any(word in label for word in publish_blocked_keywords)
      if is_blocked or "发布中" in label or "提交中" in label:
          return -1

      has_publish_now = "立即发布" in label
      has_confirm = "确认发布" in label

      # The order encodes priority: exact matches outrank substring matches.
      if has_publish_now:
          base = 130
      elif label == "确认发布":
          base = 125
      elif has_confirm:
          base = 120
      elif label == "发布":
          base = 115
      elif "发布" in label:
          base = 100
      elif "发表" in label:
          base = 80
      elif "提交" in label:
          base = 70
      else:
          return -1

      bonus = 20 if (prefer_confirm and (has_confirm or has_publish_now)) else 0
      return base + bonus
  async def _collect_publish_candidates(prefer_confirm: bool = False):
      """Scan every frame for visible, enabled publish buttons and rank them.

      For each frame in ``self.page.frames`` and each entry of
      ``publish_selectors``, at most the first 6 matches are inspected.

      Args:
          prefer_confirm: Forwarded to ``_score_publish_button``.

      Returns:
          A 3-tuple ``(candidates, found_visible_button, found_disabled_button)``.
          ``candidates`` is a list of dicts with the keys ``btn``,
          ``frame_url``, ``selector``, ``idx``, ``text`` and ``score``, sorted
          by descending score. The two flags report whether any visible button
          and any disabled button were seen during the scan.
      """
      candidates = []
      found_visible_button = False
      found_disabled_button = False
      for frame in self.page.frames:
          frame_url = frame.url or "about:blank"
          for selector in publish_selectors:
              try:
                  btns = frame.locator(selector)
                  btn_count = await btns.count()
                  for idx in range(min(btn_count, 6)):
                      btn = btns.nth(idx)
                      if not await btn.is_visible():
                          continue
                      found_visible_button = True
                      btn_text = (await btn.text_content() or "").strip()
                      btn_text_compact = _compact_btn_text(btn_text)
                      disabled_attr = await btn.get_attribute('disabled')
                      aria_disabled = (await btn.get_attribute('aria-disabled') or '').lower()
                      cls = (await btn.get_attribute('class') or '').lower()
                      # ``disabled`` is a boolean HTML attribute: a bare
                      # ``<button disabled>` reports the empty string, which is
                      # falsy. Presence (not truthiness) is what marks the
                      # button as disabled, so compare against ``None``.
                      is_disabled = (
                          disabled_attr is not None
                          or aria_disabled == 'true'
                          or 'disabled' in cls
                      )
                      if is_disabled:
                          found_disabled_button = True
                          continue
                      score = _score_publish_button(btn_text_compact, prefer_confirm=prefer_confirm)
                      if score < 0:
                          continue
                      candidates.append({
                          "btn": btn,
                          "frame_url": frame_url,
                          "selector": selector,
                          "idx": idx,
                          "text": btn_text,
                          "score": score,
                      })
              except Exception:
                  # Best effort: a failing selector/frame must not abort the
                  # scan of the remaining selectors and frames.
                  pass
      candidates.sort(key=lambda x: x.get("score", 0), reverse=True)
      return candidates, found_visible_button, found_disabled_button
  async def _click_publish_candidate(candidate: dict):
      """Click one candidate button and log which post-click signals were seen.

      Args:
          candidate: A dict as produced by ``_collect_publish_candidates``
              (keys ``btn``, ``frame_url``, ``selector``, ``idx``, ``text``).

      Returns:
          ``(True, "")`` when the click call went through, otherwise
          ``(False, reason)`` where ``reason`` is ``"candidate-empty"`` or the
          text of the raised exception.
      """
      target = candidate.get("btn")
      if not target:
          return False, "candidate-empty"

      origin_frame = str(candidate.get("frame_url") or "about:blank")
      matched_selector = str(candidate.get("selector") or "")
      position = int(candidate.get("idx") or 0)
      label = str(candidate.get("text") or "").strip()
      url_before_click = self.page.url

      try:
          # Scrolling is a nicety; failure here must not prevent the click.
          try:
              await target.scroll_into_view_if_needed(timeout=1500)
          except Exception:
              pass

          # Regular click first; fall back to a forced click if that fails.
          try:
              await target.click(timeout=4000)
          except Exception:
              await target.click(force=True, timeout=4000)
          await asyncio.sleep(0.6)

          # The signals below are collected for the log line only.
          signals = []
          if self.page.url != url_before_click:
              signals.append("url-changed")

          try:
              label_after = _compact_btn_text(await target.text_content() or "")
              for marker in ("发布中", "提交中", "处理中"):
                  if marker in label_after:
                      signals.append("btn-processing")
                      break
          except Exception:
              pass

          try:
              for indicator in publish_processing_indicators:
                  hint = self.page.locator(indicator).first
                  if await hint.count() > 0 and await hint.is_visible():
                      signals.append("processing-indicator")
                      break
          except Exception:
              pass

          summary = ",".join(signals) or "no-immediate-signal"
          print(f"[{self.platform_name}] 点击发布按钮成功: frame={origin_frame}, selector={matched_selector}, idx={position}, text={label}, state={summary}")
          return True, ""
      except Exception as exc:
          return False, str(exc)
  2275. publish_clicked = False
  2276. publish_click_error = ""
  2277. publish_clicked_text = ""
  2278. click_deadline = asyncio.get_event_loop().time() + 180
  2279. last_publish_log = 0.0
  2280. while asyncio.get_event_loop().time() < click_deadline and not publish_clicked:
  2281. candidates, found_visible_button, found_disabled_button = await _collect_publish_candidates(prefer_confirm=False)
  2282. if candidates:
  2283. for candidate in candidates[:6]:
  2284. ok, err = await _click_publish_candidate(candidate)
  2285. if ok:
  2286. publish_clicked = True
  2287. publish_clicked_text = str(candidate.get("text") or "").strip()
  2288. break
  2289. if err:
  2290. publish_click_error = err
  2291. if publish_clicked:
  2292. break
  2293. now_click = asyncio.get_event_loop().time()
  2294. if now_click - last_publish_log >= 10:
  2295. if found_visible_button and found_disabled_button:
  2296. print(f"[{self.platform_name}] 发布按钮可见但不可点击,等待可用...")
  2297. elif found_visible_button:
  2298. print(f"[{self.platform_name}] 发布按钮可见,但点击失败,继续重试...")
  2299. else:
  2300. print(f"[{self.platform_name}] 尚未找到可见发布按钮,继续等待...")
  2301. last_publish_log = now_click
  2302. await asyncio.sleep(2)
  2303. # 某些页面会二次弹出“确认发布/立即发布”,补一次优先确认点击
  2304. if publish_clicked:
  2305. initial_text = _compact_btn_text(publish_clicked_text)
  2306. if initial_text and initial_text != "立即发布":
  2307. await asyncio.sleep(1)
  2308. confirm_candidates, _, _ = await _collect_publish_candidates(prefer_confirm=True)
  2309. for candidate in confirm_candidates[:4]:
  2310. candidate_text = _compact_btn_text(str(candidate.get("text") or ""))
  2311. if candidate_text == initial_text and ("确认发布" not in candidate_text and "立即发布" not in candidate_text):
  2312. continue
  2313. ok, err = await _click_publish_candidate(candidate)
  2314. if ok:
  2315. print(f"[{self.platform_name}] 检测到二次确认发布流程,已补点确认按钮: {candidate_text}")
  2316. break
  2317. if err:
  2318. publish_click_error = err
  2319. if not publish_clicked:
  2320. screenshot_base64 = await self.capture_screenshot()
  2321. return PublishResult(
  2322. success=False,
  2323. platform=self.platform_name,
  2324. error=f"发布按钮未找到或不可点击(可能仍在处理/必填项未通过)。title={title_verified_value or desired_title}; err={publish_click_error or 'none'}",
  2325. screenshot_base64=screenshot_base64,
  2326. page_url=await self.get_page_url(),
  2327. status='failed'
  2328. )
  2329. self.report_progress(80, "等待发布完成...")
  2330. # 记录点击发布前的 URL
  2331. publish_page_url = self.page.url
  2332. print(f"[{self.platform_name}] 发布前 URL: {publish_page_url}")
  2333. # 等待发布完成(百家号审核/处理链路可能较慢,默认等待 15 分钟)
  2334. publish_timeout = 900
  2335. start_time = asyncio.get_event_loop().time()
  2336. last_url = publish_page_url
  2337. republish_click_count = 0
  2338. republish_attempt_count = 0
  2339. last_republish_attempt_time = 0.0
  2340. republish_attempt_interval = 45 # 失败后至少间隔 45s 再尝试,避免刷屏和误操作
  2341. max_republish_attempts = 2
  2342. while asyncio.get_event_loop().time() - start_time < publish_timeout:
  2343. await asyncio.sleep(3)
  2344. current_url = self.page.url
  2345. # 检测 URL 是否发生变化
  2346. if current_url != last_url:
  2347. print(f"[{self.platform_name}] URL 变化: {last_url} -> {current_url}")
  2348. last_url = current_url
  2349. # 检查是否跳转到内容管理页面(真正的成功标志)
  2350. # 百家号发布成功后会跳转到 /builder/rc/content 页面
  2351. if '/builder/rc/content' in current_url and 'edit' not in current_url:
  2352. self.report_progress(100, "发布成功!")
  2353. print(f"[{self.platform_name}] 发布成功,已跳转到内容管理页: {current_url}")
  2354. screenshot_base64 = await self.capture_screenshot()
  2355. return PublishResult(
  2356. success=True,
  2357. platform=self.platform_name,
  2358. message="发布成功",
  2359. screenshot_base64=screenshot_base64,
  2360. page_url=current_url,
  2361. status='success'
  2362. )
  2363. # 检查是否有明确的成功提示弹窗
  2364. try:
  2365. # 百家号发布成功会显示"发布成功"弹窗
  2366. success_modal = self.page.locator('div:has-text("发布成功"), div:has-text("提交成功"), div:has-text("视频发布成功")').first
  2367. if await success_modal.count() > 0 and await success_modal.is_visible():
  2368. self.report_progress(100, "发布成功!")
  2369. print(f"[{self.platform_name}] 检测到发布成功弹窗")
  2370. screenshot_base64 = await self.capture_screenshot()
  2371. # 等待一下看是否会跳转
  2372. await asyncio.sleep(3)
  2373. return PublishResult(
  2374. success=True,
  2375. platform=self.platform_name,
  2376. message="发布成功",
  2377. screenshot_base64=screenshot_base64,
  2378. page_url=self.page.url,
  2379. status='success'
  2380. )
  2381. except Exception as e:
  2382. print(f"[{self.platform_name}] 检测成功提示异常: {e}")
  2383. # 检查是否有错误提示
  2384. try:
  2385. error_selectors = [
  2386. 'div.error-tip',
  2387. 'div[class*="error-msg"]',
  2388. 'span[class*="error"]',
  2389. 'div:has-text("发布失败")',
  2390. 'div:has-text("提交失败")',
  2391. ]
  2392. for error_selector in error_selectors:
  2393. error_el = self.page.locator(error_selector).first
  2394. if await error_el.count() > 0 and await error_el.is_visible():
  2395. error_text = await error_el.text_content()
  2396. if error_text and error_text.strip():
  2397. print(f"[{self.platform_name}] 检测到错误: {error_text}")
  2398. screenshot_base64 = await self.capture_screenshot()
  2399. return PublishResult(
  2400. success=False,
  2401. platform=self.platform_name,
  2402. error=f"发布失败: {error_text.strip()}",
  2403. screenshot_base64=screenshot_base64,
  2404. page_url=current_url,
  2405. status='failed'
  2406. )
  2407. except Exception as e:
  2408. print(f"[{self.platform_name}] 检测错误提示异常: {e}")
  2409. # 检查验证码
  2410. captcha_result = await self.check_captcha()
  2411. if captcha_result['need_captcha']:
  2412. screenshot_base64 = await self.capture_screenshot()
  2413. return PublishResult(
  2414. success=False,
  2415. platform=self.platform_name,
  2416. error=f"发布过程中需要{captcha_result['captcha_type']}验证码",
  2417. need_captcha=True,
  2418. captcha_type=captcha_result['captcha_type'],
  2419. screenshot_base64=screenshot_base64,
  2420. page_url=current_url,
  2421. status='need_captcha'
  2422. )
  2423. # 检查发布按钮状态(如果还在编辑页面)
  2424. if 'edit' in current_url:
  2425. try:
  2426. is_processing = False
  2427. for indicator in publish_processing_indicators:
  2428. loc = self.page.locator(indicator).first
  2429. if await loc.count() > 0 and await loc.is_visible():
  2430. is_processing = True
  2431. print(f"[{self.platform_name}] 正在处理中...")
  2432. break
  2433. if not is_processing:
  2434. # 如果不是在处理中,按节流策略尝试重新点击发布按钮
  2435. now_loop = asyncio.get_event_loop().time()
  2436. elapsed = now_loop - start_time
  2437. if (
  2438. elapsed > 60
  2439. and republish_attempt_count < max_republish_attempts
  2440. and (now_loop - last_republish_attempt_time) >= republish_attempt_interval
  2441. ):
  2442. last_republish_attempt_time = now_loop
  2443. republish_attempt_count += 1
  2444. print(f"[{self.platform_name}] 发布状态未变化,执行第 {republish_attempt_count}/{max_republish_attempts} 次补点发布...")
  2445. republish_done = False
  2446. republish_candidates, _, _ = await _collect_publish_candidates(prefer_confirm=True)
  2447. for candidate in republish_candidates[:6]:
  2448. ok, err = await _click_publish_candidate(candidate)
  2449. if ok:
  2450. republish_done = True
  2451. republish_click_count += 1
  2452. candidate_text = _compact_btn_text(str(candidate.get("text") or ""))
  2453. print(f"[{self.platform_name}] 重新点击发布按钮成功: text={candidate_text}, count={republish_click_count}")
  2454. break
  2455. if err:
  2456. publish_click_error = err
  2457. if not republish_done:
  2458. print(f"[{self.platform_name}] 本轮未找到可用的立即发布按钮,继续等待状态变化")
  2459. except Exception as e:
  2460. print(f"[{self.platform_name}] 检查处理状态异常: {e}")
  2461. # 超时,获取截图分析最终状态
  2462. print(f"[{self.platform_name}] 发布超时,最终 URL: {self.page.url}")
  2463. screenshot_base64 = await self.capture_screenshot()
  2464. # 最后一次检查是否在内容管理页
  2465. final_url = self.page.url
  2466. if '/builder/rc/content' in final_url and 'edit' not in final_url:
  2467. return PublishResult(
  2468. success=True,
  2469. platform=self.platform_name,
  2470. message="发布成功(延迟确认)",
  2471. screenshot_base64=screenshot_base64,
  2472. page_url=final_url,
  2473. status='success'
  2474. )
  2475. # 超时后兜底:跳转内容管理页按标题校验,避免“已发布但未跳转”误判失败
  2476. print(f"[{self.platform_name}] 超时后执行内容页二次校验,title={params.title}")
  2477. verify_deadline = asyncio.get_event_loop().time() + 120 # 最多再校验 2 分钟
  2478. while asyncio.get_event_loop().time() < verify_deadline:
  2479. if await self._verify_publish_from_content_page(params.title, page_size=20):
  2480. screenshot_base64 = await self.capture_screenshot()
  2481. return PublishResult(
  2482. success=True,
  2483. platform=self.platform_name,
  2484. message="发布成功(内容页校验)",
  2485. screenshot_base64=screenshot_base64,
  2486. page_url=self.page.url,
  2487. status='success'
  2488. )
  2489. await asyncio.sleep(8)
  2490. return PublishResult(
  2491. success=False,
  2492. platform=self.platform_name,
  2493. error="发布超时,请手动检查发布状态",
  2494. screenshot_base64=screenshot_base64,
  2495. page_url=final_url,
  2496. status='need_action'
  2497. )
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """
      Fetch one page of Baijiahao works through the content-management API.

      The ``pcui/article/lists`` endpoint usually needs a custom ``token``
      request header (a JWT); cookies alone may be answered with "not logged
      in". This method therefore opens the content page with Playwright, looks
      for a JWT-shaped value in localStorage / sessionStorage / meta tags /
      a few well-known globals (falling back to a regex over the page HTML),
      and then issues the ``fetch`` from inside the page so that cookies and
      token are sent together.

      Args:
          cookies: Raw cookie payload, handed to ``self.parse_cookies``.
          page: Zero-based page index supplied by the caller; the API's
              ``currentPage`` is one-based, so ``page + 1`` is requested.
          page_size: Number of items per page.

      Returns:
          ``WorksResult`` with ``success=True`` and the parsed works, or
          ``success=False`` with ``error`` set when any step of the
          fetch/parse sequence raises.
      """
      import re

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品列表 (使用 API)")
      print(f"[{self.platform_name}] page={page}, page_size={page_size}")
      print(f"{'='*60}")

      works: List[WorkItem] = []
      total = 0
      has_more = False
      next_page = ""

      try:
          # Parse the cookies and install them into the Playwright context.
          cookie_list = self.parse_cookies(cookies)
          await self.init_browser()
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Open the content-management page first so the Referer/session is
          # in place. The caller passes page=0,1,...; the API expects
          # currentPage=1,2,...
          current_page = int(page) + 1
          page_size = int(page_size)
          content_url = (
              "https://baijiahao.baidu.com/builder/rc/content"
              f"?currentPage={current_page}&pageSize={page_size}"
              "&search=&type=&collection=&startDate=&endDate="
          )
          await self.page.goto(content_url, wait_until="domcontentloaded", timeout=60000)
          await asyncio.sleep(2)

          # 1) Extract the token (JWT) from the page.
          token = await self.page.evaluate(
              """
              () => {
                const isJwtLike = (v) => {
                  if (!v || typeof v !== 'string') return false;
                  const s = v.trim();
                  if (s.length < 60) return false;
                  const parts = s.split('.');
                  if (parts.length !== 3) return false;
                  return parts.every(p => /^[A-Za-z0-9_-]+$/.test(p) && p.length > 10);
                };
                const pickFromStorage = (storage) => {
                  try {
                    const keys = Object.keys(storage || {});
                    for (const k of keys) {
                      const v = storage.getItem(k);
                      if (isJwtLike(v)) return v;
                    }
                  } catch {}
                  return "";
                };
                // localStorage / sessionStorage
                let t = pickFromStorage(window.localStorage);
                if (t) return t;
                t = pickFromStorage(window.sessionStorage);
                if (t) return t;
                // meta tags
                const meta = document.querySelector('meta[name="token"], meta[name="bjh-token"]');
                const metaToken = meta && meta.getAttribute('content');
                if (isJwtLike(metaToken)) return metaToken;
                // a few well-known global state objects
                const candidates = [
                  (window.__INITIAL_STATE__ && window.__INITIAL_STATE__.token) || "",
                  (window.__PRELOADED_STATE__ && window.__PRELOADED_STATE__.token) || "",
                  (window.__NUXT__ && window.__NUXT__.state && window.__NUXT__.state.token) || "",
                ];
                for (const c of candidates) {
                  if (isJwtLike(c)) return c;
                }
                return "";
              }
              """
          )

          # 2) Still no token: fall back to scanning the page HTML.
          if not token:
              html = await self.page.content()
              m = re.search(r'([A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,})', html)
              if m:
                  token = m.group(1)
          if not token:
              raise Exception("未能从页面提取 token(可能未登录或触发风控),请重新登录百家号账号后再试")

          # 3) Call the API from the page context (cookies are sent automatically).
          api_url = (
              "https://baijiahao.baidu.com/pcui/article/lists"
              f"?currentPage={current_page}"
              f"&pageSize={page_size}"
              "&search=&type=&collection=&startDate=&endDate="
              "&clearBeforeFetch=false"
              "&dynamic=1"
          )
          resp = await self.page.evaluate(
              """
              async ({ url, token }) => {
                const r = await fetch(url, {
                  method: 'GET',
                  credentials: 'include',
                  headers: {
                    'accept': 'application/json, text/plain, */*',
                    ...(token ? { token } : {}),
                  },
                });
                const text = await r.text();
                return { ok: r.ok, status: r.status, text };
              }
              """,
              {"url": api_url, "token": token},
          )
          # Check the type before calling ``.get`` so an unexpected result
          # produces the HTTP error below rather than an AttributeError.
          if not isinstance(resp, dict) or not resp.get("ok"):
              status = resp.get("status") if isinstance(resp, dict) else "unknown"
              raise Exception(f"百家号接口请求失败: HTTP {status}")

          api_result = json.loads(resp.get("text") or "{}")
          print(f"[{self.platform_name}] pcui/article/lists 响应: errno={api_result.get('errno')}, errmsg={api_result.get('errmsg')}")
          if api_result.get("errno") != 0:
              errno = api_result.get("errno")
              errmsg = api_result.get("errmsg", "unknown error")
              # 20040001 is commonly "not logged in".
              if errno in (110, 20040001):
                  raise Exception("百家号未登录或 Cookie/token 失效,请重新登录后再同步")
              raise Exception(f"百家号接口错误: errno={errno}, errmsg={errmsg}")

          data = api_result.get("data", {}) or {}
          items = data.get("list", []) or []
          page_info = data.get("page", {}) or {}
          total = int(page_info.get("totalCount", 0) or 0)
          total_page = int(page_info.get("totalPage", 0) or 0)
          cur_page = int(page_info.get("currentPage", current_page) or current_page)
          has_more = bool(total_page and cur_page < total_page)
          # NOTE(review): ``cur_page`` is the API's one-based page number while
          # the ``page`` argument is zero-based; confirm how callers consume
          # ``next_page`` before feeding it back in as ``page``.
          next_page = cur_page + 1 if has_more else ""
          print(f"[{self.platform_name}] 获取到 {len(items)} 个作品,总数: {total}, currentPage={cur_page}, totalPage={total_page}")

          def _to_int(value) -> int:
              """Convert a counter field to int, yielding 0 for missing or malformed values."""
              try:
                  return int(value or 0)
              except (TypeError, ValueError):
                  return 0

          def _pick_cover(item: dict) -> str:
              """Return the first usable cover URL of ``item`` or ``""``."""
              cover = item.get("crosswise_cover") or item.get("vertical_cover") or ""
              if cover:
                  return cover
              raw = item.get("cover_images") or ""
              try:
                  # cover_images may be a JSON-encoded string.
                  parsed = json.loads(raw) if isinstance(raw, str) else raw
                  if isinstance(parsed, list) and parsed:
                      first = parsed[0]
                      if isinstance(first, dict):
                          return first.get("src") or first.get("ori_src") or ""
                      if isinstance(first, str):
                          return first
              except Exception:
                  pass
              return ""

          def _pick_duration(item: dict) -> int:
              """Return the first positive duration found in ``item``, else 0."""
              for k in ("rmb_duration", "duration", "long"):
                  try:
                      v = int(item.get(k) or 0)
                      if v > 0:
                          return v
                  except Exception:
                      pass
              # displaytype_exinfo may carry ugcvideo.video_info.durationInSecond.
              ex = item.get("displaytype_exinfo") or ""
              try:
                  exj = json.loads(ex) if isinstance(ex, str) and ex else (ex if isinstance(ex, dict) else {})
                  ugc = (exj.get("ugcvideo") or {}) if isinstance(exj, dict) else {}
                  vi = ugc.get("video_info") or {}
                  v = int(vi.get("durationInSecond") or ugc.get("long") or 0)
                  return v if v > 0 else 0
              except Exception:
                  return 0

          def _pick_status(item: dict) -> str:
              """Map the item's status fields to rejected / draft / published."""
              qs = str(item.get("quality_status") or "").lower()
              st = str(item.get("status") or "").lower()
              if qs == "rejected" or "reject" in st:
                  return "rejected"
              if st in ("draft", "unpublish", "unpublished"):
                  return "draft"
              # Anything else is reported as published.
              return "published"

          for item in items:
              # Prefer nid (the builder preview link uses it).
              work_id = str(item.get("nid") or item.get("feed_id") or item.get("article_id") or item.get("id") or "")
              if not work_id:
                  continue
              works.append(
                  WorkItem(
                      work_id=work_id,
                      title=str(item.get("title") or ""),
                      cover_url=_pick_cover(item),
                      video_url=str(item.get("url") or ""),
                      duration=_pick_duration(item),
                      status=_pick_status(item),
                      publish_time=str(item.get("publish_time") or item.get("publish_at") or item.get("created_at") or ""),
                      # Guarded conversion: one malformed counter must not
                      # discard the whole page of works.
                      play_count=_to_int(item.get("read_amount")),
                      like_count=_to_int(item.get("like_amount")),
                      comment_count=_to_int(item.get("comment_amount")),
                      share_count=_to_int(item.get("share_amount")),
                      collect_count=_to_int(item.get("collection_amount")),
                  )
              )
          print(f"[{self.platform_name}] ✓ 成功解析 {len(works)} 个作品")
      except Exception as e:
          import traceback
          traceback.print_exc()
          return WorksResult(
              success=False,
              platform=self.platform_name,
              error=str(e),
              debug_info="baijiahao_get_works_failed"
          )

      return WorksResult(
          success=True,
          platform=self.platform_name,
          works=works,
          total=total,
          has_more=has_more,
          next_page=next_page
      )
  async def get_article_stats(
      self,
      cookies: str,
      start_day: str,
      end_day: str,
      stat_type: str,
      num: int,
      count: int,
  ) -> dict:
      """
      Call Baijiahao's ``/author/eco/statistics/articleListStatistic`` endpoint.

      The request is made with plain HTTP (aiohttp) using the supplied cookies;
      no browser-side token is involved. It is used for per-day statistics at
      the works-list level.

      Args:
          cookies: Raw cookie payload, handed to ``self.parse_cookies``.
          start_day: Sent as the ``start_day`` query parameter.
          end_day: Sent as the ``end_day`` query parameter.
          stat_type: Sent as the ``type`` query parameter.
          num: Sent as the ``num`` query parameter.
          count: Sent as the ``count`` query parameter.

      Returns:
          A dict with the keys ``success`` (HTTP 200 and ``errno == 0``),
          ``status``, ``errno``, ``errmsg`` and ``data``.

      Raises:
          Exception: Whatever aiohttp raises for the main request, including
              the JSON decoding error when the body is not JSON (the body is
              logged before re-raising).
      """
      import aiohttp
      from urllib.parse import urlencode

      print(f"[{self.platform_name}] get_article_stats: {start_day}-{end_day}, type={stat_type}, num={num}, count={count}")

      # Parse the cookies into a name -> value mapping for aiohttp.
      cookie_list = self.parse_cookies(cookies)
      cookie_dict = {c['name']: c['value'] for c in cookie_list}

      session_headers = {
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
          'Accept-Encoding': 'gzip, deflate, br',
          'Connection': 'keep-alive',
          'Upgrade-Insecure-Requests': '1',
      }
      headers = {
          'Accept': 'application/json, text/plain, */*',
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Referer': 'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
          'Accept-Encoding': 'gzip, deflate, br',
          'Connection': 'keep-alive',
      }

      async with aiohttp.ClientSession(cookies=cookie_dict) as session:
          # 0) Visit the "single" analysis page first to establish session
          #    context. The body is never used, so the response is opened in
          #    ``async with`` purely to guarantee it is released.
          try:
              async with session.get(
                  'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
                  headers=session_headers,
                  timeout=aiohttp.ClientTimeout(total=20),
              ):
                  pass
          except Exception as e:
              print(f"[{self.platform_name}] warmup single page failed (non-fatal): {e}")

          # 1) Call articleListStatistic. The query string is URL-encoded
          #    rather than interpolated raw, so unusual characters in the
          #    arguments cannot corrupt the request.
          query = urlencode({
              "start_day": start_day,
              "end_day": end_day,
              "type": stat_type,
              "num": num,
              "count": count,
          })
          api_url = (
              "https://baijiahao.baidu.com/author/eco/statistics/articleListStatistic"
              f"?{query}"
          )
          async with session.get(
              api_url,
              headers=headers,
              timeout=aiohttp.ClientTimeout(total=30),
          ) as resp:
              status = resp.status
              try:
                  data = await resp.json()
              except Exception:
                  text = await resp.text()
                  print(f"[{self.platform_name}] articleListStatistic non-JSON response: {text[:1000]}")
                  raise

      # Valid JSON that is not an object (list, string, null) has no
      # errno/errmsg/data fields; report it as a failure instead of raising
      # AttributeError on ``.get``.
      if not isinstance(data, dict):
          print(f"[{self.platform_name}] articleListStatistic unexpected JSON shape: {type(data).__name__}")
          return {
              "success": False,
              "status": status,
              "errno": None,
              "errmsg": "unexpected non-object JSON response",
              "data": None,
          }

      errno = data.get('errno')
      errmsg = data.get('errmsg')
      print(f"[{self.platform_name}] articleListStatistic: http={status}, errno={errno}, msg={errmsg}")
      return {
          "success": status == 200 and errno == 0,
          "status": status,
          "errno": errno,
          "errmsg": errmsg,
          "data": data.get('data'),
      }
  async def get_trend_data(
      self,
      cookies: str,
      nid: str,
  ) -> dict:
      """
      Call Baijiahao's ``/author/eco/statistic/gettrenddata`` endpoint.

      Retrieves the per-day statistics of a single work (``basic_list``) over
      plain HTTP (aiohttp) using the supplied cookies.

      Args:
          cookies: Raw cookie payload, handed to ``self.parse_cookies``.
          nid: Identifier of the work, sent as the ``nid`` query parameter.

      Returns:
          A dict with the keys ``success`` (HTTP 200 and ``errno == 0``),
          ``status``, ``errno``, ``errmsg`` and ``data``.

      Raises:
          Exception: Whatever aiohttp raises for the main request, including
              the JSON decoding error when the body is not JSON (the body is
              logged before re-raising).
      """
      import aiohttp
      from urllib.parse import urlencode

      print(f"[{self.platform_name}] get_trend_data: nid={nid}")

      cookie_list = self.parse_cookies(cookies)
      cookie_dict = {c['name']: c['value'] for c in cookie_list}

      session_headers = {
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
          'Accept-Encoding': 'gzip, deflate, br',
          'Connection': 'keep-alive',
          'Upgrade-Insecure-Requests': '1',
      }
      headers = {
          'Accept': 'application/json, text/plain, */*',
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Referer': 'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
          'Accept-Encoding': 'gzip, deflate, br',
          'Connection': 'keep-alive',
      }

      async with aiohttp.ClientSession(cookies=cookie_dict) as session:
          # 0) Warm-up request. The body is never used, so the response is
          #    opened in ``async with`` purely to guarantee it is released.
          try:
              async with session.get(
                  'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
                  headers=session_headers,
                  timeout=aiohttp.ClientTimeout(total=20),
              ):
                  pass
          except Exception as e:
              print(f"[{self.platform_name}] warmup single page (trend) failed (non-fatal): {e}")

          # URL-encode the query so an unusual ``nid`` cannot corrupt the
          # request line.
          query = urlencode({
              "nid": nid,
              "trend_type": "all",
              "data_type": "addition",
          })
          api_url = (
              "https://baijiahao.baidu.com/author/eco/statistic/gettrenddata"
              f"?{query}"
          )
          async with session.get(
              api_url,
              headers=headers,
              timeout=aiohttp.ClientTimeout(total=30),
          ) as resp:
              status = resp.status
              try:
                  data = await resp.json()
              except Exception:
                  text = await resp.text()
                  print(f"[{self.platform_name}] gettrenddata non-JSON response: {text[:1000]}")
                  raise

      # Valid JSON that is not an object (list, string, null) has no
      # errno/errmsg/data fields; report it as a failure instead of raising
      # AttributeError on ``.get``.
      if not isinstance(data, dict):
          print(f"[{self.platform_name}] gettrenddata unexpected JSON shape: {type(data).__name__}")
          return {
              "success": False,
              "status": status,
              "errno": None,
              "errmsg": "unexpected non-object JSON response",
              "data": None,
          }

      errno = data.get('errno')
      errmsg = data.get('errmsg')
      print(f"[{self.platform_name}] gettrenddata: http={status}, errno={errno}, msg={errmsg}")
      return {
          "success": status == 200 and errno == 0,
          "status": status,
          "errno": errno,
          "errmsg": errmsg,
          "data": data.get('data'),
      }
  async def check_login_status(self, cookies: str) -> dict:
      """
      Report whether the given Baijiahao cookies are still logged in.

      This override only logs a line and then defers to the parent class's
      ``check_login_status``. Per the original note, that shared routine is
      browser-based: it opens the backend page with Playwright and judges the
      session by a redirect to the login page or the appearance of a login
      dialog / risk-control prompt.
      """
      print(f"[{self.platform_name}] 检查登录状态 (使用通用浏览器逻辑)")
      # Hand over to the parent implementation so this platform behaves like
      # the other publishers.
      login_state = await super().check_login_status(cookies)
      return login_state
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Placeholder for fetching a work's comments; always reports failure.

      ``cookies`` and ``cursor`` are accepted but not used yet.
      """
      # TODO: implement comment fetching.
      not_implemented = CommentsResult(
          success=False,
          platform=self.platform_name,
          work_id=work_id,
          error="百家号评论功能暂未实现",
      )
      return not_implemented