base.py 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537
  1. # -*- coding: utf-8 -*-
  2. """
  3. 平台发布基类
  4. 提供通用的发布接口和工具方法
  5. """
  6. import asyncio
  7. import json
  8. import os
  9. import uuid
  10. import random
  11. from abc import ABC, abstractmethod
  12. from dataclasses import dataclass, field
  13. from datetime import datetime
  14. from typing import List, Optional, Callable, Dict, Any
  15. from playwright.async_api import async_playwright, Browser, BrowserContext, Page
  16. # 导入反检测工具
  17. try:
  18. from utils.stealth import (
  19. get_browser_context_args,
  20. get_all_stealth_scripts,
  21. get_user_agent,
  22. get_viewport,
  23. )
  24. STEALTH_AVAILABLE = True
  25. except ImportError:
  26. STEALTH_AVAILABLE = False
  27. print("[Warning] Stealth utils not available, anti-detection disabled")
  28. @dataclass
  29. class PublishParams:
  30. """发布参数"""
  31. title: str
  32. video_path: str
  33. description: str = ""
  34. cover_path: Optional[str] = None
  35. tags: List[str] = field(default_factory=list)
  36. publish_date: Optional[datetime] = None
  37. location: str = "重庆市"
  38. def __post_init__(self):
  39. if not self.description:
  40. self.description = self.title
@dataclass
class PublishResult:
    """Result of a publish attempt.

    Only ``success`` and ``platform`` are required; every other field
    defaults to an empty string (or ``False`` for ``need_captcha``).
    """

    success: bool
    platform: str
    video_id: str = ""
    video_url: str = ""
    message: str = ""
    error: str = ""
    # Captcha-related fields; presumably set when the platform asks for
    # verification — confirm against the concrete publishers.
    need_captcha: bool = False
    captcha_type: str = ""
    screenshot_base64: str = ""
    page_url: str = ""
    status: str = ""
    screenshot_path: str = ""
  56. @dataclass
  57. class WorkItem:
  58. """作品数据"""
  59. work_id: str
  60. title: str
  61. cover_url: str = ""
  62. video_url: str = ""
  63. duration: int = 0 # 秒
  64. status: str = "published" # published, reviewing, rejected, draft
  65. publish_time: str = ""
  66. play_count: int = 0
  67. like_count: int = 0
  68. comment_count: int = 0
  69. share_count: int = 0
  70. collect_count: int = 0
  71. def to_dict(self) -> Dict[str, Any]:
  72. return {
  73. "work_id": self.work_id,
  74. "title": self.title,
  75. "cover_url": self.cover_url,
  76. "video_url": self.video_url,
  77. "duration": self.duration,
  78. "status": self.status,
  79. "publish_time": self.publish_time,
  80. "play_count": self.play_count,
  81. "like_count": self.like_count,
  82. "comment_count": self.comment_count,
  83. "share_count": self.share_count,
  84. "collect_count": self.collect_count,
  85. }
  86. @dataclass
  87. class CommentItem:
  88. """评论数据"""
  89. comment_id: str
  90. parent_comment_id: str
  91. work_id: str
  92. content: str
  93. author_id: str = ""
  94. author_name: str = ""
  95. author_avatar: str = ""
  96. like_count: int = 0
  97. reply_count: int = 0
  98. create_time: str = ""
  99. is_author: bool = False # 是否是作者的评论
  100. replies: List["CommentItem"] = field(default_factory=list)
  101. def to_dict(self) -> Dict[str, Any]:
  102. return {
  103. "comment_id": self.comment_id,
  104. "parent_comment_id": self.parent_comment_id,
  105. "work_id": self.work_id,
  106. "content": self.content,
  107. "author_id": self.author_id,
  108. "author_name": self.author_name,
  109. "author_avatar": self.author_avatar,
  110. "like_count": self.like_count,
  111. "reply_count": self.reply_count,
  112. "create_time": self.create_time,
  113. "is_author": self.is_author,
  114. "replies": [r.to_dict() for r in self.replies],
  115. }
  116. @dataclass
  117. class WorksResult:
  118. """作品列表结果"""
  119. success: bool
  120. platform: str
  121. works: List[WorkItem] = field(default_factory=list)
  122. total: int = 0
  123. has_more: bool = False
  124. next_page: Any = ""
  125. error: str = ""
  126. debug_info: str = "" # 调试信息
  127. def to_dict(self) -> Dict[str, Any]:
  128. return {
  129. "success": self.success,
  130. "platform": self.platform,
  131. "works": [w.to_dict() for w in self.works],
  132. "total": self.total,
  133. "has_more": self.has_more,
  134. "next_page": self.next_page,
  135. "error": self.error,
  136. "debug_info": self.debug_info,
  137. }
  138. @dataclass
  139. class CommentsResult:
  140. """评论列表结果"""
  141. success: bool
  142. platform: str
  143. work_id: str
  144. comments: List[CommentItem] = field(default_factory=list)
  145. total: int = 0
  146. has_more: bool = False
  147. error: str = ""
  148. def to_dict(self) -> Dict[str, Any]:
  149. return {
  150. "success": self.success,
  151. "platform": self.platform,
  152. "work_id": self.work_id,
  153. "comments": [c.to_dict() for c in self.comments],
  154. "total": self.total,
  155. "has_more": self.has_more,
  156. "error": self.error,
  157. }
  158. class BasePublisher(ABC):
  159. """
  160. 平台发布基类
  161. 所有平台发布器都需要继承此类
  162. """
  163. platform_name: str = "base"
  164. login_url: str = ""
  165. publish_url: str = ""
  166. cookie_domain: str = ""
  167. def __init__(self, headless: bool = True):
  168. self.headless = headless
  169. self.browser: Optional[Browser] = None
  170. self.context: Optional[BrowserContext] = None
  171. self.page: Optional[Page] = None
  172. self.playwright = None # Playwright server instance, must be stopped in close_browser()
  173. self.on_progress: Optional[Callable[[int, str], None]] = None
  174. self.user_id: Optional[int] = None
  175. self.publish_task_id: Optional[int] = None
  176. self.publish_account_id: Optional[int] = None
  177. self.proxy_config: Optional[Dict[str, Any]] = None
    def set_progress_callback(self, callback: Callable[[int, str], None]):
        """Register the progress callback.

        The callback is stored on ``self.on_progress`` and later invoked by
        ``report_progress`` with ``(progress, message)``.
        """
        self.on_progress = callback
  181. def report_progress(self, progress: int, message: str):
  182. """报告进度"""
  183. print(f"[{self.platform_name}] [{progress}%] {message}")
  184. if self.on_progress:
  185. self.on_progress(progress, message)
  186. @staticmethod
  187. def parse_cookies(cookies_str: str) -> list:
  188. """解析 cookie 字符串为列表"""
  189. try:
  190. cookies = json.loads(cookies_str)
  191. if isinstance(cookies, list):
  192. return cookies
  193. except json.JSONDecodeError:
  194. pass
  195. # 字符串格式: name=value; name2=value2
  196. cookies = []
  197. for item in cookies_str.split(";"):
  198. item = item.strip()
  199. if "=" in item:
  200. name, value = item.split("=", 1)
  201. cookies.append(
  202. {
  203. "name": name.strip(),
  204. "value": value.strip(),
  205. "domain": "",
  206. "path": "/",
  207. }
  208. )
  209. return cookies
  210. @staticmethod
  211. def _normalize_same_site(value: Any) -> Optional[str]:
  212. """将不同来源的 sameSite 值转换为 Playwright 接受的值。"""
  213. if value is None:
  214. return None
  215. v = str(value).strip().lower()
  216. if not v:
  217. return None
  218. mapping = {
  219. "strict": "Strict",
  220. "lax": "Lax",
  221. "none": "None",
  222. "no_restriction": "None", # Electron
  223. "unspecified": None, # Electron: 不传该字段
  224. }
  225. return mapping.get(v, None)
  226. @classmethod
  227. def _sanitize_cookie_for_playwright(
  228. cls, cookie: Dict[str, Any], default_domain: str
  229. ) -> Optional[Dict[str, Any]]:
  230. """清洗 cookie 字段,避免 BrowserContext.add_cookies 参数校验失败。"""
  231. if not isinstance(cookie, dict):
  232. return None
  233. name = str(cookie.get("name") or "").strip()
  234. if not name:
  235. return None
  236. cleaned: Dict[str, Any] = {
  237. "name": name,
  238. "value": str(cookie.get("value") or ""),
  239. }
  240. url = str(cookie.get("url") or "").strip()
  241. if url:
  242. cleaned["url"] = url
  243. else:
  244. domain = str(cookie.get("domain") or "").strip() or default_domain
  245. if not domain:
  246. return None
  247. cleaned["domain"] = domain
  248. cleaned["path"] = str(cookie.get("path") or "/")
  249. if "httpOnly" in cookie:
  250. cleaned["httpOnly"] = bool(cookie.get("httpOnly"))
  251. if "secure" in cookie:
  252. cleaned["secure"] = bool(cookie.get("secure"))
  253. expires_raw = cookie.get("expires", cookie.get("expirationDate"))
  254. if expires_raw not in (None, "", 0):
  255. try:
  256. expires_val = float(expires_raw)
  257. if expires_val > 0:
  258. cleaned["expires"] = expires_val
  259. except Exception:
  260. pass
  261. same_site = cls._normalize_same_site(
  262. cookie.get("sameSite", cookie.get("same_site"))
  263. )
  264. if same_site:
  265. cleaned["sameSite"] = same_site
  266. return cleaned
  267. @staticmethod
  268. def cookies_to_string(cookies: list) -> str:
  269. """将 cookie 列表转换为字符串"""
  270. return "; ".join([f"{c['name']}={c['value']}" for c in cookies])
  271. async def init_browser(
  272. self, storage_state: str = None, proxy_config: Dict[str, Any] = None
  273. ):
  274. """初始化浏览器(带反检测增强)"""
  275. print(
  276. f"[{self.platform_name}] init_browser: headless={self.headless}", flush=True
  277. )
  278. self.playwright = await async_playwright().start()
  279. proxy = proxy_config or self.proxy_config
  280. has_proxy = proxy and isinstance(proxy, dict) and proxy.get("server")
  281. if has_proxy:
  282. print(f"[{self.platform_name}] 使用代理: {proxy.get('server')}", flush=True)
  283. # 浏览器启动参数 — 尽量模拟真实用户浏览器
  284. launch_args = {
  285. "headless": self.headless,
  286. "args": [
  287. "--disable-blink-features=AutomationControlled",
  288. "--no-sandbox",
  289. "--disable-dev-shm-usage",
  290. "--disable-infobars",
  291. "--window-size=1920,1080",
  292. ],
  293. }
  294. if has_proxy:
  295. launch_args["proxy"] = proxy
  296. # 优先使用系统 Chrome(与用户手动操作一致,不容易被反爬检测)
  297. # 回退到 Playwright 自带 Chromium
  298. chrome_launched = False
  299. for channel in ("chrome", "msedge"):
  300. try:
  301. self.browser = await self.playwright.chromium.launch(
  302. channel=channel, **launch_args
  303. )
  304. print(
  305. f"[{self.platform_name}] 使用系统浏览器: {channel}",
  306. flush=True,
  307. )
  308. chrome_launched = True
  309. break
  310. except Exception as e:
  311. print(
  312. f"[{self.platform_name}] 系统 {channel} 不可用: {e}",
  313. flush=True,
  314. )
  315. if not chrome_launched:
  316. print(
  317. f"[{self.platform_name}] 回退到 Playwright Chromium(注意: 更容易被平台检测)",
  318. flush=True,
  319. )
  320. self.browser = await self.playwright.chromium.launch(**launch_args)
  321. # 生成浏览器上下文参数(带反检测配置)
  322. if STEALTH_AVAILABLE:
  323. context_args = get_browser_context_args(
  324. proxy_config=proxy if has_proxy else None
  325. )
  326. # 确保不使用代理参数(代理在 launch 时已设置)
  327. if "proxy" in context_args:
  328. del context_args["proxy"]
  329. else:
  330. context_args = {
  331. "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  332. "viewport": {"width": 1920, "height": 1080},
  333. "locale": "zh-CN",
  334. "timezone_id": "Asia/Shanghai",
  335. }
  336. if storage_state and os.path.exists(storage_state):
  337. context_args["storage_state"] = storage_state
  338. self.context = await self.browser.new_context(**context_args)
  339. # 注入反检测脚本
  340. if STEALTH_AVAILABLE:
  341. stealth_script = get_all_stealth_scripts()
  342. await self.context.add_init_script(stealth_script)
  343. print(f"[{self.platform_name}] 已注入反检测脚本", flush=True)
  344. self.page = await self.context.new_page()
  345. # 增加默认超时时间(等同 Selenium implicitlyWait),避免有头浏览器操作时过早超时自动关闭
  346. await self.page.set_default_timeout(60000) # 60 秒
  347. # 设置额外的页面属性
  348. await self.page.set_extra_http_headers(
  349. {
  350. "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
  351. "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
  352. "Accept-Encoding": "gzip, deflate, br",
  353. "Connection": "keep-alive",
  354. "Upgrade-Insecure-Requests": "1",
  355. "Sec-Fetch-Dest": "document",
  356. "Sec-Fetch-Mode": "navigate",
  357. "Sec-Fetch-Site": "none",
  358. "Sec-Fetch-User": "?1",
  359. "Cache-Control": "max-age=0",
  360. }
  361. )
  362. print(
  363. f"[{self.platform_name}] 浏览器初始化完成 (stealth={'enabled' if STEALTH_AVAILABLE else 'disabled'})",
  364. flush=True,
  365. )
  366. return self.page
  367. async def set_cookies(self, cookies: list):
  368. """设置 cookies"""
  369. if not self.context:
  370. raise Exception("Browser context not initialized")
  371. sanitized: List[Dict[str, Any]] = []
  372. for cookie in cookies:
  373. cleaned = self._sanitize_cookie_for_playwright(cookie, self.cookie_domain)
  374. if cleaned:
  375. sanitized.append(cleaned)
  376. if not sanitized:
  377. raise Exception("没有可用的 Cookie(清洗后为空)")
  378. await self.context.add_cookies(sanitized)
  379. async def close_browser(self):
  380. """关闭浏览器"""
  381. if self.page:
  382. try:
  383. await self.page.close()
  384. except Exception:
  385. pass
  386. self.page = None
  387. if self.context:
  388. try:
  389. await self.context.close()
  390. except Exception:
  391. pass
  392. self.context = None
  393. if self.browser:
  394. try:
  395. await self.browser.close()
  396. except Exception:
  397. pass
  398. self.browser = None
  399. if getattr(self, 'playwright', None):
  400. try:
  401. await self.playwright.stop()
  402. except Exception:
  403. pass
  404. self.playwright = None
  405. async def human_like_delay(self, min_ms: int = 100, max_ms: int = 500):
  406. """模拟人类操作延迟"""
  407. delay = random.randint(min_ms, max_ms) / 1000.0
  408. await asyncio.sleep(delay)
  409. async def human_like_scroll(self, distance: int = None):
  410. """模拟人类滚动"""
  411. if distance is None:
  412. distance = random.randint(200, 500)
  413. # 分多次滚动,模拟真实行为
  414. steps = random.randint(3, 6)
  415. step_distance = distance // steps
  416. for _ in range(steps):
  417. if self.page:
  418. await self.page.evaluate(f"window.scrollBy(0, {step_distance})")
  419. await self.human_like_delay(100, 300)
  420. async def human_like_type(self, selector: str, text: str, clear_first: bool = True):
  421. """模拟人类输入"""
  422. if not self.page:
  423. return
  424. element = self.page.locator(selector)
  425. if await element.count() == 0:
  426. return
  427. await element.click()
  428. if clear_first:
  429. await self.page.keyboard.press("Control+KeyA")
  430. await self.page.keyboard.press("Backspace")
  431. await self.human_like_delay(50, 150)
  432. for char in text:
  433. delay = random.randint(30, 100) / 1000.0
  434. await self.page.keyboard.type(char)
  435. await asyncio.sleep(delay)
  436. async def random_mouse_move(self):
  437. """随机移动鼠标(增加真实感)"""
  438. if not self.page:
  439. return
  440. try:
  441. # 随机移动到页面某处
  442. x = random.randint(100, 1800)
  443. y = random.randint(100, 900)
  444. await self.page.mouse.move(x, y)
  445. await self.human_like_delay(50, 200)
  446. except Exception:
  447. pass
  448. async def save_cookies(self, file_path: str):
  449. """保存 cookies 到文件"""
  450. if self.context:
  451. await self.context.storage_state(path=file_path)
  452. async def capture_screenshot(self) -> str:
  453. """截取当前页面截图,返回 Base64 编码"""
  454. import base64
  455. if not self.page:
  456. return ""
  457. try:
  458. screenshot_bytes = await self.page.screenshot(type="jpeg", quality=80)
  459. return base64.b64encode(screenshot_bytes).decode("utf-8")
  460. except Exception as e:
  461. print(f"[{self.platform_name}] 截图失败: {e}")
  462. return ""
  463. async def save_screenshot_to_file(
  464. self, directory: str = None, filename_prefix: str = "publish_failed"
  465. ) -> str:
  466. """
  467. 保存截图到指定目录,返回文件路径
  468. Args:
  469. directory: 截图保存目录,默认为 server/python/screenshots
  470. filename_prefix: 文件名前缀
  471. Returns:
  472. str: 保存的文件路径,失败返回空字符串
  473. """
  474. if not self.page:
  475. return ""
  476. try:
  477. if directory is None:
  478. current_dir = os.path.dirname(
  479. os.path.dirname(os.path.abspath(__file__))
  480. )
  481. directory = os.path.join(current_dir, "screenshots")
  482. directory = os.path.abspath(directory)
  483. os.makedirs(directory, exist_ok=True)
  484. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  485. task_info = f"_task{self.publish_task_id}" if self.publish_task_id else ""
  486. account_info = (
  487. f"_acc{self.publish_account_id}" if self.publish_account_id else ""
  488. )
  489. filename = f"{filename_prefix}_{self.platform_name}{task_info}{account_info}_{timestamp}.png"
  490. filepath = os.path.join(directory, filename)
  491. await self.page.screenshot(path=filepath, type="png")
  492. print(f"[{self.platform_name}] 截图已保存: {filepath}")
  493. return filepath
  494. except Exception as e:
  495. print(f"[{self.platform_name}] 保存截图失败: {e}")
  496. return ""
  497. async def request_sms_code_from_frontend(
  498. self, phone: str = "", timeout_seconds: int = 120, message: str = ""
  499. ) -> str:
  500. node_api_url = os.environ.get("NODEJS_API_URL", "http://localhost:3000").rstrip(
  501. "/"
  502. )
  503. internal_api_key = os.environ.get(
  504. "INTERNAL_API_KEY", "internal-api-key-default"
  505. )
  506. if not self.user_id:
  507. raise Exception("缺少 user_id,无法请求前端输入验证码")
  508. captcha_task_id = f"py_{self.platform_name}_{uuid.uuid4().hex}"
  509. payload = {
  510. "user_id": self.user_id,
  511. "captcha_task_id": captcha_task_id,
  512. "type": "sms",
  513. "phone": phone or "",
  514. "message": message or "请输入短信验证码",
  515. "timeout_seconds": timeout_seconds,
  516. "publish_task_id": self.publish_task_id,
  517. "publish_account_id": self.publish_account_id,
  518. }
  519. import requests
  520. try:
  521. resp = requests.post(
  522. f"{node_api_url}/api/internal/captcha/request",
  523. headers={
  524. "Content-Type": "application/json",
  525. "X-Internal-API-Key": internal_api_key,
  526. },
  527. json=payload,
  528. timeout=timeout_seconds + 30,
  529. )
  530. except Exception as e:
  531. raise Exception(f"请求前端验证码失败: {e}")
  532. try:
  533. data = resp.json()
  534. except Exception:
  535. raise Exception(f"请求前端验证码失败: HTTP {resp.status_code}")
  536. if resp.status_code >= 400 or not data.get("success"):
  537. raise Exception(
  538. data.get("error")
  539. or data.get("message")
  540. or f"请求前端验证码失败: HTTP {resp.status_code}"
  541. )
  542. code = data.get("code") or ""
  543. if not code:
  544. raise Exception("未收到验证码")
  545. return str(code)
  546. async def ai_analyze_sms_send_state(self, screenshot_base64: str = None) -> dict:
  547. import os
  548. import requests
  549. import json
  550. import re
  551. try:
  552. if not screenshot_base64:
  553. screenshot_base64 = await self.capture_screenshot()
  554. if not screenshot_base64:
  555. return {
  556. "has_sms_modal": False,
  557. "send_button_state": "unknown",
  558. "sent_likely": False,
  559. "block_reason": "unknown",
  560. "suggested_action": "manual_send",
  561. "confidence": 0,
  562. "notes": "无法获取截图",
  563. }
  564. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  565. ai_base_url = os.environ.get(
  566. "DASHSCOPE_BASE_URL",
  567. "https://dashscope.aliyuncs.com/compatible-mode/v1",
  568. )
  569. ai_vision_model = os.environ.get("AI_VISION_MODEL", "qwen-vl-plus")
  570. if not ai_api_key:
  571. return {
  572. "has_sms_modal": True,
  573. "send_button_state": "unknown",
  574. "sent_likely": False,
  575. "block_reason": "no_ai_key",
  576. "suggested_action": "manual_send",
  577. "confidence": 0,
  578. "notes": "未配置 AI API Key",
  579. }
  580. prompt = """请分析这张网页截图,判断是否处于“短信验证码”验证弹窗/页面,并判断“发送验证码/获取验证码”是否已经触发成功。
  581. 你需要重点识别:
  582. 1) 是否存在短信验证码弹窗(包含“请输入验证码/短信验证码/手机号验证/获取验证码/发送验证码”等)
  583. 2) 发送按钮状态:enabled / disabled / countdown(出现xx秒) / hidden / unknown
  584. 3) 是否已发送成功:例如出现倒计时、按钮禁用、出现“已发送/重新发送/xx秒后重试”等
  585. 4) 是否被阻塞:例如出现滑块/人机验证、频繁发送、风控提示、网络异常等
  586. 请以 JSON 返回:
  587. ```json
  588. {
  589. "has_sms_modal": true,
  590. "send_button_state": "enabled|disabled|countdown|hidden|unknown",
  591. "sent_likely": true,
  592. "block_reason": "none|need_click_send|slider|risk|rate_limit|network|unknown",
  593. "suggested_action": "wait|click_send|solve_slider|manual_send",
  594. "confidence": 0-100,
  595. "notes": "一句话说明你看到的证据"
  596. }
  597. ```"""
  598. headers = {
  599. "Authorization": f"Bearer {ai_api_key}",
  600. "Content-Type": "application/json",
  601. }
  602. payload = {
  603. "model": ai_vision_model,
  604. "messages": [
  605. {
  606. "role": "user",
  607. "content": [
  608. {
  609. "type": "image_url",
  610. "image_url": {
  611. "url": f"data:image/jpeg;base64,{screenshot_base64}"
  612. },
  613. },
  614. {"type": "text", "text": prompt},
  615. ],
  616. }
  617. ],
  618. "max_tokens": 500,
  619. }
  620. response = requests.post(
  621. f"{ai_base_url}/chat/completions",
  622. headers=headers,
  623. json=payload,
  624. timeout=30,
  625. )
  626. if response.status_code != 200:
  627. return {
  628. "has_sms_modal": True,
  629. "send_button_state": "unknown",
  630. "sent_likely": False,
  631. "block_reason": "network",
  632. "suggested_action": "manual_send",
  633. "confidence": 0,
  634. "notes": f"AI API 返回错误 {response.status_code}",
  635. }
  636. result = response.json()
  637. ai_response = (
  638. result.get("choices", [{}])[0].get("message", {}).get("content", "")
  639. )
  640. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", ai_response)
  641. if json_match:
  642. json_str = json_match.group(1)
  643. else:
  644. json_match = re.search(r"\\{[\\s\\S]*\\}", ai_response)
  645. json_str = json_match.group(0) if json_match else "{}"
  646. try:
  647. data = json.loads(json_str)
  648. except Exception:
  649. data = {}
  650. return {
  651. "has_sms_modal": bool(data.get("has_sms_modal", True)),
  652. "send_button_state": data.get("send_button_state", "unknown"),
  653. "sent_likely": bool(data.get("sent_likely", False)),
  654. "block_reason": data.get("block_reason", "unknown"),
  655. "suggested_action": data.get("suggested_action", "manual_send"),
  656. "confidence": int(data.get("confidence", 0) or 0),
  657. "notes": data.get("notes", ""),
  658. }
  659. except Exception as e:
  660. return {
  661. "has_sms_modal": True,
  662. "send_button_state": "unknown",
  663. "sent_likely": False,
  664. "block_reason": "unknown",
  665. "suggested_action": "manual_send",
  666. "confidence": 0,
  667. "notes": f"AI 分析异常: {e}",
  668. }
  669. async def sync_cookies_to_node(self, cookies: list) -> bool:
  670. import os
  671. import json
  672. import requests
  673. if not self.user_id or not self.publish_account_id:
  674. return False
  675. node_api_url = os.environ.get("NODEJS_API_URL", "http://localhost:3000").rstrip(
  676. "/"
  677. )
  678. internal_api_key = os.environ.get(
  679. "INTERNAL_API_KEY", "internal-api-key-default"
  680. )
  681. try:
  682. payload = {
  683. "user_id": int(self.user_id),
  684. "account_id": int(self.publish_account_id),
  685. "cookies": json.dumps(cookies, ensure_ascii=False),
  686. }
  687. resp = requests.post(
  688. f"{node_api_url}/api/internal/accounts/update-cookies",
  689. headers={
  690. "Content-Type": "application/json",
  691. "X-Internal-API-Key": internal_api_key,
  692. },
  693. json=payload,
  694. timeout=30,
  695. )
  696. if resp.status_code >= 400:
  697. return False
  698. data = resp.json() if resp.content else {}
  699. return bool(data.get("success", True))
  700. except Exception:
  701. return False
  702. async def ai_suggest_playwright_selector(
  703. self, goal: str, screenshot_base64: str = None
  704. ) -> dict:
  705. import os
  706. import requests
  707. import json
  708. import re
  709. try:
  710. if not screenshot_base64:
  711. screenshot_base64 = await self.capture_screenshot()
  712. if not screenshot_base64:
  713. return {
  714. "has_selector": False,
  715. "selector": "",
  716. "confidence": 0,
  717. "notes": "无法获取截图",
  718. }
  719. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  720. ai_base_url = os.environ.get(
  721. "DASHSCOPE_BASE_URL",
  722. "https://dashscope.aliyuncs.com/compatible-mode/v1",
  723. )
  724. ai_vision_model = os.environ.get("AI_VISION_MODEL", "qwen-vl-plus")
  725. if not ai_api_key:
  726. return {
  727. "has_selector": False,
  728. "selector": "",
  729. "confidence": 0,
  730. "notes": "未配置 AI API Key",
  731. }
  732. prompt = f"""请分析这张网页截图,给出一个 Playwright Python 可用的 selector(用于 page.locator(selector))来完成目标操作。
  733. 目标:{goal}
  734. 要求:
  735. 1) selector 尽量稳定(优先 role/text/aria,其次 class,避免过度依赖随机 class)
  736. 2) selector 必须是 Playwright 支持的选择器语法(如:text="发布"、button:has-text("发布")、[role="button"]:has-text("发布") 等)
  737. 3) 只返回一个最优 selector
  738. 以 JSON 返回:
  739. ```json
  740. {{
  741. "has_selector": true,
  742. "selector": "button:has-text(\\"发布\\")",
  743. "confidence": 0-100,
  744. "notes": "你依据的页面证据"
  745. }}
  746. ```"""
  747. headers = {
  748. "Authorization": f"Bearer {ai_api_key}",
  749. "Content-Type": "application/json",
  750. }
  751. payload = {
  752. "model": ai_vision_model,
  753. "messages": [
  754. {
  755. "role": "user",
  756. "content": [
  757. {
  758. "type": "image_url",
  759. "image_url": {
  760. "url": f"data:image/jpeg;base64,{screenshot_base64}"
  761. },
  762. },
  763. {"type": "text", "text": prompt},
  764. ],
  765. }
  766. ],
  767. "max_tokens": 300,
  768. }
  769. response = requests.post(
  770. f"{ai_base_url}/chat/completions",
  771. headers=headers,
  772. json=payload,
  773. timeout=30,
  774. )
  775. if response.status_code != 200:
  776. return {
  777. "has_selector": False,
  778. "selector": "",
  779. "confidence": 0,
  780. "notes": f"AI API 错误 {response.status_code}",
  781. }
  782. result = response.json()
  783. ai_response = (
  784. result.get("choices", [{}])[0].get("message", {}).get("content", "")
  785. )
  786. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", ai_response)
  787. if json_match:
  788. json_str = json_match.group(1)
  789. else:
  790. json_match = re.search(r"\\{[\\s\\S]*\\}", ai_response)
  791. json_str = json_match.group(0) if json_match else "{}"
  792. try:
  793. data = json.loads(json_str)
  794. except Exception:
  795. data = {}
  796. selector = str(data.get("selector", "") or "").strip()
  797. has_selector = bool(data.get("has_selector", False)) and bool(selector)
  798. confidence = int(data.get("confidence", 0) or 0)
  799. notes = str(data.get("notes", "") or "")
  800. if not has_selector:
  801. return {
  802. "has_selector": False,
  803. "selector": "",
  804. "confidence": confidence,
  805. "notes": notes or "未给出 selector",
  806. }
  807. return {
  808. "has_selector": True,
  809. "selector": selector,
  810. "confidence": confidence,
  811. "notes": notes,
  812. }
  813. except Exception as e:
  814. return {
  815. "has_selector": False,
  816. "selector": "",
  817. "confidence": 0,
  818. "notes": f"AI selector 异常: {e}",
  819. }
  820. async def ai_check_captcha(self, screenshot_base64: str = None) -> dict:
  821. """
  822. 使用 AI 分析截图检测验证码
  823. Args:
  824. screenshot_base64: 截图的 Base64 编码,如果为空则自动获取当前页面截图
  825. Returns:
  826. dict: {
  827. "has_captcha": bool, # 是否有验证码
  828. "captcha_type": str, # 验证码类型: slider, image, phone, rotate, puzzle
  829. "captcha_description": str, # 验证码描述
  830. "confidence": float, # 置信度 0-100
  831. "need_headful": bool # 是否需要切换到有头浏览器
  832. }
  833. """
  834. import os
  835. import requests
  836. try:
  837. # 获取截图
  838. if not screenshot_base64:
  839. screenshot_base64 = await self.capture_screenshot()
  840. if not screenshot_base64:
  841. print(f"[{self.platform_name}] AI验证码检测: 无法获取截图")
  842. return {
  843. "has_captcha": False,
  844. "captcha_type": "",
  845. "captcha_description": "",
  846. "confidence": 0,
  847. "need_headful": False,
  848. }
  849. # 获取 AI 配置
  850. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  851. ai_base_url = os.environ.get(
  852. "DASHSCOPE_BASE_URL",
  853. "https://dashscope.aliyuncs.com/compatible-mode/v1",
  854. )
  855. ai_vision_model = os.environ.get("AI_VISION_MODEL", "qwen-vl-plus")
  856. if not ai_api_key:
  857. print(
  858. f"[{self.platform_name}] AI验证码检测: 未配置 AI API Key,使用传统方式检测"
  859. )
  860. return await self._traditional_captcha_check()
  861. # 构建 AI 请求
  862. prompt = """请分析这张网页截图,判断页面上是否存在验证码。
  863. 请检查以下类型的验证码:
  864. 1. 滑块验证码(需要滑动滑块到指定位置)
  865. 2. 图片验证码(需要选择正确的图片、点击图片上的文字等)
  866. 3. 旋转验证码(需要旋转图片到正确角度)
  867. 4. 拼图验证码(需要拖动拼图块到正确位置)
  868. 5. 手机验证码(需要输入手机收到的验证码)
  869. 6. 计算验证码(需要输入计算结果)
  870. 请以 JSON 格式返回结果:
  871. ```json
  872. {
  873. "has_captcha": true/false,
  874. "captcha_type": "slider/image/phone/rotate/puzzle/calculate/none",
  875. "captcha_description": "验证码的具体描述",
  876. "confidence": 0-100
  877. }
  878. ```
  879. 注意:
  880. - 如果页面有明显的验证码弹窗或验证区域,has_captcha 为 true
  881. - 如果只是普通的登录页面或表单,没有特殊的验证步骤,has_captcha 为 false
  882. - confidence 表示你对判断结果的信心,100 表示非常确定"""
  883. headers = {
  884. "Authorization": f"Bearer {ai_api_key}",
  885. "Content-Type": "application/json",
  886. }
  887. payload = {
  888. "model": ai_vision_model,
  889. "messages": [
  890. {
  891. "role": "user",
  892. "content": [
  893. {
  894. "type": "image_url",
  895. "image_url": {
  896. "url": f"data:image/jpeg;base64,{screenshot_base64}"
  897. },
  898. },
  899. {"type": "text", "text": prompt},
  900. ],
  901. }
  902. ],
  903. "max_tokens": 500,
  904. }
  905. print(f"[{self.platform_name}] AI验证码检测: 正在分析截图...")
  906. response = requests.post(
  907. f"{ai_base_url}/chat/completions",
  908. headers=headers,
  909. json=payload,
  910. timeout=30,
  911. )
  912. if response.status_code != 200:
  913. print(
  914. f"[{self.platform_name}] AI验证码检测: API 返回错误 {response.status_code}"
  915. )
  916. return await self._traditional_captcha_check()
  917. result = response.json()
  918. ai_response = (
  919. result.get("choices", [{}])[0].get("message", {}).get("content", "")
  920. )
  921. print(f"[{self.platform_name}] AI验证码检测响应: {ai_response[:200]}...")
  922. # 解析 AI 响应
  923. import re
  924. json_match = re.search(r"```json\s*([\s\S]*?)\s*```", ai_response)
  925. if json_match:
  926. json_str = json_match.group(1)
  927. else:
  928. # 尝试直接解析
  929. json_match = re.search(r"\{[\s\S]*\}", ai_response)
  930. if json_match:
  931. json_str = json_match.group(0)
  932. else:
  933. json_str = "{}"
  934. try:
  935. ai_result = json.loads(json_str)
  936. except:
  937. ai_result = {}
  938. has_captcha = ai_result.get("has_captcha", False)
  939. captcha_type = ai_result.get("captcha_type", "")
  940. captcha_description = ai_result.get("captcha_description", "")
  941. confidence = ai_result.get("confidence", 0)
  942. # 如果检测到验证码,需要切换到有头浏览器
  943. need_headful = has_captcha and captcha_type not in ["none", ""]
  944. print(
  945. f"[{self.platform_name}] AI验证码检测结果: has_captcha={has_captcha}, type={captcha_type}, confidence={confidence}"
  946. )
  947. return {
  948. "has_captcha": has_captcha,
  949. "captcha_type": captcha_type if captcha_type != "none" else "",
  950. "captcha_description": captcha_description,
  951. "confidence": confidence,
  952. "need_headful": need_headful,
  953. }
  954. except Exception as e:
  955. print(f"[{self.platform_name}] AI验证码检测异常: {e}")
  956. import traceback
  957. traceback.print_exc()
  958. return await self._traditional_captcha_check()
  959. async def _traditional_captcha_check(self) -> dict:
  960. """传统方式检测验证码(基于 DOM 元素)"""
  961. if not self.page:
  962. return {
  963. "has_captcha": False,
  964. "captcha_type": "",
  965. "captcha_description": "",
  966. "confidence": 0,
  967. "need_headful": False,
  968. }
  969. try:
  970. # 检查常见的验证码选择器
  971. captcha_selectors = [
  972. # 滑块验证码
  973. ('[class*="slider"]', "slider", "滑块验证码"),
  974. ('[class*="slide-verify"]', "slider", "滑块验证码"),
  975. ('text="滑动"', "slider", "滑块验证码"),
  976. ('text="拖动"', "slider", "滑块验证码"),
  977. # 图片验证码
  978. ('[class*="captcha"]', "image", "图片验证码"),
  979. ('[class*="verify-img"]', "image", "图片验证码"),
  980. ('text="点击"', "image", "图片验证码"),
  981. ('text="选择"', "image", "图片验证码"),
  982. # 手机验证码
  983. ('text="验证码"', "phone", "手机验证码"),
  984. ('text="获取验证码"', "phone", "手机验证码"),
  985. ('[class*="sms-code"]', "phone", "手机验证码"),
  986. # 旋转验证码
  987. ('text="旋转"', "rotate", "旋转验证码"),
  988. ('[class*="rotate"]', "rotate", "旋转验证码"),
  989. ]
  990. for selector, captcha_type, description in captcha_selectors:
  991. try:
  992. count = await self.page.locator(selector).count()
  993. if count > 0:
  994. # 检查是否可见
  995. element = self.page.locator(selector).first
  996. if await element.is_visible():
  997. print(
  998. f"[{self.platform_name}] 传统检测: 发现验证码 - {selector}"
  999. )
  1000. return {
  1001. "has_captcha": True,
  1002. "captcha_type": captcha_type,
  1003. "captcha_description": description,
  1004. "confidence": 80,
  1005. "need_headful": True,
  1006. }
  1007. except:
  1008. pass
  1009. return {
  1010. "has_captcha": False,
  1011. "captcha_type": "",
  1012. "captcha_description": "",
  1013. "confidence": 80,
  1014. "need_headful": False,
  1015. }
  1016. except Exception as e:
  1017. print(f"[{self.platform_name}] 传统验证码检测异常: {e}")
  1018. return {
  1019. "has_captcha": False,
  1020. "captcha_type": "",
  1021. "captcha_description": "",
  1022. "confidence": 0,
  1023. "need_headful": False,
  1024. }
  1025. async def get_page_url(self) -> str:
  1026. """获取当前页面 URL"""
  1027. if not self.page:
  1028. return ""
  1029. try:
  1030. return self.page.url
  1031. except:
  1032. return ""
  1033. async def check_publish_status(self) -> dict:
  1034. """
  1035. 检查发布状态
  1036. 返回: {status, screenshot_base64, page_url, message}
  1037. """
  1038. if not self.page:
  1039. return {"status": "error", "message": "页面未初始化"}
  1040. try:
  1041. screenshot = await self.capture_screenshot()
  1042. page_url = await self.get_page_url()
  1043. # 检查常见的成功/失败标志
  1044. page_content = await self.page.content()
  1045. # 检查成功标志
  1046. success_keywords = ["发布成功", "上传成功", "发表成功", "提交成功"]
  1047. for keyword in success_keywords:
  1048. if keyword in page_content:
  1049. return {
  1050. "status": "success",
  1051. "screenshot_base64": screenshot,
  1052. "page_url": page_url,
  1053. "message": "发布成功",
  1054. }
  1055. # 检查验证码标志
  1056. captcha_keywords = [
  1057. "验证码",
  1058. "身份验证",
  1059. "请完成验证",
  1060. "滑动验证",
  1061. "图形验证",
  1062. ]
  1063. for keyword in captcha_keywords:
  1064. if keyword in page_content:
  1065. return {
  1066. "status": "need_captcha",
  1067. "screenshot_base64": screenshot,
  1068. "page_url": page_url,
  1069. "message": f"检测到{keyword}",
  1070. }
  1071. # 检查失败标志
  1072. fail_keywords = ["发布失败", "上传失败", "提交失败", "操作失败"]
  1073. for keyword in fail_keywords:
  1074. if keyword in page_content:
  1075. return {
  1076. "status": "failed",
  1077. "screenshot_base64": screenshot,
  1078. "page_url": page_url,
  1079. "message": keyword,
  1080. }
  1081. # 默认返回处理中
  1082. return {
  1083. "status": "processing",
  1084. "screenshot_base64": screenshot,
  1085. "page_url": page_url,
  1086. "message": "处理中",
  1087. }
  1088. except Exception as e:
  1089. return {
  1090. "status": "error",
  1091. "screenshot_base64": "",
  1092. "page_url": "",
  1093. "message": str(e),
  1094. }
  1095. async def wait_for_upload_complete(self, success_selector: str, timeout: int = 300):
  1096. """等待上传完成"""
  1097. if not self.page:
  1098. raise Exception("Page not initialized")
  1099. for _ in range(timeout // 3):
  1100. try:
  1101. count = await self.page.locator(success_selector).count()
  1102. if count > 0:
  1103. return True
  1104. except:
  1105. pass
  1106. await asyncio.sleep(3)
  1107. self.report_progress(30, "正在上传视频...")
  1108. return False
  1109. @abstractmethod
  1110. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  1111. """
  1112. 发布视频 - 子类必须实现
  1113. Args:
  1114. cookies: cookie 字符串或 JSON
  1115. params: 发布参数
  1116. Returns:
  1117. PublishResult: 发布结果
  1118. """
  1119. pass
  1120. async def get_works(
  1121. self, cookies: str, page: int = 0, page_size: int = 20
  1122. ) -> WorksResult:
  1123. """
  1124. 获取作品列表 - 子类可覆盖实现
  1125. Args:
  1126. cookies: cookie 字符串或 JSON
  1127. page: 页码(从0开始)
  1128. page_size: 每页数量
  1129. Returns:
  1130. WorksResult: 作品列表结果
  1131. """
  1132. return WorksResult(
  1133. success=False,
  1134. platform=self.platform_name,
  1135. error="该平台暂不支持获取作品列表",
  1136. )
  1137. async def get_comments(
  1138. self, cookies: str, work_id: str, cursor: str = ""
  1139. ) -> CommentsResult:
  1140. """
  1141. 获取作品评论 - 子类可覆盖实现
  1142. Args:
  1143. cookies: cookie 字符串或 JSON
  1144. work_id: 作品ID
  1145. cursor: 分页游标
  1146. Returns:
  1147. CommentsResult: 评论列表结果
  1148. """
  1149. return CommentsResult(
  1150. success=False,
  1151. platform=self.platform_name,
  1152. work_id=work_id,
  1153. error="该平台暂不支持获取评论",
  1154. )
  1155. async def run(self, cookies: str, params: PublishParams) -> PublishResult:
  1156. """
  1157. 运行发布任务
  1158. 包装了 publish 方法,添加了异常处理和资源清理
  1159. 发布失败时自动保存截图到 uploads/screenshots 目录
  1160. """
  1161. try:
  1162. result = await self.publish(cookies, params)
  1163. if not result.success and self.page:
  1164. screenshot_path = await self.save_screenshot_to_file()
  1165. if screenshot_path:
  1166. result.screenshot_path = screenshot_path
  1167. return result
  1168. except Exception as e:
  1169. import traceback
  1170. traceback.print_exc()
  1171. screenshot_path = ""
  1172. if self.page:
  1173. screenshot_path = await self.save_screenshot_to_file()
  1174. return PublishResult(
  1175. success=False,
  1176. platform=self.platform_name,
  1177. error=str(e),
  1178. screenshot_path=screenshot_path,
  1179. )
  1180. finally:
  1181. await self.close_browser()
  1182. async def run_get_works(
  1183. self, cookies: str, page: int = 0, page_size: int = 20
  1184. ) -> WorksResult:
  1185. """
  1186. 运行获取作品任务
  1187. """
  1188. try:
  1189. return await self.get_works(cookies, page, page_size)
  1190. except Exception as e:
  1191. import traceback
  1192. traceback.print_exc()
  1193. return WorksResult(success=False, platform=self.platform_name, error=str(e))
  1194. finally:
  1195. await self.close_browser()
  1196. async def run_get_comments(
  1197. self, cookies: str, work_id: str, cursor: str = ""
  1198. ) -> CommentsResult:
  1199. """
  1200. 运行获取评论任务
  1201. """
  1202. try:
  1203. return await self.get_comments(cookies, work_id, cursor)
  1204. except Exception as e:
  1205. import traceback
  1206. traceback.print_exc()
  1207. return CommentsResult(
  1208. success=False,
  1209. platform=self.platform_name,
  1210. work_id=work_id,
  1211. error=str(e),
  1212. )
  1213. finally:
  1214. await self.close_browser()
  1215. async def check_login_status(self, cookies: str) -> dict:
  1216. """
  1217. 检查 Cookie 登录状态(通过浏览器访问后台页面检测)
  1218. Args:
  1219. cookies: cookie 字符串或 JSON
  1220. Returns:
  1221. dict: {
  1222. "success": True,
  1223. "valid": True/False,
  1224. "need_login": True/False,
  1225. "message": "状态描述"
  1226. }
  1227. """
  1228. try:
  1229. await self.init_browser()
  1230. cookie_list = self.parse_cookies(cookies)
  1231. await self.set_cookies(cookie_list)
  1232. if not self.page:
  1233. raise Exception("Page not initialized")
  1234. # 访问平台后台首页
  1235. home_url = self.login_url
  1236. print(f"[{self.platform_name}] 访问后台页面: {home_url}")
  1237. await self.page.goto(home_url, wait_until="domcontentloaded", timeout=30000)
  1238. await asyncio.sleep(3)
  1239. # 检查当前 URL 是否被重定向到登录页
  1240. current_url = self.page.url
  1241. print(f"[{self.platform_name}] 当前 URL: {current_url}")
  1242. # 登录页特征
  1243. login_indicators = ["login", "passport", "signin", "auth"]
  1244. is_login_page = any(
  1245. indicator in current_url.lower() for indicator in login_indicators
  1246. )
  1247. # 检查页面是否有登录弹窗
  1248. need_login = is_login_page
  1249. # 风控/验证码特征
  1250. risk_indicators = [
  1251. "captcha",
  1252. "verify",
  1253. "challenge",
  1254. "risk",
  1255. "security",
  1256. "safe",
  1257. "protect",
  1258. "slider",
  1259. ]
  1260. need_verification = any(
  1261. indicator in current_url.lower() for indicator in risk_indicators
  1262. )
  1263. if not need_login:
  1264. # 检查页面内容是否有登录提示
  1265. login_selectors = [
  1266. 'text="请先登录"',
  1267. 'text="登录后继续"',
  1268. 'text="请登录"',
  1269. '[class*="login-modal"]',
  1270. '[class*="login-dialog"]',
  1271. '[class*="login-popup"]',
  1272. ]
  1273. for selector in login_selectors:
  1274. try:
  1275. if await self.page.locator(selector).count() > 0:
  1276. need_login = True
  1277. print(f"[{self.platform_name}] 检测到登录弹窗: {selector}")
  1278. break
  1279. except:
  1280. pass
  1281. if not need_login and not need_verification:
  1282. verification_selectors = [
  1283. 'text="安全验证"',
  1284. 'text="验证码"',
  1285. 'text="人机验证"',
  1286. 'text="滑块"',
  1287. 'text="请完成验证"',
  1288. 'text="系统检测到异常"',
  1289. 'text="访问受限"',
  1290. 'text="行为异常"',
  1291. ]
  1292. for selector in verification_selectors:
  1293. try:
  1294. if await self.page.locator(selector).count() > 0:
  1295. need_verification = True
  1296. print(
  1297. f"[{self.platform_name}] 检测到风控/验证码提示: {selector}"
  1298. )
  1299. break
  1300. except:
  1301. pass
  1302. if need_login:
  1303. return {
  1304. "success": True,
  1305. "valid": False,
  1306. "need_login": True,
  1307. "message": "Cookie 已过期,需要重新登录",
  1308. }
  1309. elif need_verification:
  1310. return {
  1311. "success": True,
  1312. "valid": False,
  1313. "need_login": True,
  1314. "message": "触发风控/需要验证",
  1315. }
  1316. else:
  1317. return {
  1318. "success": True,
  1319. "valid": True,
  1320. "need_login": False,
  1321. "message": "登录状态有效",
  1322. }
  1323. except Exception as e:
  1324. import traceback
  1325. traceback.print_exc()
  1326. return {
  1327. "success": False,
  1328. "valid": False,
  1329. "need_login": True,
  1330. "error": str(e),
  1331. }
  1332. finally:
  1333. await self.close_browser()