base.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. # -*- coding: utf-8 -*-
  2. """
  3. 平台发布基类
  4. 提供通用的发布接口和工具方法
  5. """
  6. import asyncio
  7. import json
  8. import os
  9. from abc import ABC, abstractmethod
  10. from dataclasses import dataclass, field
  11. from datetime import datetime
  12. from typing import List, Optional, Callable, Dict, Any
  13. from playwright.async_api import async_playwright, Browser, BrowserContext, Page
  14. @dataclass
  15. class PublishParams:
  16. """发布参数"""
  17. title: str
  18. video_path: str
  19. description: str = ""
  20. cover_path: Optional[str] = None
  21. tags: List[str] = field(default_factory=list)
  22. publish_date: Optional[datetime] = None
  23. location: str = "重庆市"
  24. def __post_init__(self):
  25. if not self.description:
  26. self.description = self.title
  27. @dataclass
  28. class PublishResult:
  29. """发布结果"""
  30. success: bool
  31. platform: str
  32. video_id: str = ""
  33. video_url: str = ""
  34. message: str = ""
  35. error: str = ""
  36. need_captcha: bool = False # 是否需要验证码
  37. captcha_type: str = "" # 验证码类型: phone, slider, image
  38. screenshot_base64: str = "" # 页面截图(Base64)
  39. page_url: str = "" # 当前页面 URL
  40. status: str = "" # 状态: uploading, processing, success, failed, need_captcha, need_action
  41. @dataclass
  42. class WorkItem:
  43. """作品数据"""
  44. work_id: str
  45. title: str
  46. cover_url: str = ""
  47. video_url: str = ""
  48. duration: int = 0 # 秒
  49. status: str = "published" # published, reviewing, rejected, draft
  50. publish_time: str = ""
  51. play_count: int = 0
  52. like_count: int = 0
  53. comment_count: int = 0
  54. share_count: int = 0
  55. collect_count: int = 0
  56. def to_dict(self) -> Dict[str, Any]:
  57. return {
  58. "work_id": self.work_id,
  59. "title": self.title,
  60. "cover_url": self.cover_url,
  61. "video_url": self.video_url,
  62. "duration": self.duration,
  63. "status": self.status,
  64. "publish_time": self.publish_time,
  65. "play_count": self.play_count,
  66. "like_count": self.like_count,
  67. "comment_count": self.comment_count,
  68. "share_count": self.share_count,
  69. "collect_count": self.collect_count,
  70. }
  71. @dataclass
  72. class CommentItem:
  73. """评论数据"""
  74. comment_id: str
  75. work_id: str
  76. content: str
  77. author_id: str = ""
  78. author_name: str = ""
  79. author_avatar: str = ""
  80. like_count: int = 0
  81. reply_count: int = 0
  82. create_time: str = ""
  83. is_author: bool = False # 是否是作者的评论
  84. replies: List['CommentItem'] = field(default_factory=list)
  85. def to_dict(self) -> Dict[str, Any]:
  86. return {
  87. "comment_id": self.comment_id,
  88. "work_id": self.work_id,
  89. "content": self.content,
  90. "author_id": self.author_id,
  91. "author_name": self.author_name,
  92. "author_avatar": self.author_avatar,
  93. "like_count": self.like_count,
  94. "reply_count": self.reply_count,
  95. "create_time": self.create_time,
  96. "is_author": self.is_author,
  97. "replies": [r.to_dict() for r in self.replies],
  98. }
  99. @dataclass
  100. class WorksResult:
  101. """作品列表结果"""
  102. success: bool
  103. platform: str
  104. works: List[WorkItem] = field(default_factory=list)
  105. total: int = 0
  106. has_more: bool = False
  107. error: str = ""
  108. def to_dict(self) -> Dict[str, Any]:
  109. return {
  110. "success": self.success,
  111. "platform": self.platform,
  112. "works": [w.to_dict() for w in self.works],
  113. "total": self.total,
  114. "has_more": self.has_more,
  115. "error": self.error,
  116. }
  117. @dataclass
  118. class CommentsResult:
  119. """评论列表结果"""
  120. success: bool
  121. platform: str
  122. work_id: str
  123. comments: List[CommentItem] = field(default_factory=list)
  124. total: int = 0
  125. has_more: bool = False
  126. error: str = ""
  127. def to_dict(self) -> Dict[str, Any]:
  128. return {
  129. "success": self.success,
  130. "platform": self.platform,
  131. "work_id": self.work_id,
  132. "comments": [c.to_dict() for c in self.comments],
  133. "total": self.total,
  134. "has_more": self.has_more,
  135. "error": self.error,
  136. }
  137. class BasePublisher(ABC):
  138. """
  139. 平台发布基类
  140. 所有平台发布器都需要继承此类
  141. """
  142. platform_name: str = "base"
  143. login_url: str = ""
  144. publish_url: str = ""
  145. cookie_domain: str = ""
  146. def __init__(self, headless: bool = True):
  147. self.headless = headless
  148. self.browser: Optional[Browser] = None
  149. self.context: Optional[BrowserContext] = None
  150. self.page: Optional[Page] = None
  151. self.on_progress: Optional[Callable[[int, str], None]] = None
  152. def set_progress_callback(self, callback: Callable[[int, str], None]):
  153. """设置进度回调"""
  154. self.on_progress = callback
  155. def report_progress(self, progress: int, message: str):
  156. """报告进度"""
  157. print(f"[{self.platform_name}] [{progress}%] {message}")
  158. if self.on_progress:
  159. self.on_progress(progress, message)
  160. @staticmethod
  161. def parse_cookies(cookies_str: str) -> list:
  162. """解析 cookie 字符串为列表"""
  163. try:
  164. cookies = json.loads(cookies_str)
  165. if isinstance(cookies, list):
  166. return cookies
  167. except json.JSONDecodeError:
  168. pass
  169. # 字符串格式: name=value; name2=value2
  170. cookies = []
  171. for item in cookies_str.split(';'):
  172. item = item.strip()
  173. if '=' in item:
  174. name, value = item.split('=', 1)
  175. cookies.append({
  176. 'name': name.strip(),
  177. 'value': value.strip(),
  178. 'domain': '',
  179. 'path': '/'
  180. })
  181. return cookies
  182. @staticmethod
  183. def cookies_to_string(cookies: list) -> str:
  184. """将 cookie 列表转换为字符串"""
  185. return '; '.join([f"{c['name']}={c['value']}" for c in cookies])
  186. async def init_browser(self, storage_state: str = None):
  187. """初始化浏览器"""
  188. print(f"[{self.platform_name}] init_browser: headless={self.headless}", flush=True)
  189. playwright = await async_playwright().start()
  190. self.browser = await playwright.chromium.launch(headless=self.headless)
  191. if storage_state and os.path.exists(storage_state):
  192. self.context = await self.browser.new_context(storage_state=storage_state)
  193. else:
  194. self.context = await self.browser.new_context()
  195. self.page = await self.context.new_page()
  196. return self.page
  197. async def set_cookies(self, cookies: list):
  198. """设置 cookies"""
  199. if not self.context:
  200. raise Exception("Browser context not initialized")
  201. # 设置默认域名
  202. for cookie in cookies:
  203. if 'domain' not in cookie or not cookie['domain']:
  204. cookie['domain'] = self.cookie_domain
  205. await self.context.add_cookies(cookies)
  206. async def close_browser(self):
  207. """关闭浏览器"""
  208. if self.context:
  209. await self.context.close()
  210. if self.browser:
  211. await self.browser.close()
  212. async def save_cookies(self, file_path: str):
  213. """保存 cookies 到文件"""
  214. if self.context:
  215. await self.context.storage_state(path=file_path)
  216. async def capture_screenshot(self) -> str:
  217. """截取当前页面截图,返回 Base64 编码"""
  218. import base64
  219. if not self.page:
  220. return ""
  221. try:
  222. screenshot_bytes = await self.page.screenshot(type="jpeg", quality=80)
  223. return base64.b64encode(screenshot_bytes).decode('utf-8')
  224. except Exception as e:
  225. print(f"[{self.platform_name}] 截图失败: {e}")
  226. return ""
  227. async def get_page_url(self) -> str:
  228. """获取当前页面 URL"""
  229. if not self.page:
  230. return ""
  231. try:
  232. return self.page.url
  233. except:
  234. return ""
  235. async def check_publish_status(self) -> dict:
  236. """
  237. 检查发布状态
  238. 返回: {status, screenshot_base64, page_url, message}
  239. """
  240. if not self.page:
  241. return {"status": "error", "message": "页面未初始化"}
  242. try:
  243. screenshot = await self.capture_screenshot()
  244. page_url = await self.get_page_url()
  245. # 检查常见的成功/失败标志
  246. page_content = await self.page.content()
  247. # 检查成功标志
  248. success_keywords = ['发布成功', '上传成功', '发表成功', '提交成功']
  249. for keyword in success_keywords:
  250. if keyword in page_content:
  251. return {
  252. "status": "success",
  253. "screenshot_base64": screenshot,
  254. "page_url": page_url,
  255. "message": "发布成功"
  256. }
  257. # 检查验证码标志
  258. captcha_keywords = ['验证码', '身份验证', '请完成验证', '滑动验证', '图形验证']
  259. for keyword in captcha_keywords:
  260. if keyword in page_content:
  261. return {
  262. "status": "need_captcha",
  263. "screenshot_base64": screenshot,
  264. "page_url": page_url,
  265. "message": f"检测到{keyword}"
  266. }
  267. # 检查失败标志
  268. fail_keywords = ['发布失败', '上传失败', '提交失败', '操作失败']
  269. for keyword in fail_keywords:
  270. if keyword in page_content:
  271. return {
  272. "status": "failed",
  273. "screenshot_base64": screenshot,
  274. "page_url": page_url,
  275. "message": keyword
  276. }
  277. # 默认返回处理中
  278. return {
  279. "status": "processing",
  280. "screenshot_base64": screenshot,
  281. "page_url": page_url,
  282. "message": "处理中"
  283. }
  284. except Exception as e:
  285. return {
  286. "status": "error",
  287. "screenshot_base64": "",
  288. "page_url": "",
  289. "message": str(e)
  290. }
  291. async def wait_for_upload_complete(self, success_selector: str, timeout: int = 300):
  292. """等待上传完成"""
  293. if not self.page:
  294. raise Exception("Page not initialized")
  295. for _ in range(timeout // 3):
  296. try:
  297. count = await self.page.locator(success_selector).count()
  298. if count > 0:
  299. return True
  300. except:
  301. pass
  302. await asyncio.sleep(3)
  303. self.report_progress(30, "正在上传视频...")
  304. return False
  305. @abstractmethod
  306. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  307. """
  308. 发布视频 - 子类必须实现
  309. Args:
  310. cookies: cookie 字符串或 JSON
  311. params: 发布参数
  312. Returns:
  313. PublishResult: 发布结果
  314. """
  315. pass
  316. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  317. """
  318. 获取作品列表 - 子类可覆盖实现
  319. Args:
  320. cookies: cookie 字符串或 JSON
  321. page: 页码(从0开始)
  322. page_size: 每页数量
  323. Returns:
  324. WorksResult: 作品列表结果
  325. """
  326. return WorksResult(
  327. success=False,
  328. platform=self.platform_name,
  329. error="该平台暂不支持获取作品列表"
  330. )
  331. async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
  332. """
  333. 获取作品评论 - 子类可覆盖实现
  334. Args:
  335. cookies: cookie 字符串或 JSON
  336. work_id: 作品ID
  337. cursor: 分页游标
  338. Returns:
  339. CommentsResult: 评论列表结果
  340. """
  341. return CommentsResult(
  342. success=False,
  343. platform=self.platform_name,
  344. work_id=work_id,
  345. error="该平台暂不支持获取评论"
  346. )
  347. async def run(self, cookies: str, params: PublishParams) -> PublishResult:
  348. """
  349. 运行发布任务
  350. 包装了 publish 方法,添加了异常处理和资源清理
  351. """
  352. try:
  353. return await self.publish(cookies, params)
  354. except Exception as e:
  355. import traceback
  356. traceback.print_exc()
  357. return PublishResult(
  358. success=False,
  359. platform=self.platform_name,
  360. error=str(e)
  361. )
  362. finally:
  363. await self.close_browser()
  364. async def run_get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  365. """
  366. 运行获取作品任务
  367. """
  368. try:
  369. return await self.get_works(cookies, page, page_size)
  370. except Exception as e:
  371. import traceback
  372. traceback.print_exc()
  373. return WorksResult(
  374. success=False,
  375. platform=self.platform_name,
  376. error=str(e)
  377. )
  378. finally:
  379. await self.close_browser()
  380. async def run_get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
  381. """
  382. 运行获取评论任务
  383. """
  384. try:
  385. return await self.get_comments(cookies, work_id, cursor)
  386. except Exception as e:
  387. import traceback
  388. traceback.print_exc()
  389. return CommentsResult(
  390. success=False,
  391. platform=self.platform_name,
  392. work_id=work_id,
  393. error=str(e)
  394. )
  395. finally:
  396. await self.close_browser()
  397. async def check_login_status(self, cookies: str) -> dict:
  398. """
  399. 检查 Cookie 登录状态(通过浏览器访问后台页面检测)
  400. Args:
  401. cookies: cookie 字符串或 JSON
  402. Returns:
  403. dict: {
  404. "success": True,
  405. "valid": True/False,
  406. "need_login": True/False,
  407. "message": "状态描述"
  408. }
  409. """
  410. try:
  411. await self.init_browser()
  412. cookie_list = self.parse_cookies(cookies)
  413. await self.set_cookies(cookie_list)
  414. if not self.page:
  415. raise Exception("Page not initialized")
  416. # 访问平台后台首页
  417. home_url = self.login_url
  418. print(f"[{self.platform_name}] 访问后台页面: {home_url}")
  419. await self.page.goto(home_url, wait_until='domcontentloaded', timeout=30000)
  420. await asyncio.sleep(3)
  421. # 检查当前 URL 是否被重定向到登录页
  422. current_url = self.page.url
  423. print(f"[{self.platform_name}] 当前 URL: {current_url}")
  424. # 登录页特征
  425. login_indicators = ['login', 'passport', 'signin', 'auth']
  426. is_login_page = any(indicator in current_url.lower() for indicator in login_indicators)
  427. # 检查页面是否有登录弹窗
  428. need_login = is_login_page
  429. if not need_login:
  430. # 检查页面内容是否有登录提示
  431. login_selectors = [
  432. 'text="请先登录"',
  433. 'text="登录后继续"',
  434. 'text="请登录"',
  435. '[class*="login-modal"]',
  436. '[class*="login-dialog"]',
  437. '[class*="login-popup"]',
  438. ]
  439. for selector in login_selectors:
  440. try:
  441. if await self.page.locator(selector).count() > 0:
  442. need_login = True
  443. print(f"[{self.platform_name}] 检测到登录弹窗: {selector}")
  444. break
  445. except:
  446. pass
  447. if need_login:
  448. return {
  449. "success": True,
  450. "valid": False,
  451. "need_login": True,
  452. "message": "Cookie 已过期,需要重新登录"
  453. }
  454. else:
  455. return {
  456. "success": True,
  457. "valid": True,
  458. "need_login": False,
  459. "message": "登录状态有效"
  460. }
  461. except Exception as e:
  462. import traceback
  463. traceback.print_exc()
  464. return {
  465. "success": False,
  466. "valid": False,
  467. "need_login": True,
  468. "error": str(e)
  469. }
  470. finally:
  471. await self.close_browser()