# base.py
  1. # -*- coding: utf-8 -*-
"""
Base classes for platform publishing.

Provides the common publishing interface and shared helper methods.
"""
  6. import asyncio
  7. import json
  8. import os
  9. from abc import ABC, abstractmethod
  10. from dataclasses import dataclass, field
  11. from datetime import datetime
  12. from typing import List, Optional, Callable, Dict, Any
  13. from playwright.async_api import async_playwright, Browser, BrowserContext, Page
  14. @dataclass
  15. class PublishParams:
  16. """发布参数"""
  17. title: str
  18. video_path: str
  19. description: str = ""
  20. cover_path: Optional[str] = None
  21. tags: List[str] = field(default_factory=list)
  22. publish_date: Optional[datetime] = None
  23. location: str = "重庆市"
  24. def __post_init__(self):
  25. if not self.description:
  26. self.description = self.title
  27. @dataclass
  28. class PublishResult:
  29. """发布结果"""
  30. success: bool
  31. platform: str
  32. video_id: str = ""
  33. video_url: str = ""
  34. message: str = ""
  35. error: str = ""
  36. @dataclass
  37. class WorkItem:
  38. """作品数据"""
  39. work_id: str
  40. title: str
  41. cover_url: str = ""
  42. video_url: str = ""
  43. duration: int = 0 # 秒
  44. status: str = "published" # published, reviewing, rejected, draft
  45. publish_time: str = ""
  46. play_count: int = 0
  47. like_count: int = 0
  48. comment_count: int = 0
  49. share_count: int = 0
  50. collect_count: int = 0
  51. def to_dict(self) -> Dict[str, Any]:
  52. return {
  53. "work_id": self.work_id,
  54. "title": self.title,
  55. "cover_url": self.cover_url,
  56. "video_url": self.video_url,
  57. "duration": self.duration,
  58. "status": self.status,
  59. "publish_time": self.publish_time,
  60. "play_count": self.play_count,
  61. "like_count": self.like_count,
  62. "comment_count": self.comment_count,
  63. "share_count": self.share_count,
  64. "collect_count": self.collect_count,
  65. }
  66. @dataclass
  67. class CommentItem:
  68. """评论数据"""
  69. comment_id: str
  70. work_id: str
  71. content: str
  72. author_id: str = ""
  73. author_name: str = ""
  74. author_avatar: str = ""
  75. like_count: int = 0
  76. reply_count: int = 0
  77. create_time: str = ""
  78. is_author: bool = False # 是否是作者的评论
  79. replies: List['CommentItem'] = field(default_factory=list)
  80. def to_dict(self) -> Dict[str, Any]:
  81. return {
  82. "comment_id": self.comment_id,
  83. "work_id": self.work_id,
  84. "content": self.content,
  85. "author_id": self.author_id,
  86. "author_name": self.author_name,
  87. "author_avatar": self.author_avatar,
  88. "like_count": self.like_count,
  89. "reply_count": self.reply_count,
  90. "create_time": self.create_time,
  91. "is_author": self.is_author,
  92. "replies": [r.to_dict() for r in self.replies],
  93. }
  94. @dataclass
  95. class WorksResult:
  96. """作品列表结果"""
  97. success: bool
  98. platform: str
  99. works: List[WorkItem] = field(default_factory=list)
  100. total: int = 0
  101. has_more: bool = False
  102. error: str = ""
  103. def to_dict(self) -> Dict[str, Any]:
  104. return {
  105. "success": self.success,
  106. "platform": self.platform,
  107. "works": [w.to_dict() for w in self.works],
  108. "total": self.total,
  109. "has_more": self.has_more,
  110. "error": self.error,
  111. }
  112. @dataclass
  113. class CommentsResult:
  114. """评论列表结果"""
  115. success: bool
  116. platform: str
  117. work_id: str
  118. comments: List[CommentItem] = field(default_factory=list)
  119. total: int = 0
  120. has_more: bool = False
  121. error: str = ""
  122. def to_dict(self) -> Dict[str, Any]:
  123. return {
  124. "success": self.success,
  125. "platform": self.platform,
  126. "work_id": self.work_id,
  127. "comments": [c.to_dict() for c in self.comments],
  128. "total": self.total,
  129. "has_more": self.has_more,
  130. "error": self.error,
  131. }
  132. class BasePublisher(ABC):
  133. """
  134. 平台发布基类
  135. 所有平台发布器都需要继承此类
  136. """
  137. platform_name: str = "base"
  138. login_url: str = ""
  139. publish_url: str = ""
  140. cookie_domain: str = ""
  141. def __init__(self, headless: bool = True):
  142. self.headless = headless
  143. self.browser: Optional[Browser] = None
  144. self.context: Optional[BrowserContext] = None
  145. self.page: Optional[Page] = None
  146. self.on_progress: Optional[Callable[[int, str], None]] = None
  147. def set_progress_callback(self, callback: Callable[[int, str], None]):
  148. """设置进度回调"""
  149. self.on_progress = callback
  150. def report_progress(self, progress: int, message: str):
  151. """报告进度"""
  152. print(f"[{self.platform_name}] [{progress}%] {message}")
  153. if self.on_progress:
  154. self.on_progress(progress, message)
  155. @staticmethod
  156. def parse_cookies(cookies_str: str) -> list:
  157. """解析 cookie 字符串为列表"""
  158. try:
  159. cookies = json.loads(cookies_str)
  160. if isinstance(cookies, list):
  161. return cookies
  162. except json.JSONDecodeError:
  163. pass
  164. # 字符串格式: name=value; name2=value2
  165. cookies = []
  166. for item in cookies_str.split(';'):
  167. item = item.strip()
  168. if '=' in item:
  169. name, value = item.split('=', 1)
  170. cookies.append({
  171. 'name': name.strip(),
  172. 'value': value.strip(),
  173. 'domain': '',
  174. 'path': '/'
  175. })
  176. return cookies
  177. @staticmethod
  178. def cookies_to_string(cookies: list) -> str:
  179. """将 cookie 列表转换为字符串"""
  180. return '; '.join([f"{c['name']}={c['value']}" for c in cookies])
  181. async def init_browser(self, storage_state: str = None):
  182. """初始化浏览器"""
  183. playwright = await async_playwright().start()
  184. self.browser = await playwright.chromium.launch(headless=self.headless)
  185. if storage_state and os.path.exists(storage_state):
  186. self.context = await self.browser.new_context(storage_state=storage_state)
  187. else:
  188. self.context = await self.browser.new_context()
  189. self.page = await self.context.new_page()
  190. return self.page
  191. async def set_cookies(self, cookies: list):
  192. """设置 cookies"""
  193. if not self.context:
  194. raise Exception("Browser context not initialized")
  195. # 设置默认域名
  196. for cookie in cookies:
  197. if 'domain' not in cookie or not cookie['domain']:
  198. cookie['domain'] = self.cookie_domain
  199. await self.context.add_cookies(cookies)
  200. async def close_browser(self):
  201. """关闭浏览器"""
  202. if self.context:
  203. await self.context.close()
  204. if self.browser:
  205. await self.browser.close()
  206. async def save_cookies(self, file_path: str):
  207. """保存 cookies 到文件"""
  208. if self.context:
  209. await self.context.storage_state(path=file_path)
  210. async def wait_for_upload_complete(self, success_selector: str, timeout: int = 300):
  211. """等待上传完成"""
  212. if not self.page:
  213. raise Exception("Page not initialized")
  214. for _ in range(timeout // 3):
  215. try:
  216. count = await self.page.locator(success_selector).count()
  217. if count > 0:
  218. return True
  219. except:
  220. pass
  221. await asyncio.sleep(3)
  222. self.report_progress(30, "正在上传视频...")
  223. return False
  224. @abstractmethod
  225. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  226. """
  227. 发布视频 - 子类必须实现
  228. Args:
  229. cookies: cookie 字符串或 JSON
  230. params: 发布参数
  231. Returns:
  232. PublishResult: 发布结果
  233. """
  234. pass
  235. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  236. """
  237. 获取作品列表 - 子类可覆盖实现
  238. Args:
  239. cookies: cookie 字符串或 JSON
  240. page: 页码(从0开始)
  241. page_size: 每页数量
  242. Returns:
  243. WorksResult: 作品列表结果
  244. """
  245. return WorksResult(
  246. success=False,
  247. platform=self.platform_name,
  248. error="该平台暂不支持获取作品列表"
  249. )
  250. async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
  251. """
  252. 获取作品评论 - 子类可覆盖实现
  253. Args:
  254. cookies: cookie 字符串或 JSON
  255. work_id: 作品ID
  256. cursor: 分页游标
  257. Returns:
  258. CommentsResult: 评论列表结果
  259. """
  260. return CommentsResult(
  261. success=False,
  262. platform=self.platform_name,
  263. work_id=work_id,
  264. error="该平台暂不支持获取评论"
  265. )
  266. async def run(self, cookies: str, params: PublishParams) -> PublishResult:
  267. """
  268. 运行发布任务
  269. 包装了 publish 方法,添加了异常处理和资源清理
  270. """
  271. try:
  272. return await self.publish(cookies, params)
  273. except Exception as e:
  274. import traceback
  275. traceback.print_exc()
  276. return PublishResult(
  277. success=False,
  278. platform=self.platform_name,
  279. error=str(e)
  280. )
  281. finally:
  282. await self.close_browser()
  283. async def run_get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  284. """
  285. 运行获取作品任务
  286. """
  287. try:
  288. return await self.get_works(cookies, page, page_size)
  289. except Exception as e:
  290. import traceback
  291. traceback.print_exc()
  292. return WorksResult(
  293. success=False,
  294. platform=self.platform_name,
  295. error=str(e)
  296. )
  297. finally:
  298. await self.close_browser()
  299. async def run_get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
  300. """
  301. 运行获取评论任务
  302. """
  303. try:
  304. return await self.get_comments(cookies, work_id, cursor)
  305. except Exception as e:
  306. import traceback
  307. traceback.print_exc()
  308. return CommentsResult(
  309. success=False,
  310. platform=self.platform_name,
  311. work_id=work_id,
  312. error=str(e)
  313. )
  314. finally:
  315. await self.close_browser()