baijiahao.py 50 KB


  1. # -*- coding: utf-8 -*-
  2. """
  3. 百家号视频发布器
  4. """
  5. import asyncio
  6. import json
  7. from typing import List
  8. from datetime import datetime
  9. from .base import (
  10. BasePublisher, PublishParams, PublishResult,
  11. WorkItem, WorksResult, CommentItem, CommentsResult
  12. )
class BaijiahaoPublisher(BasePublisher):
    """
    Baijiahao video publisher.

    Automates the Baijiahao creator center with Playwright.
    """

    platform_name = "baijiahao"
    login_url = "https://baijiahao.baidu.com/"
    publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=video"
    cookie_domain = ".baidu.com"

    # Login-detection configuration.
    # NOTE(review): login_check_url and login_selectors are not referenced by
    # the methods visible in this file section; presumably BasePublisher
    # consumes them - confirm.
    login_check_url = "https://baijiahao.baidu.com/builder/rc/home"
    # URL fragments: publish() treats a current URL containing any of these
    # as a redirect to the login flow.
    login_indicators = ["passport.baidu.com", "/login", "wappass.baidu.com"]
    login_selectors = ['text="登录"', 'text="请登录"', '[class*="login-btn"]']
  26. async def get_account_info(self, cookies: str) -> dict:
  27. """
  28. 获取百家号账号信息
  29. 使用直接 HTTP API 调用,不使用浏览器
  30. """
  31. import aiohttp
  32. print(f"\n{'='*60}")
  33. print(f"[{self.platform_name}] 获取账号信息 (使用 API)")
  34. print(f"{'='*60}")
  35. try:
  36. # 解析 cookies
  37. cookie_list = self.parse_cookies(cookies)
  38. cookie_dict = {c['name']: c['value'] for c in cookie_list}
  39. # 重要:百家号需要先访问主页建立会话上下文
  40. print(f"[{self.platform_name}] 第一步:访问主页建立会话...")
  41. session_headers = {
  42. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  43. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  44. # Cookie 由 session 管理,不手动设置
  45. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  46. 'Accept-Encoding': 'gzip, deflate, br',
  47. 'Connection': 'keep-alive',
  48. 'Upgrade-Insecure-Requests': '1',
  49. 'Sec-Fetch-Dest': 'document',
  50. 'Sec-Fetch-Mode': 'navigate',
  51. 'Sec-Fetch-Site': 'none',
  52. 'Sec-Fetch-User': '?1',
  53. 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
  54. 'sec-ch-ua-mobile': '?0',
  55. 'sec-ch-ua-platform': '"Windows"'
  56. }
  57. headers = {
  58. 'Accept': 'application/json, text/plain, */*',
  59. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  60. # Cookie 由 session 管理,不手动设置
  61. 'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
  62. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  63. 'Accept-Encoding': 'gzip, deflate, br',
  64. 'Connection': 'keep-alive',
  65. 'Sec-Fetch-Dest': 'empty',
  66. 'Sec-Fetch-Mode': 'cors',
  67. 'Sec-Fetch-Site': 'same-origin',
  68. 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
  69. 'sec-ch-ua-mobile': '?0',
  70. 'sec-ch-ua-platform': '"Windows"'
  71. }
  72. # 使用 cookies 参数初始化 session,让 aiohttp 自动管理 cookie 更新
  73. async with aiohttp.ClientSession(cookies=cookie_dict) as session:
  74. # 步骤 0: 先访问主页建立会话上下文(关键步骤!)
  75. print(f"[{self.platform_name}] [0/4] 访问主页建立会话上下文...")
  76. async with session.get(
  77. 'https://baijiahao.baidu.com/builder/rc/home',
  78. headers=session_headers,
  79. timeout=aiohttp.ClientTimeout(total=30)
  80. ) as home_response:
  81. home_status = home_response.status
  82. print(f"[{self.platform_name}] 主页访问状态: {home_status}")
  83. # 获取响应头中的新cookies(如果有)
  84. if 'Set-Cookie' in home_response.headers:
  85. new_cookies = home_response.headers['Set-Cookie']
  86. print(f"[{self.platform_name}] 获取到新的会话Cookie")
  87. # 这里可以处理新的cookies,但暂时跳过复杂处理
  88. # 短暂等待确保会话建立
  89. await asyncio.sleep(1)
  90. # 步骤 1: 获取账号基本信息
  91. print(f"[{self.platform_name}] [1/4] 调用 appinfo API...")
  92. async with session.get(
  93. 'https://baijiahao.baidu.com/builder/app/appinfo',
  94. headers=headers,
  95. timeout=aiohttp.ClientTimeout(total=30)
  96. ) as response:
  97. appinfo_result = await response.json()
  98. print(f"[{self.platform_name}] appinfo API 完整响应: {json.dumps(appinfo_result, ensure_ascii=False)[:500]}")
  99. print(f"[{self.platform_name}] appinfo API 响应: errno={appinfo_result.get('errno')}")
  100. # 检查登录状态
  101. if appinfo_result.get('errno') != 0:
  102. error_msg = appinfo_result.get('errmsg', '未知错误')
  103. errno = appinfo_result.get('errno')
  104. print(f"[{self.platform_name}] API 返回错误: errno={errno}, msg={error_msg}")
  105. # errno 110 表示未登录
  106. if errno == 110:
  107. return {
  108. "success": False,
  109. "error": "Cookie 已失效,需要重新登录",
  110. "need_login": True
  111. }
  112. # errno 10001402 表示分散认证问题,尝试重新访问主页后重试
  113. if errno == 10001402:
  114. print(f"[{self.platform_name}] 检测到分散认证问题,尝试重新访问主页...")
  115. await asyncio.sleep(2)
  116. # 重新访问主页
  117. async with session.get(
  118. 'https://baijiahao.baidu.com/builder/rc/home',
  119. headers=session_headers,
  120. timeout=aiohttp.ClientTimeout(total=30)
  121. ) as retry_home_response:
  122. print(f"[{self.platform_name}] 重新访问主页状态: {retry_home_response.status}")
  123. await asyncio.sleep(1)
  124. # 重试 API 调用
  125. async with session.get(
  126. 'https://baijiahao.baidu.com/builder/app/appinfo',
  127. headers=headers,
  128. timeout=aiohttp.ClientTimeout(total=30)
  129. ) as retry_response:
  130. retry_result = await retry_response.json()
  131. if retry_result.get('errno') == 0:
  132. print(f"[{self.platform_name}] 分散认证问题已解决")
  133. # 使用重试成功的结果继续处理
  134. appinfo_result = retry_result
  135. else:
  136. print(f"[{self.platform_name}] 重试仍然失败")
  137. return {
  138. "success": False,
  139. "error": f"分散认证问题: {error_msg}",
  140. "need_login": True
  141. }
  142. return {
  143. "success": False,
  144. "error": error_msg,
  145. "need_login": True
  146. }
  147. # 获取用户数据
  148. user_data = appinfo_result.get('data', {}).get('user', {})
  149. if not user_data:
  150. return {
  151. "success": False,
  152. "error": "无法获取用户信息",
  153. "need_login": True
  154. }
  155. # 检查账号状态
  156. status = user_data.get('status', '')
  157. # 有效的账号状态:audit(审核中), pass(已通过), normal(正常), newbie(新手)
  158. valid_statuses = ['audit', 'pass', 'normal', 'newbie']
  159. if status not in valid_statuses:
  160. print(f"[{self.platform_name}] 账号状态异常: {status}")
  161. # 提取基本信息
  162. account_name = user_data.get('name') or user_data.get('uname') or '百家号账号'
  163. app_id = user_data.get('app_id') or user_data.get('id', 0)
  164. account_id = str(app_id) if app_id else f"baijiahao_{int(datetime.now().timestamp() * 1000)}"
  165. # 处理头像 URL
  166. avatar_url = user_data.get('avatar') or user_data.get('avatar_unify', '')
  167. if avatar_url and avatar_url.startswith('//'):
  168. avatar_url = 'https:' + avatar_url
  169. print(f"[{self.platform_name}] 账号名称: {account_name}, ID: {account_id}")
  170. # 步骤 2: 获取粉丝数(非关键,失败不影响整体)
  171. fans_count = 0
  172. try:
  173. print(f"[{self.platform_name}] [2/3] 调用 growth/get_info API 获取粉丝数...")
  174. async with session.get(
  175. 'https://baijiahao.baidu.com/cms-ui/rights/growth/get_info',
  176. headers=headers,
  177. timeout=aiohttp.ClientTimeout(total=10)
  178. ) as response:
  179. growth_result = await response.json()
  180. if growth_result.get('errno') == 0:
  181. growth_data = growth_result.get('data', {})
  182. fans_count = int(growth_data.get('fans_num', 0))
  183. print(f"[{self.platform_name}] 粉丝数: {fans_count}")
  184. else:
  185. print(f"[{self.platform_name}] 获取粉丝数失败: {growth_result.get('errmsg')}")
  186. except Exception as e:
  187. print(f"[{self.platform_name}] 获取粉丝数异常(非关键): {e}")
  188. # 步骤 3: 获取作品数量(使用与 Node 端一致的 API)
  189. works_count = 0
  190. try:
  191. print(f"[{self.platform_name}] [3/3] 调用 article/lists API 获取作品数...")
  192. # 使用与 Node 端一致的 API 参数
  193. list_url = 'https://baijiahao.baidu.com/pcui/article/lists?currentPage=1&pageSize=20&search=&type=&collection=&startDate=&endDate=&clearBeforeFetch=false&dynamic=0'
  194. async with session.get(
  195. list_url,
  196. headers={
  197. 'accept': '*/*',
  198. 'user-agent': 'PostmanRuntime/7.51.0',
  199. # cookie 由 session 管理
  200. 'referer': 'https://baijiahao.baidu.com/builder/rc/content',
  201. 'connection': 'keep-alive',
  202. 'accept-encoding': 'gzip, deflate, br',
  203. },
  204. timeout=aiohttp.ClientTimeout(total=30)
  205. ) as response:
  206. response_text = await response.text()
  207. print(f"[{self.platform_name}] ========== Works API Response ==========")
  208. print(f"[{self.platform_name}] Full response: {response_text[:1000]}...") # 只打印前1000字符
  209. print(f"[{self.platform_name}] =========================================")
  210. works_result = json.loads(response_text)
  211. # 处理分散认证问题 (errno=10001402),重试一次
  212. if works_result.get('errno') == 10001402:
  213. print(f"[{self.platform_name}] 分散认证问题 (errno=10001402),3秒后重试...")
  214. await asyncio.sleep(3)
  215. # 重试一次,使用更完整的请求头
  216. retry_headers = headers.copy()
  217. retry_headers.update({
  218. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  219. 'Cache-Control': 'max-age=0',
  220. 'Upgrade-Insecure-Requests': '1',
  221. })
  222. async with session.get(
  223. list_url,
  224. headers=retry_headers,
  225. timeout=aiohttp.ClientTimeout(total=30)
  226. ) as retry_response:
  227. retry_text = await retry_response.text()
  228. print(f"[{self.platform_name}] ========== Works API Retry Response ==========")
  229. print(f"[{self.platform_name}] Full retry response: {retry_text[:1000]}...")
  230. print(f"[{self.platform_name}] ===============================================")
  231. works_result = json.loads(retry_text)
  232. if works_result.get('errno') == 10001402:
  233. print(f"[{self.platform_name}] 重试仍然失败,返回已获取的账号信息")
  234. works_result = None
  235. if works_result and works_result.get('errno') == 0:
  236. works_data = works_result.get('data', {})
  237. # 优先使用 data.page.totalCount,如果没有则使用 data.total(兼容旧格式)
  238. page_info = works_data.get('page', {})
  239. works_count = int(page_info.get('totalCount', works_data.get('total', 0)))
  240. print(f"[{self.platform_name}] 作品数: {works_count} (from page.totalCount: {page_info.get('totalCount')}, from total: {works_data.get('total')})")
  241. else:
  242. errno = works_result.get('errno') if works_result else 'unknown'
  243. errmsg = works_result.get('errmsg', 'unknown error') if works_result else 'no response'
  244. print(f"[{self.platform_name}] 获取作品数失败: errno={errno}, errmsg={errmsg}")
  245. except Exception as e:
  246. import traceback
  247. print(f"[{self.platform_name}] 获取作品数异常(非关键): {e}")
  248. traceback.print_exc()
  249. # 返回账号信息
  250. account_info = {
  251. "success": True,
  252. "account_id": account_id,
  253. "account_name": account_name,
  254. "avatar_url": avatar_url,
  255. "fans_count": fans_count,
  256. "works_count": works_count,
  257. }
  258. print(f"[{self.platform_name}] ✓ 获取成功: {account_name} (粉丝: {fans_count}, 作品: {works_count})")
  259. return account_info
  260. except Exception as e:
  261. import traceback
  262. traceback.print_exc()
  263. return {
  264. "success": False,
  265. "error": str(e)
  266. }
  267. async def check_captcha(self) -> dict:
  268. """检查页面是否需要验证码"""
  269. if not self.page:
  270. return {'need_captcha': False, 'captcha_type': ''}
  271. try:
  272. # 检查各种验证码
  273. captcha_selectors = [
  274. 'text="请输入验证码"',
  275. 'text="滑动验证"',
  276. '[class*="captcha"]',
  277. '[class*="verify"]',
  278. ]
  279. for selector in captcha_selectors:
  280. try:
  281. if await self.page.locator(selector).count() > 0:
  282. print(f"[{self.platform_name}] 检测到验证码: {selector}")
  283. return {'need_captcha': True, 'captcha_type': 'image'}
  284. except:
  285. pass
  286. # 检查登录弹窗
  287. login_selectors = [
  288. 'text="请登录"',
  289. 'text="登录后继续"',
  290. '[class*="login-dialog"]',
  291. ]
  292. for selector in login_selectors:
  293. try:
  294. if await self.page.locator(selector).count() > 0:
  295. print(f"[{self.platform_name}] 检测到需要登录: {selector}")
  296. return {'need_captcha': True, 'captcha_type': 'login'}
  297. except:
  298. pass
  299. except Exception as e:
  300. print(f"[{self.platform_name}] 验证码检测异常: {e}")
  301. return {'need_captcha': False, 'captcha_type': ''}
  302. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  303. """发布视频到百家号"""
  304. import os
  305. print(f"\n{'='*60}")
  306. print(f"[{self.platform_name}] 开始发布视频")
  307. print(f"[{self.platform_name}] 视频路径: {params.video_path}")
  308. print(f"[{self.platform_name}] 标题: {params.title}")
  309. print(f"[{self.platform_name}] Headless: {self.headless}")
  310. print(f"{'='*60}")
  311. self.report_progress(5, "正在初始化浏览器...")
  312. # 初始化浏览器
  313. await self.init_browser()
  314. print(f"[{self.platform_name}] 浏览器初始化完成")
  315. # 解析并设置 cookies
  316. cookie_list = self.parse_cookies(cookies)
  317. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
  318. await self.set_cookies(cookie_list)
  319. if not self.page:
  320. raise Exception("Page not initialized")
  321. # 检查视频文件
  322. if not os.path.exists(params.video_path):
  323. raise Exception(f"视频文件不存在: {params.video_path}")
  324. print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
  325. self.report_progress(10, "正在打开上传页面...")
  326. # 访问视频发布页面(使用新视频发布界面)
  327. video_publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=videoV2&is_from_cms=1"
  328. await self.page.goto(video_publish_url, wait_until="domcontentloaded", timeout=60000)
  329. await asyncio.sleep(3)
  330. # 检查是否跳转到登录页
  331. current_url = self.page.url
  332. print(f"[{self.platform_name}] 当前页面: {current_url}")
  333. for indicator in self.login_indicators:
  334. if indicator in current_url:
  335. screenshot_base64 = await self.capture_screenshot()
  336. return PublishResult(
  337. success=False,
  338. platform=self.platform_name,
  339. error="Cookie 已过期,需要重新登录",
  340. need_captcha=True,
  341. captcha_type='login',
  342. screenshot_base64=screenshot_base64,
  343. page_url=current_url,
  344. status='need_captcha'
  345. )
  346. # 使用 AI 检查验证码
  347. ai_captcha = await self.ai_check_captcha()
  348. if ai_captcha['has_captcha']:
  349. print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
  350. screenshot_base64 = await self.capture_screenshot()
  351. return PublishResult(
  352. success=False,
  353. platform=self.platform_name,
  354. error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
  355. need_captcha=True,
  356. captcha_type=ai_captcha['captcha_type'],
  357. screenshot_base64=screenshot_base64,
  358. page_url=current_url,
  359. status='need_captcha'
  360. )
  361. # 传统方式检查验证码
  362. captcha_result = await self.check_captcha()
  363. if captcha_result['need_captcha']:
  364. screenshot_base64 = await self.capture_screenshot()
  365. return PublishResult(
  366. success=False,
  367. platform=self.platform_name,
  368. error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
  369. need_captcha=True,
  370. captcha_type=captcha_result['captcha_type'],
  371. screenshot_base64=screenshot_base64,
  372. page_url=current_url,
  373. status='need_captcha'
  374. )
  375. self.report_progress(15, "正在选择视频文件...")
  376. # 等待页面加载完成
  377. await asyncio.sleep(2)
  378. # 关闭可能的弹窗
  379. try:
  380. close_buttons = [
  381. 'button:has-text("我知道了")',
  382. 'button:has-text("知道了")',
  383. '[class*="close"]',
  384. '[class*="modal-close"]',
  385. ]
  386. for btn_selector in close_buttons:
  387. try:
  388. btn = self.page.locator(btn_selector).first
  389. if await btn.count() > 0 and await btn.is_visible():
  390. await btn.click()
  391. await asyncio.sleep(0.5)
  392. except:
  393. pass
  394. except:
  395. pass
  396. # 上传视频 - 尝试多种方式
  397. upload_success = False
  398. # 方法1: 直接通过 file input 上传
  399. try:
  400. file_inputs = await self.page.query_selector_all('input[type="file"]')
  401. print(f"[{self.platform_name}] 找到 {len(file_inputs)} 个文件输入")
  402. for file_input in file_inputs:
  403. try:
  404. await file_input.set_input_files(params.video_path)
  405. upload_success = True
  406. print(f"[{self.platform_name}] 通过 file input 上传成功")
  407. break
  408. except Exception as e:
  409. print(f"[{self.platform_name}] file input 上传失败: {e}")
  410. except Exception as e:
  411. print(f"[{self.platform_name}] 查找 file input 失败: {e}")
  412. # 方法2: 点击上传区域
  413. if not upload_success:
  414. upload_selectors = [
  415. 'div[class*="upload-box"]',
  416. 'div[class*="drag-upload"]',
  417. 'div[class*="uploader"]',
  418. 'div:has-text("点击上传")',
  419. 'div:has-text("选择文件")',
  420. '[class*="upload-area"]',
  421. ]
  422. for selector in upload_selectors:
  423. if upload_success:
  424. break
  425. try:
  426. upload_area = self.page.locator(selector).first
  427. if await upload_area.count() > 0:
  428. print(f"[{self.platform_name}] 尝试点击上传区域: {selector}")
  429. async with self.page.expect_file_chooser(timeout=10000) as fc_info:
  430. await upload_area.click()
  431. file_chooser = await fc_info.value
  432. await file_chooser.set_files(params.video_path)
  433. upload_success = True
  434. print(f"[{self.platform_name}] 通过点击上传区域成功")
  435. break
  436. except Exception as e:
  437. print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")
  438. if not upload_success:
  439. screenshot_base64 = await self.capture_screenshot()
  440. return PublishResult(
  441. success=False,
  442. platform=self.platform_name,
  443. error="未找到上传入口",
  444. screenshot_base64=screenshot_base64,
  445. page_url=await self.get_page_url(),
  446. status='failed'
  447. )
  448. self.report_progress(20, "等待视频上传...")
  449. # 等待视频上传完成(最多5分钟)
  450. upload_timeout = 300
  451. start_time = asyncio.get_event_loop().time()
  452. while asyncio.get_event_loop().time() - start_time < upload_timeout:
  453. # 检查上传进度
  454. progress_text = ''
  455. try:
  456. progress_el = self.page.locator('[class*="progress"], [class*="percent"]').first
  457. if await progress_el.count() > 0:
  458. progress_text = await progress_el.text_content()
  459. if progress_text:
  460. import re
  461. match = re.search(r'(\d+)%', progress_text)
  462. if match:
  463. pct = int(match.group(1))
  464. self.report_progress(20 + int(pct * 0.4), f"视频上传中 {pct}%...")
  465. if pct >= 100:
  466. print(f"[{self.platform_name}] 上传完成")
  467. break
  468. except:
  469. pass
  470. # 检查是否出现标题输入框(说明上传完成)
  471. try:
  472. title_input = self.page.locator('input[placeholder*="标题"], textarea[placeholder*="标题"], [class*="title-input"] input').first
  473. if await title_input.count() > 0 and await title_input.is_visible():
  474. print(f"[{self.platform_name}] 检测到标题输入框,上传完成")
  475. break
  476. except:
  477. pass
  478. # 检查是否有错误提示
  479. try:
  480. error_el = self.page.locator('[class*="error"], [class*="fail"]').first
  481. if await error_el.count() > 0:
  482. error_text = await error_el.text_content()
  483. if error_text and ('失败' in error_text or '错误' in error_text):
  484. raise Exception(f"上传失败: {error_text}")
  485. except:
  486. pass
  487. await asyncio.sleep(3)
  488. self.report_progress(60, "正在填写标题...")
  489. await asyncio.sleep(2)
  490. # 填写标题
  491. title_filled = False
  492. title_selectors = [
  493. 'input[placeholder*="标题"]',
  494. 'textarea[placeholder*="标题"]',
  495. '[class*="title-input"] input',
  496. '[class*="title"] input',
  497. 'input[maxlength]',
  498. ]
  499. for selector in title_selectors:
  500. if title_filled:
  501. break
  502. try:
  503. title_input = self.page.locator(selector).first
  504. if await title_input.count() > 0 and await title_input.is_visible():
  505. await title_input.click()
  506. await self.page.keyboard.press("Control+KeyA")
  507. await self.page.keyboard.type(params.title[:30]) # 百家号标题限制30字
  508. title_filled = True
  509. print(f"[{self.platform_name}] 标题填写成功")
  510. except Exception as e:
  511. print(f"[{self.platform_name}] 标题选择器 {selector} 失败: {e}")
  512. if not title_filled:
  513. print(f"[{self.platform_name}] 警告: 未能填写标题")
  514. # 填写描述
  515. if params.description:
  516. self.report_progress(65, "正在填写描述...")
  517. try:
  518. desc_selectors = [
  519. 'textarea[placeholder*="描述"]',
  520. 'textarea[placeholder*="简介"]',
  521. '[class*="desc"] textarea',
  522. '[class*="description"] textarea',
  523. ]
  524. for selector in desc_selectors:
  525. try:
  526. desc_input = self.page.locator(selector).first
  527. if await desc_input.count() > 0 and await desc_input.is_visible():
  528. await desc_input.click()
  529. await self.page.keyboard.type(params.description[:200])
  530. print(f"[{self.platform_name}] 描述填写成功")
  531. break
  532. except:
  533. pass
  534. except Exception as e:
  535. print(f"[{self.platform_name}] 描述填写失败: {e}")
  536. self.report_progress(70, "正在发布...")
  537. await asyncio.sleep(2)
  538. # 点击发布按钮
  539. publish_selectors = [
  540. 'button:has-text("发布")',
  541. 'button:has-text("发表")',
  542. 'button:has-text("提交")',
  543. '[class*="publish"] button',
  544. '[class*="submit"] button',
  545. ]
  546. publish_clicked = False
  547. for selector in publish_selectors:
  548. if publish_clicked:
  549. break
  550. try:
  551. btn = self.page.locator(selector).first
  552. if await btn.count() > 0 and await btn.is_visible():
  553. # 检查按钮是否可用
  554. is_disabled = await btn.get_attribute('disabled')
  555. if is_disabled:
  556. print(f"[{self.platform_name}] 按钮 {selector} 被禁用")
  557. continue
  558. await btn.click()
  559. publish_clicked = True
  560. print(f"[{self.platform_name}] 点击发布按钮成功")
  561. except Exception as e:
  562. print(f"[{self.platform_name}] 发布按钮 {selector} 失败: {e}")
  563. if not publish_clicked:
  564. screenshot_base64 = await self.capture_screenshot()
  565. return PublishResult(
  566. success=False,
  567. platform=self.platform_name,
  568. error="未找到发布按钮",
  569. screenshot_base64=screenshot_base64,
  570. page_url=await self.get_page_url(),
  571. status='failed'
  572. )
  573. self.report_progress(80, "等待发布完成...")
  574. # 记录点击发布前的 URL
  575. publish_page_url = self.page.url
  576. print(f"[{self.platform_name}] 发布前 URL: {publish_page_url}")
  577. # 等待发布完成(最多3分钟)
  578. publish_timeout = 180
  579. start_time = asyncio.get_event_loop().time()
  580. last_url = publish_page_url
  581. while asyncio.get_event_loop().time() - start_time < publish_timeout:
  582. await asyncio.sleep(3)
  583. current_url = self.page.url
  584. # 检测 URL 是否发生变化
  585. if current_url != last_url:
  586. print(f"[{self.platform_name}] URL 变化: {last_url} -> {current_url}")
  587. last_url = current_url
  588. # 检查是否跳转到内容管理页面(真正的成功标志)
  589. # 百家号发布成功后会跳转到 /builder/rc/content 页面
  590. if '/builder/rc/content' in current_url and 'edit' not in current_url:
  591. self.report_progress(100, "发布成功!")
  592. print(f"[{self.platform_name}] 发布成功,已跳转到内容管理页: {current_url}")
  593. screenshot_base64 = await self.capture_screenshot()
  594. return PublishResult(
  595. success=True,
  596. platform=self.platform_name,
  597. message="发布成功",
  598. screenshot_base64=screenshot_base64,
  599. page_url=current_url,
  600. status='success'
  601. )
  602. # 检查是否有明确的成功提示弹窗
  603. try:
  604. # 百家号发布成功会显示"发布成功"弹窗
  605. success_modal = self.page.locator('div:has-text("发布成功"), div:has-text("提交成功"), div:has-text("视频发布成功")').first
  606. if await success_modal.count() > 0 and await success_modal.is_visible():
  607. self.report_progress(100, "发布成功!")
  608. print(f"[{self.platform_name}] 检测到发布成功弹窗")
  609. screenshot_base64 = await self.capture_screenshot()
  610. # 等待一下看是否会跳转
  611. await asyncio.sleep(3)
  612. return PublishResult(
  613. success=True,
  614. platform=self.platform_name,
  615. message="发布成功",
  616. screenshot_base64=screenshot_base64,
  617. page_url=self.page.url,
  618. status='success'
  619. )
  620. except Exception as e:
  621. print(f"[{self.platform_name}] 检测成功提示异常: {e}")
  622. # 检查是否有错误提示
  623. try:
  624. error_selectors = [
  625. 'div.error-tip',
  626. 'div[class*="error-msg"]',
  627. 'span[class*="error"]',
  628. 'div:has-text("发布失败")',
  629. 'div:has-text("提交失败")',
  630. ]
  631. for error_selector in error_selectors:
  632. error_el = self.page.locator(error_selector).first
  633. if await error_el.count() > 0 and await error_el.is_visible():
  634. error_text = await error_el.text_content()
  635. if error_text and error_text.strip():
  636. print(f"[{self.platform_name}] 检测到错误: {error_text}")
  637. screenshot_base64 = await self.capture_screenshot()
  638. return PublishResult(
  639. success=False,
  640. platform=self.platform_name,
  641. error=f"发布失败: {error_text.strip()}",
  642. screenshot_base64=screenshot_base64,
  643. page_url=current_url,
  644. status='failed'
  645. )
  646. except Exception as e:
  647. print(f"[{self.platform_name}] 检测错误提示异常: {e}")
  648. # 检查验证码
  649. captcha_result = await self.check_captcha()
  650. if captcha_result['need_captcha']:
  651. screenshot_base64 = await self.capture_screenshot()
  652. return PublishResult(
  653. success=False,
  654. platform=self.platform_name,
  655. error=f"发布过程中需要{captcha_result['captcha_type']}验证码",
  656. need_captcha=True,
  657. captcha_type=captcha_result['captcha_type'],
  658. screenshot_base64=screenshot_base64,
  659. page_url=current_url,
  660. status='need_captcha'
  661. )
  662. # 检查发布按钮状态(如果还在编辑页面)
  663. if 'edit' in current_url:
  664. try:
  665. # 检查是否正在上传/处理中
  666. processing_indicators = [
  667. '[class*="loading"]',
  668. '[class*="uploading"]',
  669. '[class*="processing"]',
  670. 'div:has-text("正在上传")',
  671. 'div:has-text("正在处理")',
  672. ]
  673. is_processing = False
  674. for indicator in processing_indicators:
  675. if await self.page.locator(indicator).count() > 0:
  676. is_processing = True
  677. print(f"[{self.platform_name}] 正在处理中...")
  678. break
  679. if not is_processing:
  680. # 如果不是在处理中,可能需要重新点击发布按钮
  681. elapsed = asyncio.get_event_loop().time() - start_time
  682. if elapsed > 30: # 30秒后还在编辑页且不在处理中,可能发布没生效
  683. print(f"[{self.platform_name}] 发布似乎未生效,尝试重新点击发布按钮...")
  684. for selector in publish_selectors:
  685. try:
  686. btn = self.page.locator(selector).first
  687. if await btn.count() > 0 and await btn.is_visible():
  688. is_disabled = await btn.get_attribute('disabled')
  689. if not is_disabled:
  690. await btn.click()
  691. print(f"[{self.platform_name}] 重新点击发布按钮")
  692. break
  693. except:
  694. pass
  695. except Exception as e:
  696. print(f"[{self.platform_name}] 检查处理状态异常: {e}")
  697. # 超时,获取截图分析最终状态
  698. print(f"[{self.platform_name}] 发布超时,最终 URL: {self.page.url}")
  699. screenshot_base64 = await self.capture_screenshot()
  700. # 最后一次检查是否在内容管理页
  701. final_url = self.page.url
  702. if '/builder/rc/content' in final_url and 'edit' not in final_url:
  703. return PublishResult(
  704. success=True,
  705. platform=self.platform_name,
  706. message="发布成功(延迟确认)",
  707. screenshot_base64=screenshot_base64,
  708. page_url=final_url,
  709. status='success'
  710. )
  711. return PublishResult(
  712. success=False,
  713. platform=self.platform_name,
  714. error="发布超时,请手动检查发布状态",
  715. screenshot_base64=screenshot_base64,
  716. page_url=final_url,
  717. status='need_action'
  718. )
  719. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  720. """
  721. 获取百家号作品列表
  722. 使用直接 HTTP API 调用,不使用浏览器
  723. """
  724. import aiohttp
  725. print(f"\n{'='*60}")
  726. print(f"[{self.platform_name}] 获取作品列表 (使用 API)")
  727. print(f"[{self.platform_name}] page={page}, page_size={page_size}")
  728. print(f"{'='*60}")
  729. works: List[WorkItem] = []
  730. total = 0
  731. has_more = False
  732. try:
  733. # 解析 cookies
  734. cookie_list = self.parse_cookies(cookies)
  735. cookie_dict = {c['name']: c['value'] for c in cookie_list}
  736. headers = {
  737. 'Accept': 'application/json, text/plain, */*',
  738. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  739. # Cookie 由 session 管理
  740. 'Referer': 'https://baijiahao.baidu.com/builder/rc/content',
  741. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  742. 'Accept-Encoding': 'gzip, deflate, br',
  743. 'Connection': 'keep-alive',
  744. 'Sec-Fetch-Dest': 'empty',
  745. 'Sec-Fetch-Mode': 'cors',
  746. 'Sec-Fetch-Site': 'same-origin',
  747. 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
  748. 'sec-ch-ua-mobile': '?0',
  749. 'sec-ch-ua-platform': '"Windows"'
  750. }
  751. # 计算起始位置
  752. start = page * page_size
  753. async with aiohttp.ClientSession(cookies=cookie_dict) as session:
  754. print(f"[{self.platform_name}] 调用 article/lists API (start={start}, count={page_size})...")
  755. async with session.get(
  756. f'https://baijiahao.baidu.com/pcui/article/lists?start={start}&count={page_size}&article_type=video',
  757. headers=headers,
  758. timeout=aiohttp.ClientTimeout(total=30)
  759. ) as response:
  760. api_result = await response.json()
  761. print(f"[{self.platform_name}] article/lists API 完整响应: {json.dumps(api_result, ensure_ascii=False)[:500]}")
  762. print(f"[{self.platform_name}] API 响应: errno={api_result.get('errno')}")
  763. # 检查登录状态
  764. if api_result.get('errno') != 0:
  765. error_msg = api_result.get('errmsg', '未知错误')
  766. errno = api_result.get('errno')
  767. print(f"[{self.platform_name}] API 返回错误: errno={errno}, msg={error_msg}")
  768. if errno == 110:
  769. raise Exception("Cookie 已过期,请重新登录")
  770. raise Exception(error_msg)
  771. # 解析作品列表
  772. data = api_result.get('data', {})
  773. article_list = data.get('article_list', [])
  774. has_more = data.get('has_more', False)
  775. total = data.get('total', 0)
  776. print(f"[{self.platform_name}] 获取到 {len(article_list)} 个作品,总数: {total}")
  777. for article in article_list:
  778. work_id = str(article.get('article_id', ''))
  779. if not work_id:
  780. continue
  781. # 处理封面图
  782. cover_url = ''
  783. cover_images = article.get('cover_images', [])
  784. if cover_images and len(cover_images) > 0:
  785. cover_url = cover_images[0]
  786. if cover_url and cover_url.startswith('//'):
  787. cover_url = 'https:' + cover_url
  788. works.append(WorkItem(
  789. work_id=work_id,
  790. title=article.get('title', ''),
  791. cover_url=cover_url,
  792. duration=0,
  793. status='published',
  794. publish_time=article.get('publish_time', ''),
  795. play_count=int(article.get('read_count', 0)),
  796. like_count=int(article.get('like_count', 0)),
  797. comment_count=int(article.get('comment_count', 0)),
  798. share_count=int(article.get('share_count', 0)),
  799. ))
  800. print(f"[{self.platform_name}] ✓ 成功解析 {len(works)} 个作品")
  801. except Exception as e:
  802. import traceback
  803. traceback.print_exc()
  804. return WorksResult(
  805. success=False,
  806. platform=self.platform_name,
  807. error=str(e)
  808. )
  809. return WorksResult(
  810. success=True,
  811. platform=self.platform_name,
  812. works=works,
  813. total=total,
  814. has_more=has_more
  815. )
  async def check_login_status(self, cookies: str) -> dict:
      """
      Check whether the given Baijiahao cookies still belong to a logged-in session.

      Uses direct HTTP calls (aiohttp) instead of a browser: the creator-center
      home page is requested first, then the ``appinfo`` API is queried and its
      JSON response is interpreted.

      Args:
          cookies: Cookie payload in the format accepted by ``self.parse_cookies``.

      Returns:
          A dict with the keys ``success`` (the check itself completed),
          ``valid`` (the cookies are usable), ``need_login`` and ``message``.
          When the check aborts with an exception, ``success`` is ``False`` and
          the dict additionally carries ``error`` with the exception text.
      """
      import aiohttp

      print(f"[{self.platform_name}] 检查登录状态 (使用 API)")

      try:
          # Parse the cookies into the name -> value mapping aiohttp expects.
          cookie_dict = {c['name']: c['value'] for c in self.parse_cookies(cookies)}

          user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
          sec_ch_ua = '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'

          # Headers for the initial page navigation (cookies are managed by the session).
          session_headers = {
              'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
              'User-Agent': user_agent,
              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
              'Accept-Encoding': 'gzip, deflate, br',
              'Connection': 'keep-alive',
              'Upgrade-Insecure-Requests': '1',
              'Sec-Fetch-Dest': 'document',
              'Sec-Fetch-Mode': 'navigate',
              'Sec-Fetch-Site': 'none',
              'Sec-Fetch-User': '?1',
              'sec-ch-ua': sec_ch_ua,
              'sec-ch-ua-mobile': '?0',
              'sec-ch-ua-platform': '"Windows"'
          }

          # Headers for the same-origin API call (cookies are managed by the session).
          headers = {
              'Accept': 'application/json, text/plain, */*',
              'User-Agent': user_agent,
              'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
              'Accept-Encoding': 'gzip, deflate, br',
              'Connection': 'keep-alive',
              'Sec-Fetch-Dest': 'empty',
              'Sec-Fetch-Mode': 'cors',
              'Sec-Fetch-Site': 'same-origin',
              'sec-ch-ua': sec_ch_ua,
              'sec-ch-ua-mobile': '?0',
              'sec-ch-ua-platform': '"Windows"'
          }

          request_timeout = aiohttp.ClientTimeout(total=30)

          async with aiohttp.ClientSession(cookies=cookie_dict) as session:
              # Step 0: visit the home page first to establish the session context.
              print(f"[{self.platform_name}] [0/2] 访问主页建立会话上下文...")
              async with session.get(
                  'https://baijiahao.baidu.com/builder/rc/home',
                  headers=session_headers,
                  timeout=request_timeout
              ) as home_response:
                  home_status = home_response.status
                  print(f"[{self.platform_name}] 主页访问状态: {home_status}")

              # Short pause so the session is established before the API call.
              await asyncio.sleep(1)

              # Step 1: query the appinfo API to check the login state.
              print(f"[{self.platform_name}] [1/2] 调用 appinfo API 检查登录状态...")
              async with session.get(
                  'https://baijiahao.baidu.com/builder/app/appinfo',
                  headers=headers,
                  timeout=request_timeout
              ) as response:
                  api_result = await response.json()

          errno = api_result.get('errno')
          print(f"[{self.platform_name}] API 完整响应: {json.dumps(api_result, ensure_ascii=False)[:500]}")
          print(f"[{self.platform_name}] API 响应: errno={errno}")

          # A non-zero errno means the request failed (e.g. 110 = not logged in).
          if errno != 0:
              error_msg = api_result.get('errmsg', '未知错误')
              print(f"[{self.platform_name}] Cookie 无效: errno={errno}, msg={error_msg}")
              return {
                  "success": True,
                  "valid": False,
                  "need_login": True,
                  "message": error_msg
              }

          # errno == 0: the request succeeded, now look for user data.
          # "data" / "user" may be missing, null or of an unexpected type;
          # all of those are treated as "no user data" instead of crashing.
          data = api_result.get('data')
          user_data = data.get('user') if isinstance(data, dict) else None
          if not isinstance(user_data, dict) or not user_data:
              print(f"[{self.platform_name}] 无用户数据,Cookie 可能无效")
              return {
                  "success": True,
                  "valid": False,
                  "need_login": True,
                  "message": "无用户数据"
              }

          status = user_data.get('status', '')
          account_name = user_data.get('name') or user_data.get('uname', '')

          # Account states accepted as valid: audit (under review), pass (approved),
          # normal, newbie.
          valid_statuses = ('audit', 'pass', 'normal', 'newbie')
          if status in valid_statuses and account_name:
              print(f"[{self.platform_name}] ✓ 登录状态有效: {account_name} (status={status})")
              return {
                  "success": True,
                  "valid": True,
                  "need_login": False,
                  "message": "登录状态有效"
              }

          print(f"[{self.platform_name}] 账号状态异常: status={status}, name={account_name}")
          return {
              "success": True,
              "valid": False,
              "need_login": True,
              "message": f"账号状态异常: {status}"
          }

      except Exception as e:
          import traceback
          traceback.print_exc()
          # Keep "error" for existing callers and add "message" so every
          # return path exposes the same keys.
          failure_text = str(e)
          return {
              "success": False,
              "valid": False,
              "need_login": True,
              "message": failure_text,
              "error": failure_text
          }
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """
      Fetch the comments of a Baijiahao work (not implemented yet).

      Always returns a failed ``CommentsResult`` for ``work_id`` carrying a
      "not implemented" error; ``cookies`` and ``cursor`` are currently unused.
      """
      # TODO: implement comment retrieval
      not_implemented_error = "百家号评论功能暂未实现"
      outcome = CommentsResult(
          platform=self.platform_name,
          work_id=work_id,
          success=False,
          error=not_implemented_error
      )
      return outcome