baijiahao.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943
  1. # -*- coding: utf-8 -*-
  2. """
  3. 百家号视频发布器
  4. """
  5. import asyncio
  6. import json
  7. from typing import List
  8. from datetime import datetime
  9. from .base import (
  10. BasePublisher, PublishParams, PublishResult,
  11. WorkItem, WorksResult, CommentItem, CommentsResult
  12. )
class BaijiahaoPublisher(BasePublisher):
    """
    Baijiahao video publisher.

    Uses Playwright to automate the Baijiahao creator center.
    """
    platform_name = "baijiahao"
    login_url = "https://baijiahao.baidu.com/"
    publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=video"
    cookie_domain = ".baidu.com"
    # Login-detection settings.
    login_check_url = "https://baijiahao.baidu.com/builder/rc/home"
    # URL fragments; publish() treats a page URL containing any of these as
    # a redirect to the login flow.
    login_indicators = ["passport.baidu.com", "/login", "wappass.baidu.com"]
    # Page selectors associated with a logged-out state.
    # NOTE(review): presumably consumed by BasePublisher - confirm.
    login_selectors = ['text="登录"', 'text="请登录"', '[class*="login-btn"]']
  26. async def get_account_info(self, cookies: str) -> dict:
  27. """
  28. 获取百家号账号信息
  29. 使用直接 HTTP API 调用,不使用浏览器
  30. """
  31. import aiohttp
  32. print(f"\n{'='*60}")
  33. print(f"[{self.platform_name}] 获取账号信息 (使用 API)")
  34. print(f"{'='*60}")
  35. try:
  36. # 解析 cookies
  37. cookie_list = self.parse_cookies(cookies)
  38. cookie_str = '; '.join([f"{c['name']}={c['value']}" for c in cookie_list])
  39. headers = {
  40. 'Accept': 'application/json, text/plain, */*',
  41. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  42. 'Cookie': cookie_str,
  43. 'Referer': 'https://baijiahao.baidu.com/builder/rc/home'
  44. }
  45. async with aiohttp.ClientSession() as session:
  46. # 步骤 1: 获取账号基本信息
  47. print(f"[{self.platform_name}] [1/3] 调用 appinfo API...")
  48. async with session.get(
  49. 'https://baijiahao.baidu.com/builder/app/appinfo',
  50. headers=headers,
  51. timeout=aiohttp.ClientTimeout(total=30)
  52. ) as response:
  53. appinfo_result = await response.json()
  54. print(f"[{self.platform_name}] appinfo API 完整响应: {json.dumps(appinfo_result, ensure_ascii=False)[:500]}")
  55. print(f"[{self.platform_name}] appinfo API 响应: errno={appinfo_result.get('errno')}")
  56. # 检查登录状态
  57. if appinfo_result.get('errno') != 0:
  58. error_msg = appinfo_result.get('errmsg', '未知错误')
  59. errno = appinfo_result.get('errno')
  60. print(f"[{self.platform_name}] API 返回错误: errno={errno}, msg={error_msg}")
  61. # errno 110 表示未登录
  62. if errno == 110:
  63. return {
  64. "success": False,
  65. "error": "Cookie 已失效,需要重新登录",
  66. "need_login": True
  67. }
  68. return {
  69. "success": False,
  70. "error": error_msg,
  71. "need_login": True
  72. }
  73. # 获取用户数据
  74. user_data = appinfo_result.get('data', {}).get('user', {})
  75. if not user_data:
  76. return {
  77. "success": False,
  78. "error": "无法获取用户信息",
  79. "need_login": True
  80. }
  81. # 检查账号状态
  82. status = user_data.get('status', '')
  83. # 有效的账号状态:audit(审核中), pass(已通过), normal(正常), newbie(新手)
  84. valid_statuses = ['audit', 'pass', 'normal', 'newbie']
  85. if status not in valid_statuses:
  86. print(f"[{self.platform_name}] 账号状态异常: {status}")
  87. # 提取基本信息
  88. account_name = user_data.get('name') or user_data.get('uname') or '百家号账号'
  89. app_id = user_data.get('app_id') or user_data.get('id', 0)
  90. account_id = str(app_id) if app_id else f"baijiahao_{int(datetime.now().timestamp() * 1000)}"
  91. # 处理头像 URL
  92. avatar_url = user_data.get('avatar') or user_data.get('avatar_unify', '')
  93. if avatar_url and avatar_url.startswith('//'):
  94. avatar_url = 'https:' + avatar_url
  95. print(f"[{self.platform_name}] 账号名称: {account_name}, ID: {account_id}")
  96. # 步骤 2: 获取粉丝数(非关键,失败不影响整体)
  97. fans_count = 0
  98. try:
  99. print(f"[{self.platform_name}] [2/3] 调用 growth/get_info API 获取粉丝数...")
  100. async with session.get(
  101. 'https://baijiahao.baidu.com/cms-ui/rights/growth/get_info',
  102. headers=headers,
  103. timeout=aiohttp.ClientTimeout(total=10)
  104. ) as response:
  105. growth_result = await response.json()
  106. if growth_result.get('errno') == 0:
  107. growth_data = growth_result.get('data', {})
  108. fans_count = int(growth_data.get('fans_num', 0))
  109. print(f"[{self.platform_name}] 粉丝数: {fans_count}")
  110. else:
  111. print(f"[{self.platform_name}] 获取粉丝数失败: {growth_result.get('errmsg')}")
  112. except Exception as e:
  113. print(f"[{self.platform_name}] 获取粉丝数异常(非关键): {e}")
  114. # 步骤 3: 获取作品数量(使用与 Node 端一致的 API)
  115. works_count = 0
  116. try:
  117. print(f"[{self.platform_name}] [3/3] 调用 article/lists API 获取作品数...")
  118. # 使用与 Node 端一致的 API 参数
  119. list_url = 'https://baijiahao.baidu.com/pcui/article/lists?currentPage=1&pageSize=20&search=&type=&collection=&startDate=&endDate=&clearBeforeFetch=false&dynamic=0'
  120. async with session.get(
  121. list_url,
  122. headers={
  123. 'accept': '*/*',
  124. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  125. 'cookie': cookie_str,
  126. 'referer': 'https://baijiahao.baidu.com/builder/rc/content',
  127. 'connection': 'keep-alive',
  128. 'accept-encoding': 'gzip, deflate, br',
  129. },
  130. timeout=aiohttp.ClientTimeout(total=30)
  131. ) as response:
  132. response_text = await response.text()
  133. print(f"[{self.platform_name}] ========== Works API Response ==========")
  134. print(f"[{self.platform_name}] Full response: {response_text[:1000]}...") # 只打印前1000字符
  135. print(f"[{self.platform_name}] =========================================")
  136. works_result = json.loads(response_text)
  137. # 处理分散认证问题 (errno=10001402),重试一次
  138. if works_result.get('errno') == 10001402:
  139. print(f"[{self.platform_name}] 分散认证问题 (errno=10001402),3秒后重试...")
  140. await asyncio.sleep(3)
  141. # 重试一次
  142. async with session.get(
  143. list_url,
  144. headers=headers,
  145. timeout=aiohttp.ClientTimeout(total=30)
  146. ) as retry_response:
  147. retry_text = await retry_response.text()
  148. print(f"[{self.platform_name}] ========== Works API Retry Response ==========")
  149. print(f"[{self.platform_name}] Full retry response: {retry_text[:1000]}...")
  150. print(f"[{self.platform_name}] ===============================================")
  151. works_result = json.loads(retry_text)
  152. if works_result.get('errno') == 10001402:
  153. print(f"[{self.platform_name}] 重试仍然失败,返回已获取的账号信息")
  154. works_result = None
  155. if works_result and works_result.get('errno') == 0:
  156. works_data = works_result.get('data', {})
  157. # 优先使用 data.page.totalCount,如果没有则使用 data.total(兼容旧格式)
  158. page_info = works_data.get('page', {})
  159. works_count = int(page_info.get('totalCount', works_data.get('total', 0)))
  160. print(f"[{self.platform_name}] 作品数: {works_count} (from page.totalCount: {page_info.get('totalCount')}, from total: {works_data.get('total')})")
  161. else:
  162. errno = works_result.get('errno') if works_result else 'unknown'
  163. errmsg = works_result.get('errmsg', 'unknown error') if works_result else 'no response'
  164. print(f"[{self.platform_name}] 获取作品数失败: errno={errno}, errmsg={errmsg}")
  165. except Exception as e:
  166. import traceback
  167. print(f"[{self.platform_name}] 获取作品数异常(非关键): {e}")
  168. traceback.print_exc()
  169. # 返回账号信息
  170. account_info = {
  171. "success": True,
  172. "account_id": account_id,
  173. "account_name": account_name,
  174. "avatar_url": avatar_url,
  175. "fans_count": fans_count,
  176. "works_count": works_count,
  177. }
  178. print(f"[{self.platform_name}] ✓ 获取成功: {account_name} (粉丝: {fans_count}, 作品: {works_count})")
  179. return account_info
  180. except Exception as e:
  181. import traceback
  182. traceback.print_exc()
  183. return {
  184. "success": False,
  185. "error": str(e)
  186. }
  187. async def check_captcha(self) -> dict:
  188. """检查页面是否需要验证码"""
  189. if not self.page:
  190. return {'need_captcha': False, 'captcha_type': ''}
  191. try:
  192. # 检查各种验证码
  193. captcha_selectors = [
  194. 'text="请输入验证码"',
  195. 'text="滑动验证"',
  196. '[class*="captcha"]',
  197. '[class*="verify"]',
  198. ]
  199. for selector in captcha_selectors:
  200. try:
  201. if await self.page.locator(selector).count() > 0:
  202. print(f"[{self.platform_name}] 检测到验证码: {selector}")
  203. return {'need_captcha': True, 'captcha_type': 'image'}
  204. except:
  205. pass
  206. # 检查登录弹窗
  207. login_selectors = [
  208. 'text="请登录"',
  209. 'text="登录后继续"',
  210. '[class*="login-dialog"]',
  211. ]
  212. for selector in login_selectors:
  213. try:
  214. if await self.page.locator(selector).count() > 0:
  215. print(f"[{self.platform_name}] 检测到需要登录: {selector}")
  216. return {'need_captcha': True, 'captcha_type': 'login'}
  217. except:
  218. pass
  219. except Exception as e:
  220. print(f"[{self.platform_name}] 验证码检测异常: {e}")
  221. return {'need_captcha': False, 'captcha_type': ''}
  222. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  223. """发布视频到百家号"""
  224. import os
  225. print(f"\n{'='*60}")
  226. print(f"[{self.platform_name}] 开始发布视频")
  227. print(f"[{self.platform_name}] 视频路径: {params.video_path}")
  228. print(f"[{self.platform_name}] 标题: {params.title}")
  229. print(f"[{self.platform_name}] Headless: {self.headless}")
  230. print(f"{'='*60}")
  231. self.report_progress(5, "正在初始化浏览器...")
  232. # 初始化浏览器
  233. await self.init_browser()
  234. print(f"[{self.platform_name}] 浏览器初始化完成")
  235. # 解析并设置 cookies
  236. cookie_list = self.parse_cookies(cookies)
  237. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
  238. await self.set_cookies(cookie_list)
  239. if not self.page:
  240. raise Exception("Page not initialized")
  241. # 检查视频文件
  242. if not os.path.exists(params.video_path):
  243. raise Exception(f"视频文件不存在: {params.video_path}")
  244. print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
  245. self.report_progress(10, "正在打开上传页面...")
  246. # 访问视频发布页面(使用新视频发布界面)
  247. video_publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=videoV2&is_from_cms=1"
  248. await self.page.goto(video_publish_url, wait_until="domcontentloaded", timeout=60000)
  249. await asyncio.sleep(3)
  250. # 检查是否跳转到登录页
  251. current_url = self.page.url
  252. print(f"[{self.platform_name}] 当前页面: {current_url}")
  253. for indicator in self.login_indicators:
  254. if indicator in current_url:
  255. screenshot_base64 = await self.capture_screenshot()
  256. return PublishResult(
  257. success=False,
  258. platform=self.platform_name,
  259. error="Cookie 已过期,需要重新登录",
  260. need_captcha=True,
  261. captcha_type='login',
  262. screenshot_base64=screenshot_base64,
  263. page_url=current_url,
  264. status='need_captcha'
  265. )
  266. # 使用 AI 检查验证码
  267. ai_captcha = await self.ai_check_captcha()
  268. if ai_captcha['has_captcha']:
  269. print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
  270. screenshot_base64 = await self.capture_screenshot()
  271. return PublishResult(
  272. success=False,
  273. platform=self.platform_name,
  274. error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
  275. need_captcha=True,
  276. captcha_type=ai_captcha['captcha_type'],
  277. screenshot_base64=screenshot_base64,
  278. page_url=current_url,
  279. status='need_captcha'
  280. )
  281. # 传统方式检查验证码
  282. captcha_result = await self.check_captcha()
  283. if captcha_result['need_captcha']:
  284. screenshot_base64 = await self.capture_screenshot()
  285. return PublishResult(
  286. success=False,
  287. platform=self.platform_name,
  288. error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
  289. need_captcha=True,
  290. captcha_type=captcha_result['captcha_type'],
  291. screenshot_base64=screenshot_base64,
  292. page_url=current_url,
  293. status='need_captcha'
  294. )
  295. self.report_progress(15, "正在选择视频文件...")
  296. # 等待页面加载完成
  297. await asyncio.sleep(2)
  298. # 关闭可能的弹窗
  299. try:
  300. close_buttons = [
  301. 'button:has-text("我知道了")',
  302. 'button:has-text("知道了")',
  303. '[class*="close"]',
  304. '[class*="modal-close"]',
  305. ]
  306. for btn_selector in close_buttons:
  307. try:
  308. btn = self.page.locator(btn_selector).first
  309. if await btn.count() > 0 and await btn.is_visible():
  310. await btn.click()
  311. await asyncio.sleep(0.5)
  312. except:
  313. pass
  314. except:
  315. pass
  316. # 上传视频 - 尝试多种方式
  317. upload_success = False
  318. # 方法1: 直接通过 file input 上传
  319. try:
  320. file_inputs = await self.page.query_selector_all('input[type="file"]')
  321. print(f"[{self.platform_name}] 找到 {len(file_inputs)} 个文件输入")
  322. for file_input in file_inputs:
  323. try:
  324. await file_input.set_input_files(params.video_path)
  325. upload_success = True
  326. print(f"[{self.platform_name}] 通过 file input 上传成功")
  327. break
  328. except Exception as e:
  329. print(f"[{self.platform_name}] file input 上传失败: {e}")
  330. except Exception as e:
  331. print(f"[{self.platform_name}] 查找 file input 失败: {e}")
  332. # 方法2: 点击上传区域
  333. if not upload_success:
  334. upload_selectors = [
  335. 'div[class*="upload-box"]',
  336. 'div[class*="drag-upload"]',
  337. 'div[class*="uploader"]',
  338. 'div:has-text("点击上传")',
  339. 'div:has-text("选择文件")',
  340. '[class*="upload-area"]',
  341. ]
  342. for selector in upload_selectors:
  343. if upload_success:
  344. break
  345. try:
  346. upload_area = self.page.locator(selector).first
  347. if await upload_area.count() > 0:
  348. print(f"[{self.platform_name}] 尝试点击上传区域: {selector}")
  349. async with self.page.expect_file_chooser(timeout=10000) as fc_info:
  350. await upload_area.click()
  351. file_chooser = await fc_info.value
  352. await file_chooser.set_files(params.video_path)
  353. upload_success = True
  354. print(f"[{self.platform_name}] 通过点击上传区域成功")
  355. break
  356. except Exception as e:
  357. print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")
  358. if not upload_success:
  359. screenshot_base64 = await self.capture_screenshot()
  360. return PublishResult(
  361. success=False,
  362. platform=self.platform_name,
  363. error="未找到上传入口",
  364. screenshot_base64=screenshot_base64,
  365. page_url=await self.get_page_url(),
  366. status='failed'
  367. )
  368. self.report_progress(20, "等待视频上传...")
  369. # 等待视频上传完成(最多5分钟)
  370. upload_timeout = 300
  371. start_time = asyncio.get_event_loop().time()
  372. while asyncio.get_event_loop().time() - start_time < upload_timeout:
  373. # 检查上传进度
  374. progress_text = ''
  375. try:
  376. progress_el = self.page.locator('[class*="progress"], [class*="percent"]').first
  377. if await progress_el.count() > 0:
  378. progress_text = await progress_el.text_content()
  379. if progress_text:
  380. import re
  381. match = re.search(r'(\d+)%', progress_text)
  382. if match:
  383. pct = int(match.group(1))
  384. self.report_progress(20 + int(pct * 0.4), f"视频上传中 {pct}%...")
  385. if pct >= 100:
  386. print(f"[{self.platform_name}] 上传完成")
  387. break
  388. except:
  389. pass
  390. # 检查是否出现标题输入框(说明上传完成)
  391. try:
  392. title_input = self.page.locator('input[placeholder*="标题"], textarea[placeholder*="标题"], [class*="title-input"] input').first
  393. if await title_input.count() > 0 and await title_input.is_visible():
  394. print(f"[{self.platform_name}] 检测到标题输入框,上传完成")
  395. break
  396. except:
  397. pass
  398. # 检查是否有错误提示
  399. try:
  400. error_el = self.page.locator('[class*="error"], [class*="fail"]').first
  401. if await error_el.count() > 0:
  402. error_text = await error_el.text_content()
  403. if error_text and ('失败' in error_text or '错误' in error_text):
  404. raise Exception(f"上传失败: {error_text}")
  405. except:
  406. pass
  407. await asyncio.sleep(3)
  408. self.report_progress(60, "正在填写标题...")
  409. await asyncio.sleep(2)
  410. # 填写标题
  411. title_filled = False
  412. title_selectors = [
  413. 'input[placeholder*="标题"]',
  414. 'textarea[placeholder*="标题"]',
  415. '[class*="title-input"] input',
  416. '[class*="title"] input',
  417. 'input[maxlength]',
  418. ]
  419. for selector in title_selectors:
  420. if title_filled:
  421. break
  422. try:
  423. title_input = self.page.locator(selector).first
  424. if await title_input.count() > 0 and await title_input.is_visible():
  425. await title_input.click()
  426. await self.page.keyboard.press("Control+KeyA")
  427. await self.page.keyboard.type(params.title[:30]) # 百家号标题限制30字
  428. title_filled = True
  429. print(f"[{self.platform_name}] 标题填写成功")
  430. except Exception as e:
  431. print(f"[{self.platform_name}] 标题选择器 {selector} 失败: {e}")
  432. if not title_filled:
  433. print(f"[{self.platform_name}] 警告: 未能填写标题")
  434. # 填写描述
  435. if params.description:
  436. self.report_progress(65, "正在填写描述...")
  437. try:
  438. desc_selectors = [
  439. 'textarea[placeholder*="描述"]',
  440. 'textarea[placeholder*="简介"]',
  441. '[class*="desc"] textarea',
  442. '[class*="description"] textarea',
  443. ]
  444. for selector in desc_selectors:
  445. try:
  446. desc_input = self.page.locator(selector).first
  447. if await desc_input.count() > 0 and await desc_input.is_visible():
  448. await desc_input.click()
  449. await self.page.keyboard.type(params.description[:200])
  450. print(f"[{self.platform_name}] 描述填写成功")
  451. break
  452. except:
  453. pass
  454. except Exception as e:
  455. print(f"[{self.platform_name}] 描述填写失败: {e}")
  456. self.report_progress(70, "正在发布...")
  457. await asyncio.sleep(2)
  458. # 点击发布按钮
  459. publish_selectors = [
  460. 'button:has-text("发布")',
  461. 'button:has-text("发表")',
  462. 'button:has-text("提交")',
  463. '[class*="publish"] button',
  464. '[class*="submit"] button',
  465. ]
  466. publish_clicked = False
  467. for selector in publish_selectors:
  468. if publish_clicked:
  469. break
  470. try:
  471. btn = self.page.locator(selector).first
  472. if await btn.count() > 0 and await btn.is_visible():
  473. # 检查按钮是否可用
  474. is_disabled = await btn.get_attribute('disabled')
  475. if is_disabled:
  476. print(f"[{self.platform_name}] 按钮 {selector} 被禁用")
  477. continue
  478. await btn.click()
  479. publish_clicked = True
  480. print(f"[{self.platform_name}] 点击发布按钮成功")
  481. except Exception as e:
  482. print(f"[{self.platform_name}] 发布按钮 {selector} 失败: {e}")
  483. if not publish_clicked:
  484. screenshot_base64 = await self.capture_screenshot()
  485. return PublishResult(
  486. success=False,
  487. platform=self.platform_name,
  488. error="未找到发布按钮",
  489. screenshot_base64=screenshot_base64,
  490. page_url=await self.get_page_url(),
  491. status='failed'
  492. )
  493. self.report_progress(80, "等待发布完成...")
  494. # 记录点击发布前的 URL
  495. publish_page_url = self.page.url
  496. print(f"[{self.platform_name}] 发布前 URL: {publish_page_url}")
  497. # 等待发布完成(最多3分钟)
  498. publish_timeout = 180
  499. start_time = asyncio.get_event_loop().time()
  500. last_url = publish_page_url
  501. while asyncio.get_event_loop().time() - start_time < publish_timeout:
  502. await asyncio.sleep(3)
  503. current_url = self.page.url
  504. # 检测 URL 是否发生变化
  505. if current_url != last_url:
  506. print(f"[{self.platform_name}] URL 变化: {last_url} -> {current_url}")
  507. last_url = current_url
  508. # 检查是否跳转到内容管理页面(真正的成功标志)
  509. # 百家号发布成功后会跳转到 /builder/rc/content 页面
  510. if '/builder/rc/content' in current_url and 'edit' not in current_url:
  511. self.report_progress(100, "发布成功!")
  512. print(f"[{self.platform_name}] 发布成功,已跳转到内容管理页: {current_url}")
  513. screenshot_base64 = await self.capture_screenshot()
  514. return PublishResult(
  515. success=True,
  516. platform=self.platform_name,
  517. message="发布成功",
  518. screenshot_base64=screenshot_base64,
  519. page_url=current_url,
  520. status='success'
  521. )
  522. # 检查是否有明确的成功提示弹窗
  523. try:
  524. # 百家号发布成功会显示"发布成功"弹窗
  525. success_modal = self.page.locator('div:has-text("发布成功"), div:has-text("提交成功"), div:has-text("视频发布成功")').first
  526. if await success_modal.count() > 0 and await success_modal.is_visible():
  527. self.report_progress(100, "发布成功!")
  528. print(f"[{self.platform_name}] 检测到发布成功弹窗")
  529. screenshot_base64 = await self.capture_screenshot()
  530. # 等待一下看是否会跳转
  531. await asyncio.sleep(3)
  532. return PublishResult(
  533. success=True,
  534. platform=self.platform_name,
  535. message="发布成功",
  536. screenshot_base64=screenshot_base64,
  537. page_url=self.page.url,
  538. status='success'
  539. )
  540. except Exception as e:
  541. print(f"[{self.platform_name}] 检测成功提示异常: {e}")
  542. # 检查是否有错误提示
  543. try:
  544. error_selectors = [
  545. 'div.error-tip',
  546. 'div[class*="error-msg"]',
  547. 'span[class*="error"]',
  548. 'div:has-text("发布失败")',
  549. 'div:has-text("提交失败")',
  550. ]
  551. for error_selector in error_selectors:
  552. error_el = self.page.locator(error_selector).first
  553. if await error_el.count() > 0 and await error_el.is_visible():
  554. error_text = await error_el.text_content()
  555. if error_text and error_text.strip():
  556. print(f"[{self.platform_name}] 检测到错误: {error_text}")
  557. screenshot_base64 = await self.capture_screenshot()
  558. return PublishResult(
  559. success=False,
  560. platform=self.platform_name,
  561. error=f"发布失败: {error_text.strip()}",
  562. screenshot_base64=screenshot_base64,
  563. page_url=current_url,
  564. status='failed'
  565. )
  566. except Exception as e:
  567. print(f"[{self.platform_name}] 检测错误提示异常: {e}")
  568. # 检查验证码
  569. captcha_result = await self.check_captcha()
  570. if captcha_result['need_captcha']:
  571. screenshot_base64 = await self.capture_screenshot()
  572. return PublishResult(
  573. success=False,
  574. platform=self.platform_name,
  575. error=f"发布过程中需要{captcha_result['captcha_type']}验证码",
  576. need_captcha=True,
  577. captcha_type=captcha_result['captcha_type'],
  578. screenshot_base64=screenshot_base64,
  579. page_url=current_url,
  580. status='need_captcha'
  581. )
  582. # 检查发布按钮状态(如果还在编辑页面)
  583. if 'edit' in current_url:
  584. try:
  585. # 检查是否正在上传/处理中
  586. processing_indicators = [
  587. '[class*="loading"]',
  588. '[class*="uploading"]',
  589. '[class*="processing"]',
  590. 'div:has-text("正在上传")',
  591. 'div:has-text("正在处理")',
  592. ]
  593. is_processing = False
  594. for indicator in processing_indicators:
  595. if await self.page.locator(indicator).count() > 0:
  596. is_processing = True
  597. print(f"[{self.platform_name}] 正在处理中...")
  598. break
  599. if not is_processing:
  600. # 如果不是在处理中,可能需要重新点击发布按钮
  601. elapsed = asyncio.get_event_loop().time() - start_time
  602. if elapsed > 30: # 30秒后还在编辑页且不在处理中,可能发布没生效
  603. print(f"[{self.platform_name}] 发布似乎未生效,尝试重新点击发布按钮...")
  604. for selector in publish_selectors:
  605. try:
  606. btn = self.page.locator(selector).first
  607. if await btn.count() > 0 and await btn.is_visible():
  608. is_disabled = await btn.get_attribute('disabled')
  609. if not is_disabled:
  610. await btn.click()
  611. print(f"[{self.platform_name}] 重新点击发布按钮")
  612. break
  613. except:
  614. pass
  615. except Exception as e:
  616. print(f"[{self.platform_name}] 检查处理状态异常: {e}")
  617. # 超时,获取截图分析最终状态
  618. print(f"[{self.platform_name}] 发布超时,最终 URL: {self.page.url}")
  619. screenshot_base64 = await self.capture_screenshot()
  620. # 最后一次检查是否在内容管理页
  621. final_url = self.page.url
  622. if '/builder/rc/content' in final_url and 'edit' not in final_url:
  623. return PublishResult(
  624. success=True,
  625. platform=self.platform_name,
  626. message="发布成功(延迟确认)",
  627. screenshot_base64=screenshot_base64,
  628. page_url=final_url,
  629. status='success'
  630. )
  631. return PublishResult(
  632. success=False,
  633. platform=self.platform_name,
  634. error="发布超时,请手动检查发布状态",
  635. screenshot_base64=screenshot_base64,
  636. page_url=final_url,
  637. status='need_action'
  638. )
  639. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  640. """
  641. 获取百家号作品列表
  642. 使用直接 HTTP API 调用,不使用浏览器
  643. """
  644. import aiohttp
  645. print(f"\n{'='*60}")
  646. print(f"[{self.platform_name}] 获取作品列表 (使用 API)")
  647. print(f"[{self.platform_name}] page={page}, page_size={page_size}")
  648. print(f"{'='*60}")
  649. works: List[WorkItem] = []
  650. total = 0
  651. has_more = False
  652. try:
  653. # 解析 cookies
  654. cookie_list = self.parse_cookies(cookies)
  655. cookie_str = '; '.join([f"{c['name']}={c['value']}" for c in cookie_list])
  656. headers = {
  657. 'Accept': 'application/json, text/plain, */*',
  658. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  659. 'Cookie': cookie_str,
  660. 'Referer': 'https://baijiahao.baidu.com/builder/rc/content'
  661. }
  662. # 计算起始位置
  663. start = page * page_size
  664. async with aiohttp.ClientSession() as session:
  665. print(f"[{self.platform_name}] 调用 article/lists API (start={start}, count={page_size})...")
  666. async with session.get(
  667. f'https://baijiahao.baidu.com/pcui/article/lists?start={start}&count={page_size}&article_type=video',
  668. headers=headers,
  669. timeout=aiohttp.ClientTimeout(total=30)
  670. ) as response:
  671. api_result = await response.json()
  672. print(f"[{self.platform_name}] article/lists API 完整响应: {json.dumps(api_result, ensure_ascii=False)[:500]}")
  673. print(f"[{self.platform_name}] API 响应: errno={api_result.get('errno')}")
  674. # 检查登录状态
  675. if api_result.get('errno') != 0:
  676. error_msg = api_result.get('errmsg', '未知错误')
  677. errno = api_result.get('errno')
  678. print(f"[{self.platform_name}] API 返回错误: errno={errno}, msg={error_msg}")
  679. if errno == 110:
  680. raise Exception("Cookie 已过期,请重新登录")
  681. raise Exception(error_msg)
  682. # 解析作品列表
  683. data = api_result.get('data', {})
  684. article_list = data.get('article_list', [])
  685. has_more = data.get('has_more', False)
  686. total = data.get('total', 0)
  687. print(f"[{self.platform_name}] 获取到 {len(article_list)} 个作品,总数: {total}")
  688. for article in article_list:
  689. work_id = str(article.get('article_id', ''))
  690. if not work_id:
  691. continue
  692. # 处理封面图
  693. cover_url = ''
  694. cover_images = article.get('cover_images', [])
  695. if cover_images and len(cover_images) > 0:
  696. cover_url = cover_images[0]
  697. if cover_url and cover_url.startswith('//'):
  698. cover_url = 'https:' + cover_url
  699. works.append(WorkItem(
  700. work_id=work_id,
  701. title=article.get('title', ''),
  702. cover_url=cover_url,
  703. duration=0,
  704. status='published',
  705. publish_time=article.get('publish_time', ''),
  706. play_count=int(article.get('read_count', 0)),
  707. like_count=int(article.get('like_count', 0)),
  708. comment_count=int(article.get('comment_count', 0)),
  709. share_count=int(article.get('share_count', 0)),
  710. ))
  711. print(f"[{self.platform_name}] ✓ 成功解析 {len(works)} 个作品")
  712. except Exception as e:
  713. import traceback
  714. traceback.print_exc()
  715. return WorksResult(
  716. success=False,
  717. platform=self.platform_name,
  718. error=str(e)
  719. )
  720. return WorksResult(
  721. success=True,
  722. platform=self.platform_name,
  723. works=works,
  724. total=total,
  725. has_more=has_more
  726. )
  727. async def check_login_status(self, cookies: str) -> dict:
  728. """
  729. 检查百家号 Cookie 登录状态
  730. 使用直接 HTTP API 调用,不使用浏览器
  731. """
  732. import aiohttp
  733. print(f"[{self.platform_name}] 检查登录状态 (使用 API)")
  734. try:
  735. # 解析 cookies
  736. cookie_list = self.parse_cookies(cookies)
  737. cookie_str = '; '.join([f"{c['name']}={c['value']}" for c in cookie_list])
  738. headers = {
  739. 'Accept': 'application/json, text/plain, */*',
  740. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  741. 'Cookie': cookie_str,
  742. 'Referer': 'https://baijiahao.baidu.com/builder/rc/home'
  743. }
  744. async with aiohttp.ClientSession() as session:
  745. print(f"[{self.platform_name}] 调用 appinfo API 检查登录状态...")
  746. async with session.get(
  747. 'https://baijiahao.baidu.com/builder/app/appinfo',
  748. headers=headers,
  749. timeout=aiohttp.ClientTimeout(total=30)
  750. ) as response:
  751. api_result = await response.json()
  752. errno = api_result.get('errno')
  753. print(f"[{self.platform_name}] API 完整响应: {json.dumps(api_result, ensure_ascii=False)[:500]}")
  754. print(f"[{self.platform_name}] API 响应: errno={errno}")
  755. # errno 为 0 表示请求成功
  756. if errno == 0:
  757. # 检查是否有用户数据
  758. user_data = api_result.get('data', {}).get('user', {})
  759. if user_data:
  760. # 检查账号状态
  761. status = user_data.get('status', '')
  762. account_name = user_data.get('name') or user_data.get('uname', '')
  763. # 有效的账号状态:audit(审核中), pass(已通过), normal(正常), newbie(新手)
  764. valid_statuses = ['audit', 'pass', 'normal', 'newbie']
  765. if status in valid_statuses and account_name:
  766. print(f"[{self.platform_name}] ✓ 登录状态有效: {account_name} (status={status})")
  767. return {
  768. "success": True,
  769. "valid": True,
  770. "need_login": False,
  771. "message": "登录状态有效"
  772. }
  773. else:
  774. print(f"[{self.platform_name}] 账号状态异常: status={status}, name={account_name}")
  775. return {
  776. "success": True,
  777. "valid": False,
  778. "need_login": True,
  779. "message": f"账号状态异常: {status}"
  780. }
  781. else:
  782. print(f"[{self.platform_name}] 无用户数据,Cookie 可能无效")
  783. return {
  784. "success": True,
  785. "valid": False,
  786. "need_login": True,
  787. "message": "无用户数据"
  788. }
  789. # errno 非 0 表示请求失败
  790. # 常见错误码:110 = 未登录
  791. error_msg = api_result.get('errmsg', '未知错误')
  792. print(f"[{self.platform_name}] Cookie 无效: errno={errno}, msg={error_msg}")
  793. return {
  794. "success": True,
  795. "valid": False,
  796. "need_login": True,
  797. "message": error_msg
  798. }
  799. except Exception as e:
  800. import traceback
  801. traceback.print_exc()
  802. return {
  803. "success": False,
  804. "valid": False,
  805. "need_login": True,
  806. "error": str(e)
  807. }
  808. async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
  809. """获取百家号作品评论"""
  810. # TODO: 实现评论获取逻辑
  811. return CommentsResult(
  812. success=False,
  813. platform=self.platform_name,
  814. work_id=work_id,
  815. error="百家号评论功能暂未实现"
  816. )