# -*- coding: utf-8 -*-
# baijiahao.py
"""
Baijiahao (百家号) video publisher.
"""
import asyncio
import json
import os
import re
import traceback
from datetime import datetime
from typing import List

from .base import (
    BasePublisher,
    PublishParams,
    PublishResult,
    WorkItem,
    WorksResult,
    CommentItem,
    CommentsResult,
)
class BaijiahaoPublisher(BasePublisher):
    """
    Baijiahao (百家号) video publisher.

    Automates the Baijiahao creator center through Playwright.
    """

    platform_name = "baijiahao"
    login_url = "https://baijiahao.baidu.com/"
    # NOTE(review): publish() navigates to a hard-coded "videoV2" URL rather
    # than this attribute; presumably kept for BasePublisher - confirm.
    publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=video"
    cookie_domain = ".baidu.com"

    # Login-detection settings.
    # Page opened to probe whether the supplied cookies are still valid.
    login_check_url = "https://baijiahao.baidu.com/builder/rc/home"
    # URL fragments that mean the browser was redirected to a login page.
    login_indicators = ["passport.baidu.com", "/login", "wappass.baidu.com"]
    # Selectors of on-page login prompts. Not referenced by the methods of
    # this class; presumably consumed by BasePublisher - confirm.
    login_selectors = ['text="登录"', 'text="请登录"', '[class*="login-btn"]']
  26. async def get_account_info(self, cookies: str) -> dict:
  27. """
  28. 获取百家号账号信息
  29. 通过调用 settingInfo API 获取用户信息
  30. """
  31. print(f"\n{'='*60}")
  32. print(f"[{self.platform_name}] 获取账号信息")
  33. print(f"{'='*60}")
  34. try:
  35. await self.init_browser()
  36. cookie_list = self.parse_cookies(cookies)
  37. await self.set_cookies(cookie_list)
  38. if not self.page:
  39. raise Exception("Page not initialized")
  40. # 访问百家号后台首页
  41. print(f"[{self.platform_name}] 访问后台首页...")
  42. await self.page.goto(self.login_check_url, wait_until="domcontentloaded", timeout=30000)
  43. await asyncio.sleep(3)
  44. # 检查登录状态
  45. current_url = self.page.url
  46. print(f"[{self.platform_name}] 当前 URL: {current_url}")
  47. for indicator in self.login_indicators:
  48. if indicator in current_url:
  49. print(f"[{self.platform_name}] 检测到登录页面,Cookie 已失效")
  50. return {
  51. "success": False,
  52. "error": "Cookie 已失效,需要重新登录",
  53. "need_login": True
  54. }
  55. # 调用 settingInfo API 获取用户信息
  56. print(f"[{self.platform_name}] 调用 settingInfo API...")
  57. api_result = await self.page.evaluate('''
  58. async () => {
  59. try {
  60. const response = await fetch('https://baijiahao.baidu.com/user-ui/cms/settingInfo', {
  61. method: 'GET',
  62. credentials: 'include',
  63. headers: {
  64. 'Accept': 'application/json, text/plain, */*'
  65. }
  66. });
  67. return await response.json();
  68. } catch (e) {
  69. return { error: e.message };
  70. }
  71. }
  72. ''')
  73. print(f"[{self.platform_name}] API 响应: errno={api_result.get('errno')}")
  74. if api_result.get('error'):
  75. return {
  76. "success": False,
  77. "error": api_result.get('error')
  78. }
  79. if api_result.get('errno') == 0 and api_result.get('data'):
  80. data = api_result['data']
  81. account_info = {
  82. "success": True,
  83. "account_id": str(data.get('new_uc_id', '')) or f"baijiahao_{int(datetime.now().timestamp() * 1000)}",
  84. "account_name": data.get('name', '') or '百家号账号',
  85. "avatar_url": data.get('avatar', ''),
  86. "fans_count": 0, # 百家号 API 不直接返回粉丝数
  87. "works_count": 0,
  88. }
  89. print(f"[{self.platform_name}] 获取成功: {account_info['account_name']}")
  90. return account_info
  91. else:
  92. error_msg = api_result.get('errmsg', '未知错误')
  93. print(f"[{self.platform_name}] API 返回错误: {error_msg}")
  94. # 如果是登录相关错误,标记需要重新登录
  95. if api_result.get('errno') in [10000010, 10001401]:
  96. return {
  97. "success": False,
  98. "error": error_msg,
  99. "need_login": True
  100. }
  101. return {
  102. "success": False,
  103. "error": error_msg
  104. }
  105. except Exception as e:
  106. import traceback
  107. traceback.print_exc()
  108. return {
  109. "success": False,
  110. "error": str(e)
  111. }
  112. finally:
  113. await self.close_browser()
  114. async def check_captcha(self) -> dict:
  115. """检查页面是否需要验证码"""
  116. if not self.page:
  117. return {'need_captcha': False, 'captcha_type': ''}
  118. try:
  119. # 检查各种验证码
  120. captcha_selectors = [
  121. 'text="请输入验证码"',
  122. 'text="滑动验证"',
  123. '[class*="captcha"]',
  124. '[class*="verify"]',
  125. ]
  126. for selector in captcha_selectors:
  127. try:
  128. if await self.page.locator(selector).count() > 0:
  129. print(f"[{self.platform_name}] 检测到验证码: {selector}")
  130. return {'need_captcha': True, 'captcha_type': 'image'}
  131. except:
  132. pass
  133. # 检查登录弹窗
  134. login_selectors = [
  135. 'text="请登录"',
  136. 'text="登录后继续"',
  137. '[class*="login-dialog"]',
  138. ]
  139. for selector in login_selectors:
  140. try:
  141. if await self.page.locator(selector).count() > 0:
  142. print(f"[{self.platform_name}] 检测到需要登录: {selector}")
  143. return {'need_captcha': True, 'captcha_type': 'login'}
  144. except:
  145. pass
  146. except Exception as e:
  147. print(f"[{self.platform_name}] 验证码检测异常: {e}")
  148. return {'need_captcha': False, 'captcha_type': ''}
  149. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  150. """发布视频到百家号"""
  151. import os
  152. print(f"\n{'='*60}")
  153. print(f"[{self.platform_name}] 开始发布视频")
  154. print(f"[{self.platform_name}] 视频路径: {params.video_path}")
  155. print(f"[{self.platform_name}] 标题: {params.title}")
  156. print(f"[{self.platform_name}] Headless: {self.headless}")
  157. print(f"{'='*60}")
  158. self.report_progress(5, "正在初始化浏览器...")
  159. # 初始化浏览器
  160. await self.init_browser()
  161. print(f"[{self.platform_name}] 浏览器初始化完成")
  162. # 解析并设置 cookies
  163. cookie_list = self.parse_cookies(cookies)
  164. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
  165. await self.set_cookies(cookie_list)
  166. if not self.page:
  167. raise Exception("Page not initialized")
  168. # 检查视频文件
  169. if not os.path.exists(params.video_path):
  170. raise Exception(f"视频文件不存在: {params.video_path}")
  171. print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
  172. self.report_progress(10, "正在打开上传页面...")
  173. # 访问视频发布页面(使用新视频发布界面)
  174. video_publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=videoV2&is_from_cms=1"
  175. await self.page.goto(video_publish_url, wait_until="domcontentloaded", timeout=60000)
  176. await asyncio.sleep(3)
  177. # 检查是否跳转到登录页
  178. current_url = self.page.url
  179. print(f"[{self.platform_name}] 当前页面: {current_url}")
  180. for indicator in self.login_indicators:
  181. if indicator in current_url:
  182. screenshot_base64 = await self.capture_screenshot()
  183. return PublishResult(
  184. success=False,
  185. platform=self.platform_name,
  186. error="Cookie 已过期,需要重新登录",
  187. need_captcha=True,
  188. captcha_type='login',
  189. screenshot_base64=screenshot_base64,
  190. page_url=current_url,
  191. status='need_captcha'
  192. )
  193. # 使用 AI 检查验证码
  194. ai_captcha = await self.ai_check_captcha()
  195. if ai_captcha['has_captcha']:
  196. print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
  197. screenshot_base64 = await self.capture_screenshot()
  198. return PublishResult(
  199. success=False,
  200. platform=self.platform_name,
  201. error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
  202. need_captcha=True,
  203. captcha_type=ai_captcha['captcha_type'],
  204. screenshot_base64=screenshot_base64,
  205. page_url=current_url,
  206. status='need_captcha'
  207. )
  208. # 传统方式检查验证码
  209. captcha_result = await self.check_captcha()
  210. if captcha_result['need_captcha']:
  211. screenshot_base64 = await self.capture_screenshot()
  212. return PublishResult(
  213. success=False,
  214. platform=self.platform_name,
  215. error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
  216. need_captcha=True,
  217. captcha_type=captcha_result['captcha_type'],
  218. screenshot_base64=screenshot_base64,
  219. page_url=current_url,
  220. status='need_captcha'
  221. )
  222. self.report_progress(15, "正在选择视频文件...")
  223. # 等待页面加载完成
  224. await asyncio.sleep(2)
  225. # 关闭可能的弹窗
  226. try:
  227. close_buttons = [
  228. 'button:has-text("我知道了")',
  229. 'button:has-text("知道了")',
  230. '[class*="close"]',
  231. '[class*="modal-close"]',
  232. ]
  233. for btn_selector in close_buttons:
  234. try:
  235. btn = self.page.locator(btn_selector).first
  236. if await btn.count() > 0 and await btn.is_visible():
  237. await btn.click()
  238. await asyncio.sleep(0.5)
  239. except:
  240. pass
  241. except:
  242. pass
  243. # 上传视频 - 尝试多种方式
  244. upload_success = False
  245. # 方法1: 直接通过 file input 上传
  246. try:
  247. file_inputs = await self.page.query_selector_all('input[type="file"]')
  248. print(f"[{self.platform_name}] 找到 {len(file_inputs)} 个文件输入")
  249. for file_input in file_inputs:
  250. try:
  251. await file_input.set_input_files(params.video_path)
  252. upload_success = True
  253. print(f"[{self.platform_name}] 通过 file input 上传成功")
  254. break
  255. except Exception as e:
  256. print(f"[{self.platform_name}] file input 上传失败: {e}")
  257. except Exception as e:
  258. print(f"[{self.platform_name}] 查找 file input 失败: {e}")
  259. # 方法2: 点击上传区域
  260. if not upload_success:
  261. upload_selectors = [
  262. 'div[class*="upload-box"]',
  263. 'div[class*="drag-upload"]',
  264. 'div[class*="uploader"]',
  265. 'div:has-text("点击上传")',
  266. 'div:has-text("选择文件")',
  267. '[class*="upload-area"]',
  268. ]
  269. for selector in upload_selectors:
  270. if upload_success:
  271. break
  272. try:
  273. upload_area = self.page.locator(selector).first
  274. if await upload_area.count() > 0:
  275. print(f"[{self.platform_name}] 尝试点击上传区域: {selector}")
  276. async with self.page.expect_file_chooser(timeout=10000) as fc_info:
  277. await upload_area.click()
  278. file_chooser = await fc_info.value
  279. await file_chooser.set_files(params.video_path)
  280. upload_success = True
  281. print(f"[{self.platform_name}] 通过点击上传区域成功")
  282. break
  283. except Exception as e:
  284. print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")
  285. if not upload_success:
  286. screenshot_base64 = await self.capture_screenshot()
  287. return PublishResult(
  288. success=False,
  289. platform=self.platform_name,
  290. error="未找到上传入口",
  291. screenshot_base64=screenshot_base64,
  292. page_url=await self.get_page_url(),
  293. status='failed'
  294. )
  295. self.report_progress(20, "等待视频上传...")
  296. # 等待视频上传完成(最多5分钟)
  297. upload_timeout = 300
  298. start_time = asyncio.get_event_loop().time()
  299. while asyncio.get_event_loop().time() - start_time < upload_timeout:
  300. # 检查上传进度
  301. progress_text = ''
  302. try:
  303. progress_el = self.page.locator('[class*="progress"], [class*="percent"]').first
  304. if await progress_el.count() > 0:
  305. progress_text = await progress_el.text_content()
  306. if progress_text:
  307. import re
  308. match = re.search(r'(\d+)%', progress_text)
  309. if match:
  310. pct = int(match.group(1))
  311. self.report_progress(20 + int(pct * 0.4), f"视频上传中 {pct}%...")
  312. if pct >= 100:
  313. print(f"[{self.platform_name}] 上传完成")
  314. break
  315. except:
  316. pass
  317. # 检查是否出现标题输入框(说明上传完成)
  318. try:
  319. title_input = self.page.locator('input[placeholder*="标题"], textarea[placeholder*="标题"], [class*="title-input"] input').first
  320. if await title_input.count() > 0 and await title_input.is_visible():
  321. print(f"[{self.platform_name}] 检测到标题输入框,上传完成")
  322. break
  323. except:
  324. pass
  325. # 检查是否有错误提示
  326. try:
  327. error_el = self.page.locator('[class*="error"], [class*="fail"]').first
  328. if await error_el.count() > 0:
  329. error_text = await error_el.text_content()
  330. if error_text and ('失败' in error_text or '错误' in error_text):
  331. raise Exception(f"上传失败: {error_text}")
  332. except:
  333. pass
  334. await asyncio.sleep(3)
  335. self.report_progress(60, "正在填写标题...")
  336. await asyncio.sleep(2)
  337. # 填写标题
  338. title_filled = False
  339. title_selectors = [
  340. 'input[placeholder*="标题"]',
  341. 'textarea[placeholder*="标题"]',
  342. '[class*="title-input"] input',
  343. '[class*="title"] input',
  344. 'input[maxlength]',
  345. ]
  346. for selector in title_selectors:
  347. if title_filled:
  348. break
  349. try:
  350. title_input = self.page.locator(selector).first
  351. if await title_input.count() > 0 and await title_input.is_visible():
  352. await title_input.click()
  353. await self.page.keyboard.press("Control+KeyA")
  354. await self.page.keyboard.type(params.title[:30]) # 百家号标题限制30字
  355. title_filled = True
  356. print(f"[{self.platform_name}] 标题填写成功")
  357. except Exception as e:
  358. print(f"[{self.platform_name}] 标题选择器 {selector} 失败: {e}")
  359. if not title_filled:
  360. print(f"[{self.platform_name}] 警告: 未能填写标题")
  361. # 填写描述
  362. if params.description:
  363. self.report_progress(65, "正在填写描述...")
  364. try:
  365. desc_selectors = [
  366. 'textarea[placeholder*="描述"]',
  367. 'textarea[placeholder*="简介"]',
  368. '[class*="desc"] textarea',
  369. '[class*="description"] textarea',
  370. ]
  371. for selector in desc_selectors:
  372. try:
  373. desc_input = self.page.locator(selector).first
  374. if await desc_input.count() > 0 and await desc_input.is_visible():
  375. await desc_input.click()
  376. await self.page.keyboard.type(params.description[:200])
  377. print(f"[{self.platform_name}] 描述填写成功")
  378. break
  379. except:
  380. pass
  381. except Exception as e:
  382. print(f"[{self.platform_name}] 描述填写失败: {e}")
  383. self.report_progress(70, "正在发布...")
  384. await asyncio.sleep(2)
  385. # 点击发布按钮
  386. publish_selectors = [
  387. 'button:has-text("发布")',
  388. 'button:has-text("发表")',
  389. 'button:has-text("提交")',
  390. '[class*="publish"] button',
  391. '[class*="submit"] button',
  392. ]
  393. publish_clicked = False
  394. for selector in publish_selectors:
  395. if publish_clicked:
  396. break
  397. try:
  398. btn = self.page.locator(selector).first
  399. if await btn.count() > 0 and await btn.is_visible():
  400. # 检查按钮是否可用
  401. is_disabled = await btn.get_attribute('disabled')
  402. if is_disabled:
  403. print(f"[{self.platform_name}] 按钮 {selector} 被禁用")
  404. continue
  405. await btn.click()
  406. publish_clicked = True
  407. print(f"[{self.platform_name}] 点击发布按钮成功")
  408. except Exception as e:
  409. print(f"[{self.platform_name}] 发布按钮 {selector} 失败: {e}")
  410. if not publish_clicked:
  411. screenshot_base64 = await self.capture_screenshot()
  412. return PublishResult(
  413. success=False,
  414. platform=self.platform_name,
  415. error="未找到发布按钮",
  416. screenshot_base64=screenshot_base64,
  417. page_url=await self.get_page_url(),
  418. status='failed'
  419. )
  420. self.report_progress(80, "等待发布完成...")
  421. # 记录点击发布前的 URL
  422. publish_page_url = self.page.url
  423. print(f"[{self.platform_name}] 发布前 URL: {publish_page_url}")
  424. # 等待发布完成(最多3分钟)
  425. publish_timeout = 180
  426. start_time = asyncio.get_event_loop().time()
  427. last_url = publish_page_url
  428. while asyncio.get_event_loop().time() - start_time < publish_timeout:
  429. await asyncio.sleep(3)
  430. current_url = self.page.url
  431. # 检测 URL 是否发生变化
  432. if current_url != last_url:
  433. print(f"[{self.platform_name}] URL 变化: {last_url} -> {current_url}")
  434. last_url = current_url
  435. # 检查是否跳转到内容管理页面(真正的成功标志)
  436. # 百家号发布成功后会跳转到 /builder/rc/content 页面
  437. if '/builder/rc/content' in current_url and 'edit' not in current_url:
  438. self.report_progress(100, "发布成功!")
  439. print(f"[{self.platform_name}] 发布成功,已跳转到内容管理页: {current_url}")
  440. screenshot_base64 = await self.capture_screenshot()
  441. return PublishResult(
  442. success=True,
  443. platform=self.platform_name,
  444. message="发布成功",
  445. screenshot_base64=screenshot_base64,
  446. page_url=current_url,
  447. status='success'
  448. )
  449. # 检查是否有明确的成功提示弹窗
  450. try:
  451. # 百家号发布成功会显示"发布成功"弹窗
  452. success_modal = self.page.locator('div:has-text("发布成功"), div:has-text("提交成功"), div:has-text("视频发布成功")').first
  453. if await success_modal.count() > 0 and await success_modal.is_visible():
  454. self.report_progress(100, "发布成功!")
  455. print(f"[{self.platform_name}] 检测到发布成功弹窗")
  456. screenshot_base64 = await self.capture_screenshot()
  457. # 等待一下看是否会跳转
  458. await asyncio.sleep(3)
  459. return PublishResult(
  460. success=True,
  461. platform=self.platform_name,
  462. message="发布成功",
  463. screenshot_base64=screenshot_base64,
  464. page_url=self.page.url,
  465. status='success'
  466. )
  467. except Exception as e:
  468. print(f"[{self.platform_name}] 检测成功提示异常: {e}")
  469. # 检查是否有错误提示
  470. try:
  471. error_selectors = [
  472. 'div.error-tip',
  473. 'div[class*="error-msg"]',
  474. 'span[class*="error"]',
  475. 'div:has-text("发布失败")',
  476. 'div:has-text("提交失败")',
  477. ]
  478. for error_selector in error_selectors:
  479. error_el = self.page.locator(error_selector).first
  480. if await error_el.count() > 0 and await error_el.is_visible():
  481. error_text = await error_el.text_content()
  482. if error_text and error_text.strip():
  483. print(f"[{self.platform_name}] 检测到错误: {error_text}")
  484. screenshot_base64 = await self.capture_screenshot()
  485. return PublishResult(
  486. success=False,
  487. platform=self.platform_name,
  488. error=f"发布失败: {error_text.strip()}",
  489. screenshot_base64=screenshot_base64,
  490. page_url=current_url,
  491. status='failed'
  492. )
  493. except Exception as e:
  494. print(f"[{self.platform_name}] 检测错误提示异常: {e}")
  495. # 检查验证码
  496. captcha_result = await self.check_captcha()
  497. if captcha_result['need_captcha']:
  498. screenshot_base64 = await self.capture_screenshot()
  499. return PublishResult(
  500. success=False,
  501. platform=self.platform_name,
  502. error=f"发布过程中需要{captcha_result['captcha_type']}验证码",
  503. need_captcha=True,
  504. captcha_type=captcha_result['captcha_type'],
  505. screenshot_base64=screenshot_base64,
  506. page_url=current_url,
  507. status='need_captcha'
  508. )
  509. # 检查发布按钮状态(如果还在编辑页面)
  510. if 'edit' in current_url:
  511. try:
  512. # 检查是否正在上传/处理中
  513. processing_indicators = [
  514. '[class*="loading"]',
  515. '[class*="uploading"]',
  516. '[class*="processing"]',
  517. 'div:has-text("正在上传")',
  518. 'div:has-text("正在处理")',
  519. ]
  520. is_processing = False
  521. for indicator in processing_indicators:
  522. if await self.page.locator(indicator).count() > 0:
  523. is_processing = True
  524. print(f"[{self.platform_name}] 正在处理中...")
  525. break
  526. if not is_processing:
  527. # 如果不是在处理中,可能需要重新点击发布按钮
  528. elapsed = asyncio.get_event_loop().time() - start_time
  529. if elapsed > 30: # 30秒后还在编辑页且不在处理中,可能发布没生效
  530. print(f"[{self.platform_name}] 发布似乎未生效,尝试重新点击发布按钮...")
  531. for selector in publish_selectors:
  532. try:
  533. btn = self.page.locator(selector).first
  534. if await btn.count() > 0 and await btn.is_visible():
  535. is_disabled = await btn.get_attribute('disabled')
  536. if not is_disabled:
  537. await btn.click()
  538. print(f"[{self.platform_name}] 重新点击发布按钮")
  539. break
  540. except:
  541. pass
  542. except Exception as e:
  543. print(f"[{self.platform_name}] 检查处理状态异常: {e}")
  544. # 超时,获取截图分析最终状态
  545. print(f"[{self.platform_name}] 发布超时,最终 URL: {self.page.url}")
  546. screenshot_base64 = await self.capture_screenshot()
  547. # 最后一次检查是否在内容管理页
  548. final_url = self.page.url
  549. if '/builder/rc/content' in final_url and 'edit' not in final_url:
  550. return PublishResult(
  551. success=True,
  552. platform=self.platform_name,
  553. message="发布成功(延迟确认)",
  554. screenshot_base64=screenshot_base64,
  555. page_url=final_url,
  556. status='success'
  557. )
  558. return PublishResult(
  559. success=False,
  560. platform=self.platform_name,
  561. error="发布超时,请手动检查发布状态",
  562. screenshot_base64=screenshot_base64,
  563. page_url=final_url,
  564. status='need_action'
  565. )
  566. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  567. """获取百家号作品列表"""
  568. print(f"\n{'='*60}")
  569. print(f"[{self.platform_name}] 获取作品列表")
  570. print(f"[{self.platform_name}] page={page}, page_size={page_size}")
  571. print(f"{'='*60}")
  572. works: List[WorkItem] = []
  573. total = 0
  574. has_more = False
  575. try:
  576. await self.init_browser()
  577. cookie_list = self.parse_cookies(cookies)
  578. await self.set_cookies(cookie_list)
  579. if not self.page:
  580. raise Exception("Page not initialized")
  581. # 访问内容管理页面
  582. await self.page.goto("https://baijiahao.baidu.com/builder/rc/content", wait_until="domcontentloaded", timeout=30000)
  583. await asyncio.sleep(3)
  584. # 检查登录状态
  585. current_url = self.page.url
  586. for indicator in self.login_indicators:
  587. if indicator in current_url:
  588. raise Exception("Cookie 已过期,请重新登录")
  589. # 调用作品列表 API
  590. cursor = page * page_size
  591. api_result = await self.page.evaluate(f'''
  592. async () => {{
  593. try {{
  594. const response = await fetch('https://baijiahao.baidu.com/pcui/article/lists?start={cursor}&count={page_size}&article_type=video', {{
  595. method: 'GET',
  596. credentials: 'include',
  597. headers: {{
  598. 'Accept': 'application/json'
  599. }}
  600. }});
  601. return await response.json();
  602. }} catch (e) {{
  603. return {{ error: e.message }};
  604. }}
  605. }}
  606. ''')
  607. print(f"[{self.platform_name}] API 响应: {json.dumps(api_result, ensure_ascii=False)[:200]}")
  608. if api_result.get('errno') == 0:
  609. article_list = api_result.get('data', {}).get('article_list', [])
  610. has_more = api_result.get('data', {}).get('has_more', False)
  611. for article in article_list:
  612. work_id = str(article.get('article_id', ''))
  613. if not work_id:
  614. continue
  615. works.append(WorkItem(
  616. work_id=work_id,
  617. title=article.get('title', ''),
  618. cover_url=article.get('cover_images', [''])[0] if article.get('cover_images') else '',
  619. duration=0,
  620. status='published',
  621. publish_time=article.get('publish_time', ''),
  622. play_count=int(article.get('read_count', 0)),
  623. like_count=int(article.get('like_count', 0)),
  624. comment_count=int(article.get('comment_count', 0)),
  625. share_count=int(article.get('share_count', 0)),
  626. ))
  627. total = len(works)
  628. print(f"[{self.platform_name}] 获取到 {total} 个作品")
  629. except Exception as e:
  630. import traceback
  631. traceback.print_exc()
  632. return WorksResult(
  633. success=False,
  634. platform=self.platform_name,
  635. error=str(e)
  636. )
  637. finally:
  638. await self.close_browser()
  639. return WorksResult(
  640. success=True,
  641. platform=self.platform_name,
  642. works=works,
  643. total=total,
  644. has_more=has_more
  645. )
  646. async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
  647. """获取百家号作品评论"""
  648. # TODO: 实现评论获取逻辑
  649. return CommentsResult(
  650. success=False,
  651. platform=self.platform_name,
  652. work_id=work_id,
  653. error="百家号评论功能暂未实现"
  654. )