|
|
@@ -32,93 +32,139 @@ class BaijiahaoPublisher(BasePublisher):
|
|
|
async def get_account_info(self, cookies: str) -> dict:
|
|
|
"""
|
|
|
获取百家号账号信息
|
|
|
- 通过调用 settingInfo API 获取用户信息
|
|
|
+ 使用直接 HTTP API 调用,不使用浏览器
|
|
|
"""
|
|
|
+ import aiohttp
|
|
|
+
|
|
|
print(f"\n{'='*60}")
|
|
|
- print(f"[{self.platform_name}] 获取账号信息")
|
|
|
+ print(f"[{self.platform_name}] 获取账号信息 (使用 API)")
|
|
|
print(f"{'='*60}")
|
|
|
|
|
|
try:
|
|
|
- await self.init_browser()
|
|
|
+ # 解析 cookies
|
|
|
cookie_list = self.parse_cookies(cookies)
|
|
|
- await self.set_cookies(cookie_list)
|
|
|
-
|
|
|
- if not self.page:
|
|
|
- raise Exception("Page not initialized")
|
|
|
-
|
|
|
- # 访问百家号后台首页
|
|
|
- print(f"[{self.platform_name}] 访问后台首页...")
|
|
|
- await self.page.goto(self.login_check_url, wait_until="domcontentloaded", timeout=30000)
|
|
|
- await asyncio.sleep(3)
|
|
|
+ cookie_str = '; '.join([f"{c['name']}={c['value']}" for c in cookie_list])
|
|
|
|
|
|
- # 检查登录状态
|
|
|
- current_url = self.page.url
|
|
|
- print(f"[{self.platform_name}] 当前 URL: {current_url}")
|
|
|
+ headers = {
|
|
|
+ 'Accept': 'application/json, text/plain, */*',
|
|
|
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
|
+ 'Cookie': cookie_str,
|
|
|
+ 'Referer': 'https://baijiahao.baidu.com/builder/rc/home'
|
|
|
+ }
|
|
|
|
|
|
- for indicator in self.login_indicators:
|
|
|
- if indicator in current_url:
|
|
|
- print(f"[{self.platform_name}] 检测到登录页面,Cookie 已失效")
|
|
|
+ async with aiohttp.ClientSession() as session:
|
|
|
+ # 步骤 1: 获取账号基本信息
|
|
|
+ print(f"[{self.platform_name}] [1/3] 调用 appinfo API...")
|
|
|
+ async with session.get(
|
|
|
+ 'https://baijiahao.baidu.com/builder/app/appinfo',
|
|
|
+ headers=headers,
|
|
|
+ timeout=aiohttp.ClientTimeout(total=30)
|
|
|
+ ) as response:
|
|
|
+ appinfo_result = await response.json()
|
|
|
+
|
|
|
+ print(f"[{self.platform_name}] appinfo API 完整响应: {json.dumps(appinfo_result, ensure_ascii=False)[:500]}")
|
|
|
+ print(f"[{self.platform_name}] appinfo API 响应: errno={appinfo_result.get('errno')}")
|
|
|
+
|
|
|
+ # 检查登录状态
|
|
|
+ if appinfo_result.get('errno') != 0:
|
|
|
+ error_msg = appinfo_result.get('errmsg', '未知错误')
|
|
|
+ errno = appinfo_result.get('errno')
|
|
|
+ print(f"[{self.platform_name}] API 返回错误: errno={errno}, msg={error_msg}")
|
|
|
+
|
|
|
+ # errno 110 表示未登录
|
|
|
+ if errno == 110:
|
|
|
+ return {
|
|
|
+ "success": False,
|
|
|
+ "error": "Cookie 已失效,需要重新登录",
|
|
|
+ "need_login": True
|
|
|
+ }
|
|
|
+
|
|
|
return {
|
|
|
"success": False,
|
|
|
- "error": "Cookie 已失效,需要重新登录",
|
|
|
+ "error": error_msg,
|
|
|
"need_login": True
|
|
|
}
|
|
|
-
|
|
|
- # 调用 settingInfo API 获取用户信息
|
|
|
- print(f"[{self.platform_name}] 调用 settingInfo API...")
|
|
|
- api_result = await self.page.evaluate('''
|
|
|
- async () => {
|
|
|
- try {
|
|
|
- const response = await fetch('https://baijiahao.baidu.com/user-ui/cms/settingInfo', {
|
|
|
- method: 'GET',
|
|
|
- credentials: 'include',
|
|
|
- headers: {
|
|
|
- 'Accept': 'application/json, text/plain, */*'
|
|
|
- }
|
|
|
- });
|
|
|
- return await response.json();
|
|
|
- } catch (e) {
|
|
|
- return { error: e.message };
|
|
|
- }
|
|
|
- }
|
|
|
- ''')
|
|
|
-
|
|
|
- print(f"[{self.platform_name}] API 响应: errno={api_result.get('errno')}")
|
|
|
-
|
|
|
- if api_result.get('error'):
|
|
|
- return {
|
|
|
- "success": False,
|
|
|
- "error": api_result.get('error')
|
|
|
- }
|
|
|
-
|
|
|
- if api_result.get('errno') == 0 and api_result.get('data'):
|
|
|
- data = api_result['data']
|
|
|
- account_info = {
|
|
|
- "success": True,
|
|
|
- "account_id": str(data.get('new_uc_id', '')) or f"baijiahao_{int(datetime.now().timestamp() * 1000)}",
|
|
|
- "account_name": data.get('name', '') or '百家号账号',
|
|
|
- "avatar_url": data.get('avatar', ''),
|
|
|
- "fans_count": 0, # 百家号 API 不直接返回粉丝数
|
|
|
- "works_count": 0,
|
|
|
- }
|
|
|
- print(f"[{self.platform_name}] 获取成功: {account_info['account_name']}")
|
|
|
- return account_info
|
|
|
- else:
|
|
|
- error_msg = api_result.get('errmsg', '未知错误')
|
|
|
- print(f"[{self.platform_name}] API 返回错误: {error_msg}")
|
|
|
|
|
|
- # 如果是登录相关错误,标记需要重新登录
|
|
|
- if api_result.get('errno') in [10000010, 10001401]:
|
|
|
+ # 获取用户数据
|
|
|
+ user_data = appinfo_result.get('data', {}).get('user', {})
|
|
|
+ if not user_data:
|
|
|
return {
|
|
|
"success": False,
|
|
|
- "error": error_msg,
|
|
|
+ "error": "无法获取用户信息",
|
|
|
"need_login": True
|
|
|
}
|
|
|
|
|
|
- return {
|
|
|
- "success": False,
|
|
|
- "error": error_msg
|
|
|
+ # 检查账号状态
|
|
|
+ status = user_data.get('status', '')
|
|
|
+ # 有效的账号状态:audit(审核中), pass(已通过), normal(正常), newbie(新手)
|
|
|
+ valid_statuses = ['audit', 'pass', 'normal', 'newbie']
|
|
|
+ if status not in valid_statuses:
|
|
|
+ print(f"[{self.platform_name}] 账号状态异常: {status}")
|
|
|
+
|
|
|
+ # 提取基本信息
|
|
|
+ account_name = user_data.get('name') or user_data.get('uname') or '百家号账号'
|
|
|
+ app_id = user_data.get('app_id') or user_data.get('id', 0)
|
|
|
+ account_id = str(app_id) if app_id else f"baijiahao_{int(datetime.now().timestamp() * 1000)}"
|
|
|
+
|
|
|
+ # 处理头像 URL
|
|
|
+ avatar_url = user_data.get('avatar') or user_data.get('avatar_unify', '')
|
|
|
+ if avatar_url and avatar_url.startswith('//'):
|
|
|
+ avatar_url = 'https:' + avatar_url
|
|
|
+
|
|
|
+ print(f"[{self.platform_name}] 账号名称: {account_name}, ID: {account_id}")
|
|
|
+
|
|
|
+ # 步骤 2: 获取粉丝数(非关键,失败不影响整体)
|
|
|
+ fans_count = 0
|
|
|
+ try:
|
|
|
+ print(f"[{self.platform_name}] [2/3] 调用 growth/get_info API 获取粉丝数...")
|
|
|
+ async with session.get(
|
|
|
+ 'https://baijiahao.baidu.com/cms-ui/rights/growth/get_info',
|
|
|
+ headers=headers,
|
|
|
+ timeout=aiohttp.ClientTimeout(total=10)
|
|
|
+ ) as response:
|
|
|
+ growth_result = await response.json()
|
|
|
+
|
|
|
+ if growth_result.get('errno') == 0:
|
|
|
+ growth_data = growth_result.get('data', {})
|
|
|
+ fans_count = int(growth_data.get('fans_num', 0))
|
|
|
+ print(f"[{self.platform_name}] 粉丝数: {fans_count}")
|
|
|
+ else:
|
|
|
+ print(f"[{self.platform_name}] 获取粉丝数失败: {growth_result.get('errmsg')}")
|
|
|
+ except Exception as e:
|
|
|
+ print(f"[{self.platform_name}] 获取粉丝数异常(非关键): {e}")
|
|
|
+
|
|
|
+ # 步骤 3: 获取作品数量(从作品列表第一页)
|
|
|
+ works_count = 0
|
|
|
+ try:
|
|
|
+ print(f"[{self.platform_name}] [3/3] 调用 article/lists API 获取作品数...")
|
|
|
+ async with session.get(
|
|
|
+ 'https://baijiahao.baidu.com/pcui/article/lists?start=0&count=1&article_type=video',
|
|
|
+ headers=headers,
|
|
|
+ timeout=aiohttp.ClientTimeout(total=10)
|
|
|
+ ) as response:
|
|
|
+ works_result = await response.json()
|
|
|
+
|
|
|
+ if works_result.get('errno') == 0:
|
|
|
+ works_data = works_result.get('data', {})
|
|
|
+ works_count = int(works_data.get('total', 0))
|
|
|
+ print(f"[{self.platform_name}] 作品数: {works_count}")
|
|
|
+ else:
|
|
|
+ print(f"[{self.platform_name}] 获取作品数失败: {works_result.get('errmsg')}")
|
|
|
+ except Exception as e:
|
|
|
+ print(f"[{self.platform_name}] 获取作品数异常(非关键): {e}")
|
|
|
+
|
|
|
+ # 返回账号信息
|
|
|
+ account_info = {
|
|
|
+ "success": True,
|
|
|
+ "account_id": account_id,
|
|
|
+ "account_name": account_name,
|
|
|
+ "avatar_url": avatar_url,
|
|
|
+ "fans_count": fans_count,
|
|
|
+ "works_count": works_count,
|
|
|
}
|
|
|
+
|
|
|
+ print(f"[{self.platform_name}] ✓ 获取成功: {account_name} (粉丝: {fans_count}, 作品: {works_count})")
|
|
|
+ return account_info
|
|
|
|
|
|
except Exception as e:
|
|
|
import traceback
|
|
|
@@ -127,8 +173,6 @@ class BaijiahaoPublisher(BasePublisher):
|
|
|
"success": False,
|
|
|
"error": str(e)
|
|
|
}
|
|
|
- finally:
|
|
|
- await self.close_browser()
|
|
|
|
|
|
async def check_captcha(self) -> dict:
|
|
|
"""检查页面是否需要验证码"""
|
|
|
@@ -643,9 +687,14 @@ class BaijiahaoPublisher(BasePublisher):
|
|
|
)
|
|
|
|
|
|
async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
|
|
|
- """获取百家号作品列表"""
|
|
|
+ """
|
|
|
+ 获取百家号作品列表
|
|
|
+ 使用直接 HTTP API 调用,不使用浏览器
|
|
|
+ """
|
|
|
+ import aiohttp
|
|
|
+
|
|
|
print(f"\n{'='*60}")
|
|
|
- print(f"[{self.platform_name}] 获取作品列表")
|
|
|
+ print(f"[{self.platform_name}] 获取作品列表 (使用 API)")
|
|
|
print(f"[{self.platform_name}] page={page}, page_size={page_size}")
|
|
|
print(f"{'='*60}")
|
|
|
|
|
|
@@ -654,57 +703,69 @@ class BaijiahaoPublisher(BasePublisher):
|
|
|
has_more = False
|
|
|
|
|
|
try:
|
|
|
- await self.init_browser()
|
|
|
+ # 解析 cookies
|
|
|
cookie_list = self.parse_cookies(cookies)
|
|
|
- await self.set_cookies(cookie_list)
|
|
|
-
|
|
|
- if not self.page:
|
|
|
- raise Exception("Page not initialized")
|
|
|
-
|
|
|
- # 访问内容管理页面
|
|
|
- await self.page.goto("https://baijiahao.baidu.com/builder/rc/content", wait_until="domcontentloaded", timeout=30000)
|
|
|
- await asyncio.sleep(3)
|
|
|
-
|
|
|
- # 检查登录状态
|
|
|
- current_url = self.page.url
|
|
|
- for indicator in self.login_indicators:
|
|
|
- if indicator in current_url:
|
|
|
- raise Exception("Cookie 已过期,请重新登录")
|
|
|
+ cookie_str = '; '.join([f"{c['name']}={c['value']}" for c in cookie_list])
|
|
|
|
|
|
- # 调用作品列表 API
|
|
|
- cursor = page * page_size
|
|
|
- api_result = await self.page.evaluate(f'''
|
|
|
- async () => {{
|
|
|
- try {{
|
|
|
- const response = await fetch('https://baijiahao.baidu.com/pcui/article/lists?start={cursor}&count={page_size}&article_type=video', {{
|
|
|
- method: 'GET',
|
|
|
- credentials: 'include',
|
|
|
- headers: {{
|
|
|
- 'Accept': 'application/json'
|
|
|
- }}
|
|
|
- }});
|
|
|
- return await response.json();
|
|
|
- }} catch (e) {{
|
|
|
- return {{ error: e.message }};
|
|
|
- }}
|
|
|
- }}
|
|
|
- ''')
|
|
|
+ headers = {
|
|
|
+ 'Accept': 'application/json, text/plain, */*',
|
|
|
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
|
+ 'Cookie': cookie_str,
|
|
|
+ 'Referer': 'https://baijiahao.baidu.com/builder/rc/content'
|
|
|
+ }
|
|
|
|
|
|
- print(f"[{self.platform_name}] API 响应: {json.dumps(api_result, ensure_ascii=False)[:200]}")
|
|
|
+ # 计算起始位置
|
|
|
+ start = page * page_size
|
|
|
|
|
|
- if api_result.get('errno') == 0:
|
|
|
- article_list = api_result.get('data', {}).get('article_list', [])
|
|
|
- has_more = api_result.get('data', {}).get('has_more', False)
|
|
|
+ async with aiohttp.ClientSession() as session:
|
|
|
+ print(f"[{self.platform_name}] 调用 article/lists API (start={start}, count={page_size})...")
|
|
|
+
|
|
|
+ async with session.get(
|
|
|
+ f'https://baijiahao.baidu.com/pcui/article/lists?start={start}&count={page_size}&article_type=video',
|
|
|
+ headers=headers,
|
|
|
+ timeout=aiohttp.ClientTimeout(total=30)
|
|
|
+ ) as response:
|
|
|
+ api_result = await response.json()
|
|
|
+
|
|
|
+ print(f"[{self.platform_name}] article/lists API 完整响应: {json.dumps(api_result, ensure_ascii=False)[:500]}")
|
|
|
+ print(f"[{self.platform_name}] API 响应: errno={api_result.get('errno')}")
|
|
|
+
|
|
|
+ # 检查登录状态
|
|
|
+ if api_result.get('errno') != 0:
|
|
|
+ error_msg = api_result.get('errmsg', '未知错误')
|
|
|
+ errno = api_result.get('errno')
|
|
|
+ print(f"[{self.platform_name}] API 返回错误: errno={errno}, msg={error_msg}")
|
|
|
+
|
|
|
+ if errno == 110:
|
|
|
+ raise Exception("Cookie 已过期,请重新登录")
|
|
|
+
|
|
|
+ raise Exception(error_msg)
|
|
|
+
|
|
|
+ # 解析作品列表
|
|
|
+ data = api_result.get('data', {})
|
|
|
+ article_list = data.get('article_list', [])
|
|
|
+ has_more = data.get('has_more', False)
|
|
|
+ total = data.get('total', 0)
|
|
|
+
|
|
|
+ print(f"[{self.platform_name}] 获取到 {len(article_list)} 个作品,总数: {total}")
|
|
|
|
|
|
for article in article_list:
|
|
|
work_id = str(article.get('article_id', ''))
|
|
|
if not work_id:
|
|
|
continue
|
|
|
|
|
|
+ # 处理封面图
|
|
|
+ cover_url = ''
|
|
|
+ cover_images = article.get('cover_images', [])
|
|
|
+ if cover_images and len(cover_images) > 0:
|
|
|
+ cover_url = cover_images[0]
|
|
|
+ if cover_url and cover_url.startswith('//'):
|
|
|
+ cover_url = 'https:' + cover_url
|
|
|
+
|
|
|
works.append(WorkItem(
|
|
|
work_id=work_id,
|
|
|
title=article.get('title', ''),
|
|
|
- cover_url=article.get('cover_images', [''])[0] if article.get('cover_images') else '',
|
|
|
+ cover_url=cover_url,
|
|
|
duration=0,
|
|
|
status='published',
|
|
|
publish_time=article.get('publish_time', ''),
|
|
|
@@ -714,9 +775,7 @@ class BaijiahaoPublisher(BasePublisher):
|
|
|
share_count=int(article.get('share_count', 0)),
|
|
|
))
|
|
|
|
|
|
- total = len(works)
|
|
|
-
|
|
|
- print(f"[{self.platform_name}] 获取到 {total} 个作品")
|
|
|
+ print(f"[{self.platform_name}] ✓ 成功解析 {len(works)} 个作品")
|
|
|
|
|
|
except Exception as e:
|
|
|
import traceback
|
|
|
@@ -726,8 +785,6 @@ class BaijiahaoPublisher(BasePublisher):
|
|
|
platform=self.platform_name,
|
|
|
error=str(e)
|
|
|
)
|
|
|
- finally:
|
|
|
- await self.close_browser()
|
|
|
|
|
|
return WorksResult(
|
|
|
success=True,
|
|
|
@@ -737,6 +794,100 @@ class BaijiahaoPublisher(BasePublisher):
|
|
|
has_more=has_more
|
|
|
)
|
|
|
|
|
|
+ async def check_login_status(self, cookies: str) -> dict:
|
|
|
+ """
|
|
|
+ 检查百家号 Cookie 登录状态
|
|
|
+ 使用直接 HTTP API 调用,不使用浏览器
|
|
|
+ """
|
|
|
+ import aiohttp
|
|
|
+
|
|
|
+ print(f"[{self.platform_name}] 检查登录状态 (使用 API)")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 解析 cookies
|
|
|
+ cookie_list = self.parse_cookies(cookies)
|
|
|
+ cookie_str = '; '.join([f"{c['name']}={c['value']}" for c in cookie_list])
|
|
|
+
|
|
|
+ headers = {
|
|
|
+ 'Accept': 'application/json, text/plain, */*',
|
|
|
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
|
+ 'Cookie': cookie_str,
|
|
|
+ 'Referer': 'https://baijiahao.baidu.com/builder/rc/home'
|
|
|
+ }
|
|
|
+
|
|
|
+ async with aiohttp.ClientSession() as session:
|
|
|
+ print(f"[{self.platform_name}] 调用 appinfo API 检查登录状态...")
|
|
|
+
|
|
|
+ async with session.get(
|
|
|
+ 'https://baijiahao.baidu.com/builder/app/appinfo',
|
|
|
+ headers=headers,
|
|
|
+ timeout=aiohttp.ClientTimeout(total=30)
|
|
|
+ ) as response:
|
|
|
+ api_result = await response.json()
|
|
|
+
|
|
|
+ errno = api_result.get('errno')
|
|
|
+ print(f"[{self.platform_name}] API 完整响应: {json.dumps(api_result, ensure_ascii=False)[:500]}")
|
|
|
+ print(f"[{self.platform_name}] API 响应: errno={errno}")
|
|
|
+
|
|
|
+ # errno 为 0 表示请求成功
|
|
|
+ if errno == 0:
|
|
|
+ # 检查是否有用户数据
|
|
|
+ user_data = api_result.get('data', {}).get('user', {})
|
|
|
+ if user_data:
|
|
|
+ # 检查账号状态
|
|
|
+ status = user_data.get('status', '')
|
|
|
+ account_name = user_data.get('name') or user_data.get('uname', '')
|
|
|
+
|
|
|
+ # 有效的账号状态:audit(审核中), pass(已通过), normal(正常), newbie(新手)
|
|
|
+ valid_statuses = ['audit', 'pass', 'normal', 'newbie']
|
|
|
+
|
|
|
+ if status in valid_statuses and account_name:
|
|
|
+ print(f"[{self.platform_name}] ✓ 登录状态有效: {account_name} (status={status})")
|
|
|
+ return {
|
|
|
+ "success": True,
|
|
|
+ "valid": True,
|
|
|
+ "need_login": False,
|
|
|
+ "message": "登录状态有效"
|
|
|
+ }
|
|
|
+ else:
|
|
|
+ print(f"[{self.platform_name}] 账号状态异常: status={status}, name={account_name}")
|
|
|
+ return {
|
|
|
+ "success": True,
|
|
|
+ "valid": False,
|
|
|
+ "need_login": True,
|
|
|
+ "message": f"账号状态异常: {status}"
|
|
|
+ }
|
|
|
+ else:
|
|
|
+ print(f"[{self.platform_name}] 无用户数据,Cookie 可能无效")
|
|
|
+ return {
|
|
|
+ "success": True,
|
|
|
+ "valid": False,
|
|
|
+ "need_login": True,
|
|
|
+ "message": "无用户数据"
|
|
|
+ }
|
|
|
+
|
|
|
+ # errno 非 0 表示请求失败
|
|
|
+ # 常见错误码:110 = 未登录
|
|
|
+ error_msg = api_result.get('errmsg', '未知错误')
|
|
|
+ print(f"[{self.platform_name}] Cookie 无效: errno={errno}, msg={error_msg}")
|
|
|
+
|
|
|
+ return {
|
|
|
+ "success": True,
|
|
|
+ "valid": False,
|
|
|
+ "need_login": True,
|
|
|
+ "message": error_msg
|
|
|
+ }
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ import traceback
|
|
|
+ traceback.print_exc()
|
|
|
+ return {
|
|
|
+ "success": False,
|
|
|
+ "valid": False,
|
|
|
+ "need_login": True,
|
|
|
+ "error": str(e)
|
|
|
+ }
|
|
|
+
|
|
|
async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
|
|
|
"""获取百家号作品评论"""
|
|
|
# TODO: 实现评论获取逻辑
|