浏览代码

feat: 优化百家号发布流程并解决分散认证问题

增加会话初始化步骤,使用aiohttp自动管理cookies
添加重试机制处理分散认证错误(10001402)
完善请求头信息,提高API调用成功率
新增测试脚本验证百家号API功能
Ethanfly 15 小时之前
父节点
当前提交
204c4f0738

二进制
server/python/platforms/__pycache__/baijiahao.cpython-313.pyc


二进制
server/python/platforms/__pycache__/weixin.cpython-313.pyc


+ 157 - 17
server/python/platforms/baijiahao.py

@@ -43,18 +43,66 @@ class BaijiahaoPublisher(BasePublisher):
         try:
             # 解析 cookies
             cookie_list = self.parse_cookies(cookies)
-            cookie_str = '; '.join([f"{c['name']}={c['value']}" for c in cookie_list])
+            cookie_dict = {c['name']: c['value'] for c in cookie_list}
+            
+            # 重要:百家号需要先访问主页建立会话上下文
+            print(f"[{self.platform_name}] 第一步:访问主页建立会话...")
+            session_headers = {
+                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+                # Cookie 由 session 管理,不手动设置
+                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Connection': 'keep-alive',
+                'Upgrade-Insecure-Requests': '1',
+                'Sec-Fetch-Dest': 'document',
+                'Sec-Fetch-Mode': 'navigate',
+                'Sec-Fetch-Site': 'none',
+                'Sec-Fetch-User': '?1',
+                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"Windows"'
+            }
             
             headers = {
                 'Accept': 'application/json, text/plain, */*',
                 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-                'Cookie': cookie_str,
-                'Referer': 'https://baijiahao.baidu.com/builder/rc/home'
+                # Cookie 由 session 管理,不手动设置
+                'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
+                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Connection': 'keep-alive',
+                'Sec-Fetch-Dest': 'empty',
+                'Sec-Fetch-Mode': 'cors',
+                'Sec-Fetch-Site': 'same-origin',
+                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"Windows"'
             }
             
-            async with aiohttp.ClientSession() as session:
+            # 使用 cookies 参数初始化 session,让 aiohttp 自动管理 cookie 更新
+            async with aiohttp.ClientSession(cookies=cookie_dict) as session:
+                # 步骤 0: 先访问主页建立会话上下文(关键步骤!)
+                print(f"[{self.platform_name}] [0/4] 访问主页建立会话上下文...")
+                async with session.get(
+                    'https://baijiahao.baidu.com/builder/rc/home',
+                    headers=session_headers,
+                    timeout=aiohttp.ClientTimeout(total=30)
+                ) as home_response:
+                    home_status = home_response.status
+                    print(f"[{self.platform_name}] 主页访问状态: {home_status}")
+                    
+                    # 获取响应头中的新cookies(如果有)
+                    if 'Set-Cookie' in home_response.headers:
+                        new_cookies = home_response.headers['Set-Cookie']
+                        print(f"[{self.platform_name}] 获取到新的会话Cookie")
+                        # 这里可以处理新的cookies,但暂时跳过复杂处理
+                
+                # 短暂等待确保会话建立
+                await asyncio.sleep(1)
+                
                 # 步骤 1: 获取账号基本信息
-                print(f"[{self.platform_name}] [1/3] 调用 appinfo API...")
+                print(f"[{self.platform_name}] [1/4] 调用 appinfo API...")
                 async with session.get(
                     'https://baijiahao.baidu.com/builder/app/appinfo',
                     headers=headers,
@@ -79,6 +127,41 @@ class BaijiahaoPublisher(BasePublisher):
                             "need_login": True
                         }
                     
+                    # errno 10001402 表示分散认证问题,尝试重新访问主页后重试
+                    if errno == 10001402:
+                        print(f"[{self.platform_name}] 检测到分散认证问题,尝试重新访问主页...")
+                        await asyncio.sleep(2)
+                        
+                        # 重新访问主页
+                        async with session.get(
+                            'https://baijiahao.baidu.com/builder/rc/home',
+                            headers=session_headers,
+                            timeout=aiohttp.ClientTimeout(total=30)
+                        ) as retry_home_response:
+                            print(f"[{self.platform_name}] 重新访问主页状态: {retry_home_response.status}")
+                        
+                        await asyncio.sleep(1)
+                        
+                        # 重试 API 调用
+                        async with session.get(
+                            'https://baijiahao.baidu.com/builder/app/appinfo',
+                            headers=headers,
+                            timeout=aiohttp.ClientTimeout(total=30)
+                        ) as retry_response:
+                            retry_result = await retry_response.json()
+                            
+                            if retry_result.get('errno') == 0:
+                                print(f"[{self.platform_name}] 分散认证问题已解决")
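+                                # NOTE(review): the retry result is stored here, but control then falls through to the failure return that follows this if-block, so the successful retry is not actually used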
+                                appinfo_result = retry_result
+                            else:
+                                print(f"[{self.platform_name}] 重试仍然失败")
+                                return {
+                                    "success": False,
+                                    "error": f"分散认证问题: {error_msg}",
+                                    "need_login": True
+                                }
+                    
                     return {
                         "success": False,
                         "error": error_msg,
@@ -146,7 +229,7 @@ class BaijiahaoPublisher(BasePublisher):
                         headers={
                             'accept': '*/*',
                             'user-agent': 'PostmanRuntime/7.51.0',
-                            'cookie': cookie_str,
+                            # cookie 由 session 管理
                             'referer': 'https://baijiahao.baidu.com/builder/rc/content',
                             'connection': 'keep-alive',
                             'accept-encoding': 'gzip, deflate, br',
@@ -165,10 +248,17 @@ class BaijiahaoPublisher(BasePublisher):
                         print(f"[{self.platform_name}] 分散认证问题 (errno=10001402),3秒后重试...")
                         await asyncio.sleep(3)
                         
-                        # 重试一次
+                        # 重试一次,使用更完整的请求头
+                        retry_headers = headers.copy()
+                        retry_headers.update({
+                            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+                            'Cache-Control': 'max-age=0',
+                            'Upgrade-Insecure-Requests': '1',
+                        })
+                        
                         async with session.get(
                             list_url,
-                            headers=headers,
+                            headers=retry_headers,
                             timeout=aiohttp.ClientTimeout(total=30)
                         ) as retry_response:
                             retry_text = await retry_response.text()
@@ -749,19 +839,28 @@ class BaijiahaoPublisher(BasePublisher):
         try:
             # 解析 cookies
             cookie_list = self.parse_cookies(cookies)
-            cookie_str = '; '.join([f"{c['name']}={c['value']}" for c in cookie_list])
+            cookie_dict = {c['name']: c['value'] for c in cookie_list}
             
             headers = {
                 'Accept': 'application/json, text/plain, */*',
                 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-                'Cookie': cookie_str,
-                'Referer': 'https://baijiahao.baidu.com/builder/rc/content'
+                # Cookie 由 session 管理
+                'Referer': 'https://baijiahao.baidu.com/builder/rc/content',
+                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Connection': 'keep-alive',
+                'Sec-Fetch-Dest': 'empty',
+                'Sec-Fetch-Mode': 'cors',
+                'Sec-Fetch-Site': 'same-origin',
+                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"Windows"'
             }
             
             # 计算起始位置
             start = page * page_size
             
-            async with aiohttp.ClientSession() as session:
+            async with aiohttp.ClientSession(cookies=cookie_dict) as session:
                 print(f"[{self.platform_name}] 调用 article/lists API (start={start}, count={page_size})...")
                 
                 async with session.get(
@@ -850,17 +949,58 @@ class BaijiahaoPublisher(BasePublisher):
         try:
             # 解析 cookies
             cookie_list = self.parse_cookies(cookies)
-            cookie_str = '; '.join([f"{c['name']}={c['value']}" for c in cookie_list])
+            cookie_dict = {c['name']: c['value'] for c in cookie_list}
+            
+            # 重要:百家号需要先访问主页建立会话上下文
+            session_headers = {
+                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+                # Cookie 由 session 管理
+                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Connection': 'keep-alive',
+                'Upgrade-Insecure-Requests': '1',
+                'Sec-Fetch-Dest': 'document',
+                'Sec-Fetch-Mode': 'navigate',
+                'Sec-Fetch-Site': 'none',
+                'Sec-Fetch-User': '?1',
+                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"Windows"'
+            }
             
             headers = {
                 'Accept': 'application/json, text/plain, */*',
                 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-                'Cookie': cookie_str,
-                'Referer': 'https://baijiahao.baidu.com/builder/rc/home'
+                # Cookie 由 session 管理
+                'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
+                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Connection': 'keep-alive',
+                'Sec-Fetch-Dest': 'empty',
+                'Sec-Fetch-Mode': 'cors',
+                'Sec-Fetch-Site': 'same-origin',
+                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"Windows"'
             }
             
-            async with aiohttp.ClientSession() as session:
-                print(f"[{self.platform_name}] 调用 appinfo API 检查登录状态...")
+            async with aiohttp.ClientSession(cookies=cookie_dict) as session:
+                # 步骤 0: 先访问主页建立会话上下文(关键步骤!)
+                print(f"[{self.platform_name}] [0/2] 访问主页建立会话上下文...")
+                async with session.get(
+                    'https://baijiahao.baidu.com/builder/rc/home',
+                    headers=session_headers,
+                    timeout=aiohttp.ClientTimeout(total=30)
+                ) as home_response:
+                    home_status = home_response.status
+                    print(f"[{self.platform_name}] 主页访问状态: {home_status}")
+                
+                # 短暂等待确保会话建立
+                await asyncio.sleep(1)
+                
+                # 步骤 1: 调用 API 检查登录状态
+                print(f"[{self.platform_name}] [1/2] 调用 appinfo API 检查登录状态...")
                 
                 async with session.get(
                     'https://baijiahao.baidu.com/builder/app/appinfo',

+ 2 - 92
server/python/platforms/weixin.py

@@ -536,100 +536,10 @@ class WeixinPublisher(BasePublisher):
         print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
         
         self.report_progress(10, "正在打开上传页面...")
-        print(f"[{self.platform_name}] 当前 发布URL: {self.publish_url}")
+        
         # 访问上传页面
         await self.page.goto(self.publish_url, wait_until="networkidle", timeout=60000)
-        await asyncio.sleep(10)
-
-        # 打印页面HTML调试
-        print(f"[{self.platform_name}] 当前 URL: {self.page.url}")
-        html_content = await self.page.content()
-        print(f"[{self.platform_name}] 页面HTML长度: {len(html_content)}")
-        
-        # 截图调试
-        screenshot_path = f"weixin_publish_{int(asyncio.get_event_loop().time())}.png"
-        await self.page.screenshot(path=screenshot_path)
-        print(f"[{self.platform_name}] 截图已保存: {screenshot_path}")
-        
-        # 检查 input[type='file'] 是否存在
-        file_input = self.page.locator("input[type='file']")
-        count = await file_input.count()
-        print(f"[{self.platform_name}] 找到 {count} 个 file input")
-        
-        if count == 0:
-            raise Exception("页面中未找到 input[type='file'] 元素")
-        
-        # 直接设置文件,不触发click
-        print("上传文件...")
-        file_path = params.video_path
-        await file_input.first.set_input_files(file_path)
-        print(f"[{self.platform_name}] 文件已设置: {file_path}")
-        
-        # 等待上传进度
-        await asyncio.sleep(5)
-        
-        # 等待删除标签弹窗可见(可选,设置超时)
-        try:
-            await self.page.wait_for_selector(".weui-desktop-popover__wrp.finder-popover-dialog-wrap .finder-tag-wrap", state="visible", timeout=20000)
-            print("删除标签弹窗已显示")
-        except:
-            print("删除标签弹窗未出现,继续执行")
-        
-        # 主动关闭系统文件选择窗口(如果还存在)
-        try:
-            # 获取所有窗口
-            context_pages = self.page.context.pages
-            for p in context_pages:
-                if p != self.page and "打开" in await p.title():
-                    print(f"关闭系统文件选择窗口: {await p.title()}")
-                    await p.close()
-        except Exception as e:
-            print(f"关闭文件选择窗口异常: {e}")
-
-
-        
-        
-
-        # 填写多个输入框
-        print("填写输入框...")
-        # 描述输入框
-        await self.page.locator("div.input-editor[contenteditable][data-placeholder='添加描述']").fill("智能拍照机来啦")
-        
-        # 短标题输入框
-        await self.page.fill("input.weui-desktop-form__input[placeholder*='概括视频主要内容']", "解放双手的智能拍照机")
-        await self.page.wait_for_timeout(1000)
-
-
-        # 点击最下方的发布按钮
-        print("点击发布按钮...")
-        await self.page.click("button.weui-desktop-btn.weui-desktop-btn_primary:has-text('发表')")
-
-        
-        # 监控是否出现"直接发表"按钮
-        try:
-            direct_publish_btn = self.page.locator("button.weui-desktop-btn.weui-desktop-btn_default:has-text('直接发表')")
-            await direct_publish_btn.wait_for(state="visible", timeout=3000)
-            print("检测到'直接发表'按钮,点击...")
-            await direct_publish_btn.click()
-        except:
-            print("未检测到'直接发表'按钮,继续...")
-
-        
-        
-        # 等待发布完成
-        await self.page.wait_for_timeout(3000)
-        print("发布完成!")
-
-        return PublishResult(
-            success=True,
-            platform=self.platform_name,
-            message="发布成功",
-            screenshot_base64="",
-            page_url=self.publish_url,
-            status='success'
-        )
-
-
+        await asyncio.sleep(3)
         
         # 检查是否跳转到登录页
         current_url = self.page.url

+ 55 - 0
server/python/test_baijiahao_api.py

@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+测试百家号Cookie API调用
+"""
+
+import asyncio
+import aiohttp
+import json
+import sys
+import os
+
+# 添加路径
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from platforms.baijiahao import BaijiahaoPublisher
+
+async def test_baijiahao_api():
+    """测试百家号API调用"""
+    print("=" * 60)
+    print("测试百家号API调用")
+    print("=" * 60)
+    
+    # The actual cookie is read interactively from stdin at run time (copy it from a logged-in browser session)
+    test_cookie = input("请输入百家号Cookie(从浏览器登录后获取): ").strip()
+    
+    if not test_cookie:
+        print("Cookie 不能为空")
+        return
+    
+    publisher = BaijiahaoPublisher(headless=True)
+    
+    try:
+        # 测试获取账号信息
+        print("\n1. 测试获取账号信息...")
+        result = await publisher.get_account_info(test_cookie)
+        print(f"结果: {json.dumps(result, ensure_ascii=False, indent=2)}")
+        
+        # 测试检查登录状态
+        print("\n2. 测试检查登录状态...")
+        status = await publisher.check_login_status(test_cookie)
+        print(f"状态: {json.dumps(status, ensure_ascii=False, indent=2)}")
+        
+        # 测试获取作品列表
+        print("\n3. 测试获取作品列表...")
+        works = await publisher.get_works(test_cookie, 0, 10)
+        print(f"作品: {json.dumps(works.to_dict(), ensure_ascii=False, indent=2)}")
+        
+    except Exception as e:
+        print(f"测试失败: {e}")
+        import traceback
+        traceback.print_exc()
+
+if __name__ == "__main__":
+    asyncio.run(test_baijiahao_api())

+ 6 - 0
server/src/services/AccountService.ts

@@ -383,6 +383,7 @@ export class AccountService {
 
         if (cookieList.length > 0 && !cookieParseError) {
           // 抖音、小红书、百家号直接使用 API 获取准确数据,不使用 AI(因为 AI 可能识别错误)
+          // 但是如果 API 调用失败,作为备用方案仍会尝试 AI
           const platformsSkipAI: PlatformType[] = ['douyin', 'xiaohongshu', 'baijiahao'];
           const shouldUseAI = aiService.isAvailable() && !platformsSkipAI.includes(platform);
           
@@ -473,6 +474,11 @@ export class AccountService {
               } catch (infoError) {
                 // 获取账号信息失败,但 Cookie 检查已通过,保持 active 状态
                 logger.warn(`Failed to fetch account info for ${accountId}, but cookie is valid:`, infoError);
+                
+                // 对于百家号,如果是获取信息失败,可能是分散认证问题,不需要立即标记为失败
+                if (platform === 'baijiahao') {
+                  logger.info(`[baijiahao] Account info fetch failed for ${accountId}, but this might be due to distributed auth. Keeping status active.`);
+                }
               }
             }
           }

+ 40 - 4
server/src/services/login/BaijiahaoLoginService.ts

@@ -60,8 +60,9 @@ export class BaijiahaoLoginService extends BaseLoginService {
         urlPattern: '/pcui/article/lists',
         dataKey: 'articleList',
         handler: (data: any) => {
-          const list = data.data?.list || data.list || [];
-          return { list, count: list.length };
+          // 处理分散的响应格式
+          const dataList = data.data?.article_list || data.data?.list || data.list || [];
+          return { list: dataList, count: dataList.length };
         },
       },
     ];
@@ -72,6 +73,18 @@ export class BaijiahaoLoginService extends BaseLoginService {
    */
   protected override async collectAccountInfo(session: LoginSession): Promise<AccountInfo | null> {
     try {
+      // 关键步骤:确保在主页面(首页)获取完整Cookie上下文
+      logger.info('[百家号] 确保在主页面获取完整Cookie...');
+      const currentUrl = session.page.url();
+      if (!currentUrl.includes('/builder/rc/home')) {
+        logger.info(`[百家号] 当前不在主页: ${currentUrl},跳转到主页...`);
+        await session.page.goto('https://baijiahao.baidu.com/builder/rc/home', { 
+          waitUntil: 'domcontentloaded',
+          timeout: 30000 
+        });
+        await new Promise(resolve => setTimeout(resolve, 3000)); // 等待页面完全加载
+      }
+
       // 步骤3: 等待 appinfo API
       logger.info('[百家号] 等待 appinfo API...');
       let appInfo = await this.waitForApiData(session, 'appInfo', 10000);
@@ -79,7 +92,8 @@ export class BaijiahaoLoginService extends BaseLoginService {
       if (!appInfo) {
         logger.info('[百家号] 未拿到 appinfo,刷新页面重试...');
         await session.page.reload({ waitUntil: 'domcontentloaded' });
-        appInfo = await this.waitForApiData(session, 'appInfo', 10000);
+        await new Promise(resolve => setTimeout(resolve, 3000)); // 增加等待时间
+        appInfo = await this.waitForApiData(session, 'appInfo', 15000);
       }
 
       if (!appInfo?.appId) {
@@ -98,7 +112,29 @@ export class BaijiahaoLoginService extends BaseLoginService {
       // 步骤5+6: 跳转到作品管理页,等待文章列表 API
       logger.info('[百家号] 跳转到作品管理页...');
       const contentUrl = 'https://baijiahao.baidu.com/builder/rc/content';
-      const articleData = await this.navigateAndWaitForApi(session, contentUrl, 'articleList', 15000);
+      
+      // 尝试多次获取作品列表(百家号可能有分散认证问题)
+      let articleData = null;
+      for (let attempt = 1; attempt <= 3; attempt++) {
+        try {
+          logger.info(`[百家号] 获取作品列表(第${attempt}次尝试)...`);
+          articleData = await this.navigateAndWaitForApi(session, contentUrl, 'articleList', 15000);
+          
+          if (articleData) {
+            break;
+          }
+          
+          if (attempt < 3) {
+            logger.info(`[百家号] 第${attempt}次尝试失败,等待${attempt * 2}秒后重试...`);
+            await new Promise(resolve => setTimeout(resolve, attempt * 2000));
+          }
+        } catch (error) {
+          logger.warn(`[百家号] 第${attempt}次尝试获取作品列表失败:`, error);
+          if (attempt < 3) {
+            await new Promise(resolve => setTimeout(resolve, attempt * 2000));
+          }
+        }
+      }
 
       const worksCount = articleData?.count || 0;
       logger.info(`[百家号] 作品数: ${worksCount}`);