فهرست منبع

百家号的作品数据每日任务

Ethanfly 16 ساعت پیش
والد
کامیت
693b90371b

+ 84 - 0
server/python/app.py

@@ -1594,6 +1594,90 @@ def get_account_info():
         return jsonify({"success": False, "error": str(e)}), 500
 
 
+# ==================== 百家号作品每日数据辅助接口 ====================
+
+@app.route("/baijiahao/article_stats", methods=["POST"])
+def baijiahao_article_stats():
+    """
+    Baijiahao: proxy for /author/eco/statistics/articleListStatistic.
+
+    Request body (JSON object):
+    {
+        "cookie": "...",
+        "start_day": "YYYYMMDD",
+        "end_day": "YYYYMMDD",
+        "type": "small_video_v2|video|news",
+        "num": 1,
+        "count": 10
+    }
+
+    Responds with the dict returned by the publisher's ``get_article_stats``.
+    Missing or malformed parameters yield HTTP 400; any unexpected failure
+    yields HTTP 500 with the error text.
+    """
+    # silent=True: a missing / non-JSON body must surface as a 400 from the
+    # validation below, not as an exception reported as a 500.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+
+    cookie_str = data.get("cookie", "")
+    start_day = data.get("start_day", "")
+    end_day = data.get("end_day", "")
+    stat_type = data.get("type", "video")
+
+    if not cookie_str:
+        return jsonify({"success": False, "error": "缺少 cookie 参数"}), 400
+    if not start_day or not end_day:
+        return jsonify({"success": False, "error": "缺少 start_day 或 end_day 参数"}), 400
+
+    # Paging values come from the client; a non-numeric value is a client
+    # error (400), not a server error.
+    try:
+        num = int(data.get("num", 1) or 1)
+        count = int(data.get("count", 10) or 10)
+    except (TypeError, ValueError):
+        return jsonify({"success": False, "error": "num 和 count 必须是整数"}), 400
+
+    try:
+        PublisherClass = get_publisher("baijiahao")
+        publisher = PublisherClass(headless=HEADLESS_MODE)
+        result = asyncio.run(
+            publisher.get_article_stats(
+                cookie_str,
+                start_day=start_day,
+                end_day=end_day,
+                stat_type=stat_type,
+                num=num,
+                count=count,
+            )
+        )
+        return jsonify(result)
+    except Exception as e:
+        traceback.print_exc()
+        return jsonify({"success": False, "error": str(e)}), 500
+
+
+@app.route("/baijiahao/trend_data", methods=["POST"])
+def baijiahao_trend_data():
+    """
+    Baijiahao: proxy for /author/eco/statistic/gettrenddata.
+
+    Request body (JSON object):
+    {
+        "cookie": "...",
+        "nid": "article/video nid or article_id"
+    }
+
+    Responds with the dict returned by the publisher's ``get_trend_data``.
+    Missing parameters yield HTTP 400; any unexpected failure yields HTTP 500
+    with the error text.
+    """
+    # silent=True: a missing / non-JSON body must surface as a 400 from the
+    # validation below, not as an exception reported as a 500.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+
+    cookie_str = data.get("cookie", "")
+    nid = data.get("nid", "")
+
+    if not cookie_str:
+        return jsonify({"success": False, "error": "缺少 cookie 参数"}), 400
+    if not nid:
+        return jsonify({"success": False, "error": "缺少 nid 参数"}), 400
+
+    try:
+        PublisherClass = get_publisher("baijiahao")
+        publisher = PublisherClass(headless=HEADLESS_MODE)
+        result = asyncio.run(
+            publisher.get_trend_data(
+                cookie_str,
+                nid=str(nid),  # clients may send the id as a JSON number
+            )
+        )
+        return jsonify(result)
+    except Exception as e:
+        traceback.print_exc()
+        return jsonify({"success": False, "error": str(e)}), 500
+
+
 # ==================== 健康检查 ====================
 
 @app.route("/health", methods=["GET"])

+ 150 - 124
server/python/platforms/baijiahao.py

@@ -1067,141 +1067,167 @@ class BaijiahaoPublisher(BasePublisher):
             next_page=next_page
         )
     
-    async def check_login_status(self, cookies: str) -> dict:
+    async def get_article_stats(
+        self,
+        cookies: str,
+        start_day: str,
+        end_day: str,
+        stat_type: str,
+        num: int,
+        count: int,
+    ) -> dict:
         """
-        检查百家号 Cookie 登录状态
-        使用直接 HTTP API 调用,不使用浏览器
+        调用百家号 /author/eco/statistics/articleListStatistic 接口(不依赖浏览器 token),用于作品列表维度的每日数据。
         """
         import aiohttp
         
-        print(f"[{self.platform_name}] 检查登录状态 (使用 API)")
+        print(f"[{self.platform_name}] get_article_stats: {start_day}-{end_day}, type={stat_type}, num={num}, count={count}")
         
-        try:
-            # 解析 cookies
-            cookie_list = self.parse_cookies(cookies)
-            cookie_dict = {c['name']: c['value'] for c in cookie_list}
+        # 解析 cookies
+        cookie_list = self.parse_cookies(cookies)
+        cookie_dict = {c['name']: c['value'] for c in cookie_list}
+        
+        session_headers = {
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Connection': 'keep-alive',
+            'Upgrade-Insecure-Requests': '1',
+        }
+        headers = {
+            'Accept': 'application/json, text/plain, */*',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Referer': 'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
+            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Connection': 'keep-alive',
+        }
+
+        async with aiohttp.ClientSession(cookies=cookie_dict) as session:
+            # 0) 先访问 single 页面建立会话上下文(与 Node 端 UI 打开的页面一致)
+            try:
+                await session.get(
+                    'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
+                    headers=session_headers,
+                    timeout=aiohttp.ClientTimeout(total=20),
+                )
+            except Exception as e:
+                print(f"[{self.platform_name}] warmup single page failed (non-fatal): {e}")
             
-            # 重要:百家号需要先访问主页建立会话上下文
-            session_headers = {
-                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-                # Cookie 由 session 管理
-                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
-                'Accept-Encoding': 'gzip, deflate, br',
-                'Connection': 'keep-alive',
-                'Upgrade-Insecure-Requests': '1',
-                'Sec-Fetch-Dest': 'document',
-                'Sec-Fetch-Mode': 'navigate',
-                'Sec-Fetch-Site': 'none',
-                'Sec-Fetch-User': '?1',
-                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
-                'sec-ch-ua-mobile': '?0',
-                'sec-ch-ua-platform': '"Windows"'
-            }
+            # 1) 调用 articleListStatistic
+            api_url = (
+                "https://baijiahao.baidu.com/author/eco/statistics/articleListStatistic"
+                f"?start_day={start_day}&end_day={end_day}&type={stat_type}&num={num}&count={count}"
+            )
+            async with session.get(
+                api_url,
+                headers=headers,
+                timeout=aiohttp.ClientTimeout(total=30),
+            ) as resp:
+                status = resp.status
+                try:
+                    data = await resp.json()
+                except Exception:
+                    text = await resp.text()
+                    print(f"[{self.platform_name}] articleListStatistic non-JSON response: {text[:1000]}")
+                    raise
             
-            headers = {
-                'Accept': 'application/json, text/plain, */*',
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-                # Cookie 由 session 管理
-                'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
-                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
-                'Accept-Encoding': 'gzip, deflate, br',
-                'Connection': 'keep-alive',
-                'Sec-Fetch-Dest': 'empty',
-                'Sec-Fetch-Mode': 'cors',
-                'Sec-Fetch-Site': 'same-origin',
-                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
-                'sec-ch-ua-mobile': '?0',
-                'sec-ch-ua-platform': '"Windows"'
-            }
+            errno = data.get('errno')
+            errmsg = data.get('errmsg')
+            print(f"[{self.platform_name}] articleListStatistic: http={status}, errno={errno}, msg={errmsg}")
             
-            async with aiohttp.ClientSession(cookies=cookie_dict) as session:
-                # 步骤 0: 先访问主页建立会话上下文(关键步骤!)
-                print(f"[{self.platform_name}] [0/2] 访问主页建立会话上下文...")
-                async with session.get(
-                    'https://baijiahao.baidu.com/builder/rc/home',
-                    headers=session_headers,
-                    timeout=aiohttp.ClientTimeout(total=30)
-                ) as home_response:
-                    home_status = home_response.status
-                    print(f"[{self.platform_name}] 主页访问状态: {home_status}")
-                
-                # 短暂等待确保会话建立
-                await asyncio.sleep(1)
-                
-                # 步骤 1: 调用 API 检查登录状态
-                print(f"[{self.platform_name}] [1/2] 调用 appinfo API 检查登录状态...")
-                
-                async with session.get(
-                    'https://baijiahao.baidu.com/builder/app/appinfo',
-                    headers=headers,
-                    timeout=aiohttp.ClientTimeout(total=30)
-                ) as response:
-                    api_result = await response.json()
-                
-                errno = api_result.get('errno')
-                print(f"[{self.platform_name}] API 完整响应: {json.dumps(api_result, ensure_ascii=False)[:500]}")
-                print(f"[{self.platform_name}] API 响应: errno={errno}")
-                
-                # errno 为 0 表示请求成功
-                if errno == 0:
-                    # 检查是否有用户数据
-                    user_data = api_result.get('data', {}).get('user', {})
-                    if user_data:
-                        # 检查账号状态
-                        status = user_data.get('status', '')
-                        account_name = user_data.get('name') or user_data.get('uname', '')
-                        
-                        # 有效的账号状态:audit(审核中), pass(已通过), normal(正常), newbie(新手)
-                        valid_statuses = ['audit', 'pass', 'normal', 'newbie']
-                        
-                        if status in valid_statuses and account_name:
-                            print(f"[{self.platform_name}] ✓ 登录状态有效: {account_name} (status={status})")
-                            return {
-                                "success": True,
-                                "valid": True,
-                                "need_login": False,
-                                "message": "登录状态有效"
-                            }
-                        else:
-                            print(f"[{self.platform_name}] 账号状态异常: status={status}, name={account_name}")
-                            return {
-                                "success": True,
-                                "valid": False,
-                                "need_login": True,
-                                "message": f"账号状态异常: {status}"
-                            }
-                    else:
-                        print(f"[{self.platform_name}] 无用户数据,Cookie 可能无效")
-                        return {
-                            "success": True,
-                            "valid": False,
-                            "need_login": True,
-                            "message": "无用户数据"
-                        }
-                
-                # errno 非 0 表示请求失败
-                # 常见错误码:110 = 未登录
-                error_msg = api_result.get('errmsg', '未知错误')
-                print(f"[{self.platform_name}] Cookie 无效: errno={errno}, msg={error_msg}")
-                
-                return {
-                    "success": True,
-                    "valid": False,
-                    "need_login": True,
-                    "message": error_msg
-                }
-            
-        except Exception as e:
-            import traceback
-            traceback.print_exc()
             return {
-                "success": False,
-                "valid": False,
-                "need_login": True,
-                "error": str(e)
+                "success": status == 200 and errno == 0,
+                "status": status,
+                "errno": errno,
+                "errmsg": errmsg,
+                "data": data.get('data') if isinstance(data, dict) else None,
             }
     
+    async def get_trend_data(
+        self,
+        cookies: str,
+        nid: str,
+    ) -> dict:
+        """
+        Call Baijiahao's /author/eco/statistic/gettrenddata endpoint for one work.
+
+        Args:
+            cookies: Cookie payload in whatever format ``self.parse_cookies`` accepts.
+            nid: Identifier of the work; sent as the ``nid`` query parameter.
+
+        Returns:
+            A dict with keys ``success`` (HTTP 200 and errno == 0), ``status``
+            (HTTP status code), ``errno``, ``errmsg`` and ``data`` (the ``data``
+            member of the JSON reply, or None when the reply is not an object).
+
+        Raises:
+            Whatever ``resp.json()`` raises when the reply cannot be decoded as
+            JSON (the raw text is logged first), and any error raised by the
+            API request itself. Warm-up failures are logged and ignored.
+        """
+        import aiohttp
+
+        print(f"[{self.platform_name}] get_trend_data: nid={nid}")
+
+        cookie_list = self.parse_cookies(cookies)
+        cookie_dict = {c['name']: c['value'] for c in cookie_list}
+
+        user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+        session_headers = {
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+            'User-Agent': user_agent,
+            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Connection': 'keep-alive',
+            'Upgrade-Insecure-Requests': '1',
+        }
+        headers = {
+            'Accept': 'application/json, text/plain, */*',
+            'User-Agent': user_agent,
+            'Referer': 'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
+            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Connection': 'keep-alive',
+        }
+
+        async with aiohttp.ClientSession(cookies=cookie_dict) as session:
+            # 0) Warm-up: load the analysis page first. Best effort only; the
+            # response is entered as a context manager so its connection is
+            # released right away instead of lingering until session close.
+            try:
+                async with session.get(
+                    'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
+                    headers=session_headers,
+                    timeout=aiohttp.ClientTimeout(total=20),
+                ):
+                    pass
+            except Exception as e:
+                print(f"[{self.platform_name}] warmup single page (trend) failed (non-fatal): {e}")
+
+            # 1) Query the trend endpoint. Passing `params` lets aiohttp
+            # URL-encode the caller-supplied nid, so characters such as '&'
+            # or '#' cannot alter the query string.
+            async with session.get(
+                "https://baijiahao.baidu.com/author/eco/statistic/gettrenddata",
+                params={"nid": nid, "trend_type": "all", "data_type": "addition"},
+                headers=headers,
+                timeout=aiohttp.ClientTimeout(total=30),
+            ) as resp:
+                status = resp.status
+                try:
+                    data = await resp.json()
+                except Exception:
+                    text = await resp.text()
+                    print(f"[{self.platform_name}] gettrenddata non-JSON response: {text[:1000]}")
+                    raise
+
+        # A valid JSON reply is not necessarily an object; normalise before
+        # reading fields so a list/null reply reports failure instead of raising.
+        payload = data if isinstance(data, dict) else {}
+        errno = payload.get('errno')
+        errmsg = payload.get('errmsg')
+        print(f"[{self.platform_name}] gettrenddata: http={status}, errno={errno}, msg={errmsg}")
+
+        return {
+            "success": status == 200 and errno == 0,
+            "status": status,
+            "errno": errno,
+            "errmsg": errmsg,
+            "data": payload.get('data') if isinstance(data, dict) else None,
+        }
+
+    
+    async def check_login_status(self, cookies: str) -> dict:
+        """
+        Check whether the given Baijiahao cookies still represent a valid login.
+
+        This override only logs and then defers to the parent class's
+        ``check_login_status``; its result is returned unchanged.
+        NOTE(review): the original note says the parent check is browser
+        (Playwright) based and shared with the other platforms; confirm
+        against BasePublisher.
+        """
+        print(f"[{self.platform_name}] 检查登录状态 (使用通用浏览器逻辑)")
+        # Delegate to the shared implementation in the parent class.
+        login_state = await super().check_login_status(cookies)
+        return login_state
+
+    
     async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
         """获取百家号作品评论"""
         # TODO: 实现评论获取逻辑

+ 3 - 0
server/src/models/entities/Work.ts

@@ -75,6 +75,9 @@ export class Work {
   @Column({ name: 'yesterday_collect_count', type: 'int', default: 0 })
   yesterdayCollectCount!: number;
 
+  @Column({ name: 'yesterday_recommend_count', type: 'int', default: 0 })
+  yesterdayRecommendCount!: number;
+
   @Column({ name: 'yesterday_fans_increase', type: 'int', default: 0 })
   yesterdayFansIncrease!: number;
 

+ 3 - 0
server/src/models/entities/WorkDayStatistics.ts

@@ -30,6 +30,9 @@ export class WorkDayStatistics {
   @Column({ name: 'collect_count', type: 'int', default: 0, comment: '收藏数' })
   collectCount!: number;
 
+  @Column({ name: 'recommend_count', type: 'int', default: 0, comment: '推荐量' })
+  recommendCount!: number;
+
   @Column({ name: 'fans_increase', type: 'int', default: 0, comment: '涨粉数' })
   fansIncrease!: number;
 

+ 23 - 0
server/src/scheduler/index.ts

@@ -13,6 +13,7 @@ import { WeixinVideoDataCenterImportService } from '../services/WeixinVideoDataC
 import { XiaohongshuWorkNoteStatisticsImportService } from '../services/XiaohongshuWorkNoteStatisticsImportService.js';
 import { DouyinWorkStatisticsImportService } from '../services/DouyinWorkStatisticsImportService.js';
 import { WeixinVideoWorkStatisticsImportService } from '../services/WeixinVideoWorkStatisticsImportService.js';
+import { BaijiahaoWorkDailyStatisticsImportService } from '../services/BaijiahaoWorkDailyStatisticsImportService.js';
 
 /**
  * 定时任务调度器
@@ -25,6 +26,7 @@ export class TaskScheduler {
   private isDyImportRunning = false; // 抖音导入锁,防止任务重叠执行
   private isDyWorkImportRunning = false; // 抖音作品日统计导入锁
   private isBjImportRunning = false; // 百家号导入锁,防止任务重叠执行
+  private isBjWorkImportRunning = false; // 百家号作品日统计导入锁
   private isWxImportRunning = false; // 视频号导入锁,防止任务重叠执行
   private isWxWorkImportRunning = false; // 视频号作品日统计导入锁
   private isAutoReplying = false; // 私信回复锁,防止任务重叠执行
@@ -61,6 +63,9 @@ export class TaskScheduler {
     // 每天 12:20:批量导出百家号“数据中心-内容分析-基础数据-近30天”,导入 user_day_statistics
     this.scheduleJob('bj-content-overview-import', '20 12 * * *', this.importBaijiahaoContentOverviewLast30Days.bind(this));
 
+    // 每天 12:25:百家号作品维度「每日数据」同步(列表分页 + 逐条趋势),写入 works.yesterday_* 与 work_day_statistics
+    this.scheduleJob('bj-work-daily-import', '25 12 * * *', this.importBaijiahaoWorkDailyStatistics.bind(this));
+
     // 每天 12:30:批量导出视频号“数据中心-各子菜单-增长详情(数据详情)-近30天-下载表格”,导入 user_day_statistics
     this.scheduleJob('wx-video-data-center-import', '30 12 * * *', this.importWeixinVideoDataCenterLast30Days.bind(this));
 
@@ -87,6 +92,7 @@ export class TaskScheduler {
     logger.info('[Scheduler]   - dy-account-overview-import:  daily at 12:10 (10 12 * * *)');
     logger.info('[Scheduler]   - dy-work-statistics-import:  daily at 12:50 (50 12 * * *)');
     logger.info('[Scheduler]   - bj-content-overview-import: daily at 12:20 (20 12 * * *)');
+    logger.info('[Scheduler]   - bj-work-daily-import:       daily at 12:25 (25 12 * * *)');
     logger.info('[Scheduler]   - wx-video-data-center-import: daily at 12:30 (30 12 * * *)');
     logger.info('[Scheduler]   - wx-video-work-statistics-import: daily at 12:35 (35 12 * * *)');
     logger.info('[Scheduler]   - auto-reply-messages: every minute (* * * * *)');
@@ -519,6 +525,23 @@ export class TaskScheduler {
   }
 
   /**
+   * 百家号:作品维度「每日作品数据」→ 写入 works.yesterday_* 与 work_day_statistics
+   */
+  private async importBaijiahaoWorkDailyStatistics(): Promise<void> {
+    // The flag acts as a re-entrancy guard: only start when no run is in flight.
+    if (!this.isBjWorkImportRunning) {
+      this.isBjWorkImportRunning = true;
+      try {
+        await BaijiahaoWorkDailyStatisticsImportService.runDailyImport();
+      } finally {
+        // Always clear the flag, even when the import throws.
+        this.isBjWorkImportRunning = false;
+      }
+      return;
+    }
+
+    logger.info('[Scheduler] Baijiahao work daily import is already running, skipping...');
+  }
+
+  /**
    * 视频号:数据中心-关注者/视频/图文 的增长详情(近30天)→ 导入 user_day_statistics
    */
   private async importWeixinVideoDataCenterLast30Days(): Promise<void> {

+ 35 - 0
server/src/scripts/run-baijiahao-work-daily-import.ts

@@ -0,0 +1,35 @@
+import { initDatabase } from '../models/index.js';
+import { logger } from '../utils/logger.js';
+import { BaijiahaoWorkDailyStatisticsImportService } from '../services/BaijiahaoWorkDailyStatisticsImportService.js';
+
+/**
+ * 用法:
+ * - 全量:cd server && pnpm exec tsx src/scripts/run-baijiahao-work-daily-import.ts
+ * - 单账号:cd server && pnpm exec tsx src/scripts/run-baijiahao-work-daily-import.ts <accountId>
+ */
+/**
+ * Script entry point: initialises the database, then runs the Baijiahao
+ * work-daily import for one account (when a numeric id is given as the first
+ * CLI argument) or for all accounts. Exits with code 0 on success and 1 on
+ * an invalid argument or any failure.
+ */
+async function main() {
+  try {
+    await initDatabase();
+    const accountIdArg = process.argv[2];
+
+    logger.info('[BJ WorkDaily] Manual run start...');
+    if (accountIdArg) {
+      // parseInt() would accept "12abc" as 12 and silently run against the
+      // wrong account, so require the whole argument to be decimal digits.
+      const trimmed = accountIdArg.trim();
+      const accountId = /^\d+$/.test(trimmed) ? Number(trimmed) : NaN;
+      if (!Number.isSafeInteger(accountId)) {
+        logger.error('[BJ WorkDaily] accountId 必须是数字');
+        process.exit(1);
+      }
+      await BaijiahaoWorkDailyStatisticsImportService.runDailyImportForAccount(accountId);
+    } else {
+      await BaijiahaoWorkDailyStatisticsImportService.runDailyImport();
+    }
+    logger.info('[BJ WorkDaily] Manual run done.');
+    process.exit(0);
+  } catch (e) {
+    logger.error('[BJ WorkDaily] Manual run failed:', e);
+    process.exit(1);
+  }
+}
+
+void main();
+

+ 15 - 5
server/src/services/BaijiahaoContentOverviewImportService.ts

@@ -235,12 +235,10 @@ function parseBaijiahaoExcel(
       const shareCount = parseChineseNumberLike(safeGet(10));
       if (typeof shareCount === 'number') (obj as any).shareCount = shareCount;
 
-      // 点击率 → cover_click_rate(通常是百分比字符串,原样入库
+      // 点击率 → coverClickRate(不为 0 时加 %)
       const clickRateRaw = safeGet(2);
-      if (clickRateRaw !== undefined && clickRateRaw !== null) {
-        const s = String(clickRateRaw).trim();
-        if (s) (obj as any).coverClickRate = s;
-      }
+      const coverClickRate = formatRateWithPercent(clickRateRaw);
+      if (coverClickRate !== '0') (obj as any).coverClickRate = coverClickRate;
 
       // fans_increase 只看作品涨粉量(不再扣除作品脱粉量)
       const inc = parseChineseNumberLike(safeGet(12));
@@ -253,6 +251,18 @@ function parseBaijiahaoExcel(
   return result;
 }
 
+/**
+ * Format a rate for storage: returns '0' for null/undefined/empty, zero or
+ * non-numeric input; otherwise a string ending in '%'.
+ * - Input that already contains '%' is returned as-is (trimmed).
+ * - A number in (0, 1] is treated as a fraction and scaled by 100 (2 decimals).
+ * - Any other number is rounded to at most 2 decimals and suffixed with '%'.
+ */
+function formatRateWithPercent(v: unknown): string {
+  if (v === null || v === undefined) return '0';
+  const s = String(v).trim();
+  if (!s) return '0';
+  const hasPercent = s.includes('%');
+  // Strip '%' as well as thousands separators before parsing: Number('12.5%')
+  // is NaN, which previously turned every percent string into '0'.
+  const n = Number(s.replace(/[,%\s]/g, ''));
+  if (!Number.isFinite(n) || n === 0) return '0';
+  if (hasPercent) return s;
+  if (n > 0 && n <= 1) return `${(n * 100).toFixed(2)}%`;
+  return `${Number(n.toFixed(2))}%`;
+}
+
 function formatPercentString(input: unknown): string | null {
   if (input === null || input === undefined) return null;
   const s = String(input).trim();

+ 645 - 0
server/src/services/BaijiahaoWorkDailyStatisticsImportService.ts

@@ -0,0 +1,645 @@
+import fs from 'node:fs/promises';
+import path from 'node:path';
+import { chromium, type Browser, type BrowserContext, type Page } from 'playwright';
+import { AppDataSource, PlatformAccount, Work } from '../models/index.js';
+import { logger } from '../utils/logger.js';
+import { WorkDayStatisticsService } from './WorkDayStatisticsService.js';
+import type { ProxyConfig } from '@media-manager/shared';
+import { AccountService } from './AccountService.js';
+
+type PlaywrightCookie = {
+  name: string;
+  value: string;
+  domain?: string;
+  path?: string;
+  url?: string;
+  expires?: number;
+  httpOnly?: boolean;
+  secure?: boolean;
+  sameSite?: 'Lax' | 'None' | 'Strict';
+};
+
+type BjhListType = 'small_video_v2' | 'video' | 'news';
+
+type ArticleListStatisticItem = {
+  article_id?: string;
+  nid?: string;
+  id?: string;
+  title?: string;
+  type?: string;
+  view_count?: number;
+  comment_count?: number;
+  likes_count?: number;
+  collect_count?: number;
+  share_count?: number;
+  rec_count?: number;
+};
+
+type ArticleListStatisticResponse = {
+  errno?: number;
+  errmsg?: string;
+  data?: {
+    count?: string | number;
+    list?: ArticleListStatisticItem[];
+  };
+};
+
+type TrendItem = {
+  event_day?: string; // YYYYMMDD
+  view_count?: string | number;
+  disp_pv?: string | number;
+  likes_count?: string | number;
+  comment_count?: string | number;
+  collect_count?: string | number;
+  share_count?: string | number;
+  cover_ctr?: string | number;
+  completion_ratio?: string | number;
+  avg_duration?: string | number;
+  view_duration?: string | number;
+  fans_add_cnt?: string | number;
+};
+
+type GetTrendDataResponse = {
+  errno?: number;
+  errmsg?: string;
+  data?: {
+    basic_list?: TrendItem[];
+  };
+};
+
+/** Create directory `p` (including missing parents); resolves once it exists. */
+function ensureDir(p: string) {
+  const options = { recursive: true } as const;
+  return fs.mkdir(p, options);
+}
+
+/**
+ * Convert an account's stored cookie payload into Playwright-style cookies.
+ *
+ * Two input shapes are accepted:
+ *  1. JSON: either an array of cookie objects or an object with a `cookies`
+ *     array. Entries without a name are dropped.
+ *  2. A header-style string such as "a=b; c=d".
+ *
+ * Cookies that carry no domain are bound to https://baijiahao.baidu.com via
+ * `url`. Returns an empty array for null/blank input.
+ */
+function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
+  if (!cookieData) return [];
+  const raw = cookieData.trim();
+  if (!raw) return [];
+
+  // 1) JSON array
+  if (raw.startsWith('[') || raw.startsWith('{')) {
+    try {
+      const parsed = JSON.parse(raw);
+      // Accept both a bare array and a wrapper object exposing `cookies`.
+      const arr = Array.isArray(parsed) ? parsed : (parsed?.cookies ? parsed.cookies : []);
+      if (!Array.isArray(arr)) return [];
+      return arr
+        .map((c: any) => {
+          const name = String(c?.name ?? '').trim();
+          const value = String(c?.value ?? '').trim();
+          if (!name) return null;
+          const domain = c?.domain ? String(c.domain) : undefined;
+          const pathVal = c?.path ? String(c.path) : '/';
+          // Without a domain, fall back to a url so the cookie has a target.
+          const url = !domain ? 'https://baijiahao.baidu.com' : undefined;
+          const sameSiteRaw = c?.sameSite;
+          // Only the three exact spellings are passed through; anything else is dropped.
+          const sameSite =
+            sameSiteRaw === 'Lax' || sameSiteRaw === 'None' || sameSiteRaw === 'Strict'
+              ? sameSiteRaw
+              : undefined;
+
+          return {
+            name,
+            value,
+            domain,
+            path: pathVal,
+            url,
+            expires: typeof c?.expires === 'number' ? c.expires : undefined,
+            httpOnly: typeof c?.httpOnly === 'boolean' ? c.httpOnly : undefined,
+            secure: typeof c?.secure === 'boolean' ? c.secure : undefined,
+            sameSite,
+          } satisfies PlaywrightCookie;
+        })
+        .filter(Boolean) as PlaywrightCookie[];
+    } catch {
+      // fallthrough: not valid JSON, try the "a=b; c=d" form below
+    }
+  }
+
+  // 2) "a=b; c=d"
+  const pairs = raw.split(';').map((p) => p.trim()).filter(Boolean);
+  const cookies: PlaywrightCookie[] = [];
+  for (const p of pairs) {
+    // Split on the first '=' only, so values may themselves contain '='.
+    const idx = p.indexOf('=');
+    if (idx <= 0) continue;
+    const name = p.slice(0, idx).trim();
+    const value = p.slice(idx + 1).trim();
+    if (!name) continue;
+    cookies.push({ name, value, url: 'https://baijiahao.baidu.com' });
+  }
+  return cookies;
+}
+
+/**
+ * Launch a headless Chromium instance, routed through the account's proxy
+ * when one is configured and enabled. The caller owns the browser:
+ * `shouldClose` is always true.
+ */
+async function createBrowserForAccount(proxy: ProxyConfig | null): Promise<{ browser: Browser; shouldClose: boolean }> {
+  const launchArgs = ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-gpu', '--window-size=1920,1080'];
+
+  // Only add the `proxy` launch option when a proxy is enabled.
+  const proxyOptions = proxy?.enabled
+    ? {
+        proxy: {
+          server: `${proxy.type}://${proxy.host}:${proxy.port}`,
+          username: proxy.username,
+          password: proxy.password,
+        },
+      }
+    : {};
+
+  const browser = await chromium.launch({
+    headless: true,
+    ...proxyOptions,
+    args: launchArgs,
+  });
+  return { browser, shouldClose: true };
+}
+
+/**
+ * Heuristic type guard: true when `v` is a string of at least 60 characters
+ * (after trimming) made of exactly three dot-separated base64url-style
+ * segments, each longer than 10 characters.
+ */
+function isJwtLike(v: unknown): v is string {
+  if (typeof v !== 'string' || !v) return false;
+  const candidate = v.trim();
+  if (candidate.length < 60) return false;
+  const segments = candidate.split('.');
+  return (
+    segments.length === 3 &&
+    segments.every((seg) => /^[A-Za-z0-9_-]+$/.test(seg) && seg.length > 10)
+  );
+}
+
+/**
+ * Best-effort search for a JWT-shaped token on the given page.
+ *
+ * Lookup order inside the page: localStorage values, sessionStorage values,
+ * <meta name="token"> / <meta name="bjh-token">, then the `token` field of a
+ * few well-known global state objects. If none matches, the page HTML is
+ * scanned with a regex. Returns '' when nothing JWT-like is found; failures
+ * of evaluate()/content() are swallowed and treated as "not found".
+ */
+async function extractTokenFromPage(page: Page): Promise<string> {
+  const token = await page
+    .evaluate(() => {
+      // Runs in the browser context, so the outer isJwtLike is not reachable
+      // from here; the same check is redefined locally.
+      const isJwtLikeInner = (v: any) => {
+        if (!v || typeof v !== 'string') return false;
+        const s = v.trim();
+        if (s.length < 60) return false;
+        const parts = s.split('.');
+        if (parts.length !== 3) return false;
+        return parts.every((p) => /^[A-Za-z0-9_-]+$/.test(p) && p.length > 10);
+      };
+      // Return the first JWT-like value stored under any key, or ''.
+      const pickFromStorage = (storage: Storage) => {
+        try {
+          const keys = Object.keys(storage || {});
+          for (const k of keys) {
+            const v = storage.getItem(k);
+            if (isJwtLikeInner(v)) return v;
+          }
+        } catch {
+          // ignore
+        }
+        return '';
+      };
+
+      let t = pickFromStorage(window.localStorage);
+      if (t) return t;
+      t = pickFromStorage(window.sessionStorage);
+      if (t) return t;
+
+      const meta = document.querySelector('meta[name="token"], meta[name="bjh-token"]');
+      const metaToken = meta && meta.getAttribute('content');
+      if (isJwtLikeInner(metaToken)) return metaToken;
+
+      // Global state objects that may expose a `token` field.
+      const candidates = [
+        ((window as any).__INITIAL_STATE__ && (window as any).__INITIAL_STATE__.token) || '',
+        ((window as any).__PRELOADED_STATE__ && (window as any).__PRELOADED_STATE__.token) || '',
+        ((window as any).__NUXT__ && (window as any).__NUXT__.state && (window as any).__NUXT__.state.token) || '',
+      ];
+      for (const c of candidates) {
+        if (isJwtLikeInner(c)) return c;
+      }
+
+      return '';
+    })
+    .catch(() => '');
+
+  if (token && isJwtLike(token)) return token;
+
+  // HTML fallback
+  // NOTE(review): only the FIRST dotted triple in the HTML is tested; if an
+  // unrelated match precedes the real token, the token is missed — confirm
+  // this is acceptable.
+  const html = await page.content().catch(() => '');
+  const m = html.match(/([A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,})/);
+  if (m?.[1] && isJwtLike(m[1])) return m[1];
+
+  return '';
+}
+
+/** Format a Date as a compact YYYYMMDD string using its local-time fields. */
+function toYmd(date: Date): string {
+  const pad2 = (n: number) => String(n).padStart(2, '0');
+  return [String(date.getFullYear()), pad2(date.getMonth() + 1), pad2(date.getDate())].join('');
+}
+
+/**
+ * Parse a compact "YYYYMMDD" string into a local-time Date at midnight.
+ * Returns null when the input is not exactly eight digits or does not name a
+ * real calendar day (e.g. "20240231"), instead of silently rolling over to a
+ * different date.
+ */
+function parseYyyyMmDdCompactToDate(day: string): Date | null {
+  const s = String(day || '').trim();
+  const m = s.match(/^(\d{4})(\d{2})(\d{2})$/);
+  if (!m) return null;
+  const year = Number(m[1]);
+  const month = Number(m[2]);
+  const dayOfMonth = Number(m[3]);
+  const d = new Date(year, month - 1, dayOfMonth);
+  d.setHours(0, 0, 0, 0);
+  // The Date constructor normalises out-of-range parts (month 13, day 31 in
+  // February, ...); a mismatch after the round trip means the input was invalid.
+  if (d.getFullYear() !== year || d.getMonth() !== month - 1 || d.getDate() !== dayOfMonth) {
+    return null;
+  }
+  return d;
+}
+
+/**
+ * Coerce a loosely-typed value to an integer, rounding down (Math.floor).
+ * Strings may contain thousands separators (","). null/undefined, blank
+ * strings and anything non-finite or non-numeric yield 0.
+ */
+function toInt(v: unknown): number {
+  if (v == null) return 0;
+
+  let parsed: number;
+  if (typeof v === 'number') {
+    parsed = v;
+  } else {
+    const text = String(v).trim();
+    if (text === '') return 0;
+    parsed = Number(text.replace(/,/g, ''));
+  }
+
+  return Number.isFinite(parsed) ? Math.floor(parsed) : 0;
+}
+
+/** Stringify and trim a value; null/undefined and blank results become '0'. */
+function toStr(v: unknown): string {
+  const text = v === null || v === undefined ? '' : String(v).trim();
+  return text === '' ? '0' : text;
+}
+
+/**
+ * Format a rate for storage: returns '0' for null/undefined/empty, zero or
+ * non-numeric input; otherwise a string ending in '%'.
+ * - Input that already contains '%' is returned as-is (trimmed).
+ * - A number in (0, 1] is treated as a fraction and scaled by 100 (2 decimals).
+ * - Any other number is rounded to at most 2 decimals and suffixed with '%'.
+ */
+function formatRateWithPercent(v: unknown): string {
+  if (v === null || v === undefined) return '0';
+  const s = String(v).trim();
+  if (!s) return '0';
+  const hasPercent = s.includes('%');
+  // Strip '%' as well as thousands separators before parsing: Number('12.5%')
+  // is NaN, which previously turned every percent string into '0'.
+  const n = Number(s.replace(/[,%\s]/g, ''));
+  if (!Number.isFinite(n) || n === 0) return '0';
+  if (hasPercent) return s;
+  if (n > 0 && n <= 1) return `${(n * 100).toFixed(2)}%`;
+  return `${Number(n.toFixed(2))}%`;
+}
+
+/**
+ * Watch duration: render the value with exactly two decimals.
+ * null/undefined and non-numeric input yield '0'; thousands separators (",")
+ * are ignored.
+ */
+function formatDurationTwoDecimals(v: unknown): string {
+  if (v == null) return '0';
+  const cleaned = String(v).trim().split(',').join('');
+  const amount = Number(cleaned);
+  return Number.isFinite(amount) ? amount.toFixed(2) : '0';
+}
+
+/**
+ * Daily importer of Baijiahao per-work statistics.
+ *
+ * For each Baijiahao account it opens a Playwright browser context, extracts the
+ * request token from the analysis page, pages through the `articleListStatistic`
+ * endpoint to refresh the `yesterday*` columns on `Work`, and then calls
+ * `gettrenddata` per work to save per-day rows through `WorkDayStatisticsService`.
+ */
+export class BaijiahaoWorkDailyStatisticsImportService {
+  private accountRepository = AppDataSource.getRepository(PlatformAccount);
+  private workRepository = AppDataSource.getRepository(Work);
+  private workDayStatisticsService = new WorkDayStatisticsService();
+  private accountService = new AccountService();
+
+  // Directory holding one Playwright storage-state JSON file per account.
+  private stateDir = path.resolve(process.cwd(), 'tmp', 'baijiahao-storage-state');
+
+  /** Run the import for every Baijiahao account using a fresh service instance. */
+  static async runDailyImport(): Promise<void> {
+    const svc = new BaijiahaoWorkDailyStatisticsImportService();
+    await svc.runDailyImportForAllBaijiahaoAccounts();
+  }
+
+  /**
+   * Run the import for a single account.
+   *
+   * @param accountId Primary key of the platform account.
+   * @throws Error when no Baijiahao account with that id exists, or when the import fails.
+   */
+  static async runDailyImportForAccount(accountId: number): Promise<void> {
+    const svc = new BaijiahaoWorkDailyStatisticsImportService();
+    const account = await svc.accountRepository.findOne({
+      where: { id: accountId, platform: 'baijiahao' as any },
+    });
+    if (!account) throw new Error(`未找到百家号账号 id=${accountId}`);
+    await svc.importAccountWorkDaily(account);
+  }
+
+  /**
+   * Import statistics for all Baijiahao accounts sequentially.
+   * A failure on one account is logged and does not stop the remaining accounts.
+   */
+  async runDailyImportForAllBaijiahaoAccounts(): Promise<void> {
+    await ensureDir(this.stateDir);
+    const accounts = await this.accountRepository.find({ where: { platform: 'baijiahao' as any } });
+    logger.info(`[BJ WorkDaily] Start. total_accounts=${accounts.length}`);
+    for (const account of accounts) {
+      try {
+        await this.importAccountWorkDaily(account);
+      } catch (e) {
+        logger.error(
+          `[BJ WorkDaily] Account failed. accountId=${account.id} name=${account.accountName || ''}`,
+          e
+        );
+      }
+    }
+    logger.info('[BJ WorkDaily] Done.');
+  }
+
+  /** Path of the persisted storage-state file for the given account. */
+  private getStatePath(accountId: number) {
+    return path.join(this.stateDir, `${accountId}.json`);
+  }
+
+  /**
+   * Create a browser context for the account and extract the request token.
+   *
+   * A persisted storage state is reused when present; otherwise the supplied
+   * cookies are injected. When a token is found the current storage state is
+   * written back to disk (best effort).
+   *
+   * @param account Account whose proxy configuration and id are used.
+   * @param cookies Cookies to inject when no storage state file exists.
+   * @returns The context, the browser, whether the caller owns (must close) the
+   *          browser, and the extracted token (empty string when none was found).
+   * @throws Re-throws any navigation/extraction error after releasing the
+   *         context and, when owned, the browser.
+   */
+  private async createContext(
+    account: PlatformAccount,
+    cookies: PlaywrightCookie[]
+  ): Promise<{ context: BrowserContext; browser: Browser; shouldClose: boolean; token: string }> {
+    const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
+    let opened: BrowserContext | null = null;
+
+    try {
+      const statePath = this.getStatePath(account.id);
+      let hasState = false;
+      try {
+        await fs.access(statePath);
+        hasState = true;
+      } catch {
+        hasState = false;
+      }
+
+      const context = await browser.newContext({
+        viewport: { width: 1920, height: 1080 },
+        locale: 'zh-CN',
+        timezoneId: 'Asia/Shanghai',
+        userAgent:
+          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',
+        ...(hasState ? { storageState: statePath } : {}),
+      });
+      opened = context;
+      context.setDefaultTimeout(60_000);
+      if (!hasState) {
+        await context.addCookies(cookies as any);
+      }
+
+      const page = await context.newPage();
+      await page.goto('https://baijiahao.baidu.com/builder/rc/analysiscontent/single', {
+        waitUntil: 'domcontentloaded',
+      });
+      // Short grace period after DOMContentLoaded before reading the token.
+      await page.waitForTimeout(1500);
+
+      const token = await extractTokenFromPage(page);
+      if (token) {
+        try {
+          await ensureDir(this.stateDir);
+          await context.storageState({ path: statePath });
+        } catch (e) {
+          // Persisting the state is only an optimisation for the next run;
+          // report the failure but keep going.
+          logger.warn(
+            `[BJ WorkDaily] accountId=${account.id} failed to persist storage state: ${
+              e instanceof Error ? e.message : String(e)
+            }`
+          );
+        }
+      }
+
+      await page.close().catch(() => undefined);
+      return { context, browser, shouldClose, token };
+    } catch (e) {
+      // The caller never receives the handles when we throw, so release them
+      // here; otherwise a failed navigation leaks the context (and browser).
+      await opened?.close().catch(() => undefined);
+      if (shouldClose) {
+        await browser.close().catch(() => undefined);
+      }
+      throw e;
+    }
+  }
+
+  /** Headers sent with every statistics request; `token` is added only when non-empty. */
+  private buildCommonHeaders(token: string): Record<string, string> {
+    const headers: Record<string, string> = {
+      accept: 'application/json, text/plain, */*',
+      'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+      referer: 'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
+    };
+    if (token) headers.token = token;
+    return headers;
+  }
+
+  /**
+   * Fetch one page of the per-article statistics list.
+   *
+   * @param context Browser context whose request client (and cookies) is used.
+   * @param token Request token placed in the `token` header.
+   * @param params Date range (`YYYYMMDD`), list type, 1-based page number and page size.
+   * @throws Error when the response body is not valid JSON.
+   */
+  private async fetchArticleListStatisticPage(
+    context: BrowserContext,
+    token: string,
+    params: {
+      startDay: string; // YYYYMMDD
+      endDay: string; // YYYYMMDD
+      type: BjhListType;
+      num: number;
+      count: number;
+    }
+  ): Promise<ArticleListStatisticResponse> {
+    const { startDay, endDay, type, num, count } = params;
+    const url = `https://baijiahao.baidu.com/author/eco/statistics/articleListStatistic?start_day=${startDay}&end_day=${endDay}&type=${type}&num=${num}&count=${count}`;
+    const res = await (context as any).request.get(url, {
+      headers: this.buildCommonHeaders(token),
+    });
+    const json = (await res.json().catch(() => null)) as ArticleListStatisticResponse | null;
+    if (!json) throw new Error(`articleListStatistic json parse failed (http=${res.status()})`);
+    return json;
+  }
+
+  /**
+   * Fetch the trend data of a single work.
+   *
+   * @param context Browser context whose request client (and cookies) is used.
+   * @param token Request token placed in the `token` header.
+   * @param nid Work identifier; URL-encoded before being sent.
+   * @throws Error when the response body is not valid JSON.
+   */
+  private async fetchTrendData(
+    context: BrowserContext,
+    token: string,
+    nid: string
+  ): Promise<GetTrendDataResponse> {
+    const url = `https://baijiahao.baidu.com/author/eco/statistic/gettrenddata?nid=${encodeURIComponent(
+      nid
+    )}&trend_type=all&data_type=addition`;
+    const res = await (context as any).request.get(url, {
+      headers: this.buildCommonHeaders(token),
+    });
+    const json = (await res.json().catch(() => null)) as GetTrendDataResponse | null;
+    if (!json) throw new Error(`gettrenddata json parse failed (http=${res.status()})`);
+    return json;
+  }
+
+  /** True when an API `errno` denotes a missing or expired login session. */
+  private isNotLoggedInErrno(errno: unknown): boolean {
+    const n = typeof errno === 'number' ? errno : Number(errno);
+    // 110: not logged in; 20040001: current user not logged in (errno seen in a sample response)
+    return n === 110 || n === 20040001;
+  }
+
+  /**
+   * True when an error represents an expired login: either it carries the
+   * `BJH_NOT_LOGGED_IN` code or its message mentions not being logged in.
+   */
+  private isNotLoggedInError(e: unknown): boolean {
+    const err = e as any;
+    if (!err) return false;
+    if (err.code === 'BJH_NOT_LOGGED_IN') return true;
+    const msg = String(err.message || '').toLowerCase();
+    return msg.includes('未登录') || msg.includes('not logged in');
+  }
+
+  /**
+   * Import list and trend statistics for every known work of one account.
+   *
+   * Accounts without parsable cookies or without works are skipped. When the
+   * session turns out to be expired, the account is refreshed once and the
+   * import retried; if that fails the account is marked `expired`.
+   *
+   * @param account Account to import.
+   * @param isRetry True for the retry after an account refresh; suppresses a second refresh.
+   * @throws Any non-login error, and login errors raised during the retry.
+   */
+  private async importAccountWorkDaily(account: PlatformAccount, isRetry = false): Promise<void> {
+    const cookies = parseCookiesFromAccount(account.cookieData);
+    if (!cookies.length) {
+      logger.warn(
+        `[BJ WorkDaily] accountId=${account.id} cookieData 为空或无法解析,跳过`
+      );
+      return;
+    }
+
+    const works = await this.workRepository.find({
+      where: { accountId: account.id, platform: 'baijiahao' as any },
+      select: ['id', 'platformVideoId'],
+    });
+    if (!works.length) {
+      logger.info(
+        `[BJ WorkDaily] accountId=${account.id} 没有 baijiahao 作品,跳过`
+      );
+      return;
+    }
+    // Map of platform article id -> local work id; works without a platform id are ignored.
+    const idMap = new Map<string, number>();
+    for (const w of works) {
+      const k = String(w.platformVideoId || '').trim();
+      if (k) idMap.set(k, w.id);
+    }
+
+    let context: BrowserContext | null = null;
+    let browser: Browser | null = null;
+    let shouldClose = false;
+    let token = '';
+
+    try {
+      const created = await this.createContext(account, cookies);
+      context = created.context;
+      browser = created.browser;
+      shouldClose = created.shouldClose;
+      token = created.token;
+
+      if (!token) {
+        throw Object.assign(
+          new Error('未能提取百家号 token(可能未登录)'),
+          { code: 'BJH_NOT_LOGGED_IN' }
+        );
+      }
+
+      // Default range ends yesterday in China time (UTC+8). The instant is
+      // shifted by +8h and read back with the UTC getters, so the calendar date
+      // is right whatever the host time zone is. Reading the shifted instant
+      // with local getters is only correct on a UTC host.
+      const chinaNow = new Date(Date.now() + 8 * 60 * 60 * 1000);
+      const chinaYesterday = new Date(
+        chinaNow.getUTCFullYear(),
+        chinaNow.getUTCMonth(),
+        chinaNow.getUTCDate() - 1
+      );
+      const endDay = toYmd(chinaYesterday);
+      const startDayDate = new Date(chinaYesterday);
+      startDayDate.setDate(startDayDate.getDate() - 6);
+      const startDay = toYmd(startDayDate);
+
+      const types: BjhListType[] = ['small_video_v2', 'video', 'news'];
+      const pageSize = 10;
+
+      let worksUpdated = 0;
+      let wdsInserted = 0;
+      let wdsUpdated = 0;
+
+      for (const t of types) {
+        let num = 1;
+        let total = 0;
+        while (true) {
+          const body = await this.fetchArticleListStatisticPage(context!, token, {
+            startDay,
+            endDay,
+            type: t,
+            num,
+            count: pageSize,
+          });
+
+          if (this.isNotLoggedInErrno(body.errno)) {
+            const err = new Error(
+              `articleListStatistic errno=${body.errno} 未登录/会话失效`
+            );
+            (err as any).code = 'BJH_NOT_LOGGED_IN';
+            throw err;
+          }
+          if (body.errno !== 0) {
+            throw new Error(
+              `articleListStatistic errno=${body.errno} errmsg=${body.errmsg || ''}`
+            );
+          }
+
+          const list = body.data?.list || [];
+          // `count` may arrive as a string or a number; toInt handles both.
+          total = toInt(body.data?.count);
+
+          if (!list.length) break;
+
+          // 1) Write the list summary into works.yesterday_* first.
+          // NOTE(review): the list is requested for the 7-day range
+          // startDay..endDay; confirm these figures really are single-day
+          // values before treating them as "yesterday".
+          for (const it of list) {
+            const articleId = String(it.article_id || '').trim();
+            if (!articleId) continue;
+            const workId = idMap.get(articleId);
+            if (!workId) continue;
+
+            const patch: Partial<Work> = {
+              yesterdayPlayCount: toInt(it.view_count),
+              yesterdayCommentCount: toInt(it.comment_count),
+              yesterdayLikeCount: toInt(it.likes_count),
+              yesterdayCollectCount: toInt(it.collect_count),
+              yesterdayShareCount: toInt(it.share_count),
+              // Baijiahao list rec_count -> recommendation count
+              yesterdayRecommendCount: toInt(it.rec_count),
+            };
+
+            const r = await this.workRepository.update(workId, patch);
+            if (r.affected && r.affected > 0) worksUpdated += r.affected;
+          }
+
+          // 2) Then fetch the trend per work and write basic_list into work_day_statistics.
+          for (const it of list) {
+            const articleId = String(it.article_id || '').trim();
+            if (!articleId) continue;
+            const workId = idMap.get(articleId);
+            if (!workId) continue;
+
+            const trend = await this.fetchTrendData(context!, token, articleId);
+            if (this.isNotLoggedInErrno(trend.errno)) {
+              const err = new Error(
+                `gettrenddata errno=${trend.errno} 未登录/会话失效`
+              );
+              (err as any).code = 'BJH_NOT_LOGGED_IN';
+              throw err;
+            }
+            if (trend.errno !== 0) {
+              // A failing trend request only skips this work; the import continues.
+              logger.warn(
+                `[BJ WorkDaily] gettrenddata errno=${trend.errno} nid=${articleId} errmsg=${trend.errmsg || ''}`
+              );
+              continue;
+            }
+            const basic = trend.data?.basic_list || [];
+            for (const day of basic) {
+              const d = parseYyyyMmDdCompactToDate(String(day.event_day || ''));
+              if (!d) continue;
+
+              const save = await this.workDayStatisticsService.saveStatisticsForDate(
+                workId,
+                d,
+                {
+                  playCount: toInt(day.view_count),
+                  likeCount: toInt(day.likes_count),
+                  commentCount: toInt(day.comment_count),
+                  collectCount: toInt(day.collect_count),
+                  shareCount: toInt(day.share_count),
+                  // basic_list currently has no recommendation field; map it to recommendCount if one appears.
+                  fansIncrease: toInt(day.fans_add_cnt),
+                  coverClickRate: formatRateWithPercent(day.cover_ctr),
+                  completionRate: formatRateWithPercent(day.completion_ratio),
+                  avgWatchDuration: formatDurationTwoDecimals(day.avg_duration),
+                  totalWatchDuration: formatDurationTwoDecimals(day.view_duration),
+                }
+              );
+              wdsInserted += save.inserted;
+              wdsUpdated += save.updated;
+            }
+          }
+
+          const fetched = num * pageSize;
+          if (total > 0 && fetched >= total) break;
+          num += 1;
+          // Hard cap on pages in case the API never reports a total or an empty page.
+          if (num > 200) break;
+        }
+      }
+
+      logger.info(
+        `[BJ WorkDaily] accountId=${account.id} done. worksUpdated=${worksUpdated} wdsInserted=${wdsInserted} wdsUpdated=${wdsUpdated} range=${startDay}-${endDay}`
+      );
+    } catch (e) {
+      if (!isRetry && this.isNotLoggedInError(e)) {
+        logger.info(
+          `[BJ WorkDaily] Login expired detected for account ${account.id}, attempting to refresh account...`
+        );
+        // Drop the persisted storage state: it belongs to the expired session,
+        // and createContext prefers it over cookies, so the retry would
+        // otherwise ignore the refreshed cookies and fail the same way.
+        await fs.unlink(this.getStatePath(account.id)).catch(() => undefined);
+        try {
+          const refreshResult = await this.accountService.refreshAccount(
+            account.userId,
+            account.id
+          );
+          if (refreshResult.needReLogin) {
+            logger.warn(
+              `[BJ WorkDaily] Account ${account.id} refresh finished but still need re-login, mark as expired.`
+            );
+            await this.accountRepository.update(account.id, {
+              status: 'expired' as any,
+            });
+            return;
+          }
+
+          const refreshed = await this.accountRepository.findOne({
+            where: { id: account.id },
+          });
+          if (!refreshed) {
+            throw new Error('账号刷新后未找到');
+          }
+
+          logger.info(
+            `[BJ WorkDaily] Account ${account.id} refresh success, retry work daily import once...`
+          );
+          await this.importAccountWorkDaily(refreshed, true);
+          return;
+        } catch (refreshError) {
+          // Covers both a failed refresh and a failed retry: the account is marked expired.
+          logger.error(
+            `[BJ WorkDaily] Account ${account.id} refresh failed:`,
+            refreshError
+          );
+          await this.accountRepository.update(account.id, {
+            status: 'expired' as any,
+          });
+          return;
+        }
+      }
+
+      throw e;
+    } finally {
+      await context?.close().catch(() => undefined);
+      if (shouldClose && browser) {
+        await browser.close().catch(() => undefined);
+      }
+    }
+  }
+}
+

+ 1 - 0
server/src/services/WorkService.ts

@@ -746,6 +746,7 @@ export class WorkService {
       yesterdayCommentCount: work.yesterdayCommentCount,
       yesterdayShareCount: work.yesterdayShareCount,
       yesterdayCollectCount: work.yesterdayCollectCount,
+      yesterdayRecommendCount: (work as any).yesterdayRecommendCount,
       yesterdayFansIncrease: work.yesterdayFansIncrease,
       yesterdayCoverClickRate: work.yesterdayCoverClickRate,
       yesterdayAvgWatchDuration: work.yesterdayAvgWatchDuration,

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 323 - 0
server/tmp/baijiahao-storage-state/10.json


+ 2 - 0
shared/src/types/work.ts

@@ -32,6 +32,8 @@ export interface Work {
   yesterdayCommentCount?: number;
   yesterdayShareCount?: number;
   yesterdayCollectCount?: number;
+  /** 昨日推荐量 */
+  yesterdayRecommendCount?: number;
   yesterdayFansIncrease?: number;
   yesterdayCoverClickRate?: string;
   yesterdayAvgWatchDuration?: string;

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است