Selaa lähdekoodia

Update HeadlessBrowserService to enhance API request handling and logging

- Modified the work_list API request to fetch 20 items per page instead of 12 (reducing the number of requests) and removed the `status=0` query parameter; per the in-code comment, omitting `status` fetches works in all statuses.
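- Improved logging: each API response now logs `has_more`, `max_cursor`, and the `aweme_list` length, and each page logs the number of works received, the computed `hasMore` flag, and the next cursor.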
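- Enhanced cursor management: `has_more` is accepted as either `true` or `1`, pagination stops when the cursor does not advance or a page returns no works, and the `create_time` fallback cursor is no longer multiplied by 1000.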
Ethanfly 14 tuntia sitten
vanhempi
commit
75f5be9922

BIN
server/python/platforms/__pycache__/douyin.cpython-313.pyc


+ 31 - 11
server/src/services/HeadlessBrowserService.ts

@@ -2353,7 +2353,9 @@ class HeadlessBrowserService {
 
         const data = await page.evaluate(async (cursor: number) => {
           // 使用新的 work_list API 接口
-          const url = `https://creator.douyin.com/janus/douyin/creator/pc/work_list?scene=star_atlas&device_platform=android&status=0&count=12&max_cursor=${cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai&aid=1128`;
+          // status: 不传或传空表示获取全部状态的作品
+          // count: 每页获取数量,增加到20减少请求次数
+          const url = `https://creator.douyin.com/janus/douyin/creator/pc/work_list?scene=star_atlas&device_platform=android&count=20&max_cursor=${cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai&aid=1128`;
 
           const resp = await fetch(url, {
             credentials: 'include',
@@ -2365,6 +2367,8 @@ class HeadlessBrowserService {
           return resp.json();
         }, maxCursor);
 
+        logger.info(`[DirectAPI] API response: has_more=${data?.has_more}, max_cursor=${data?.max_cursor}, aweme_list_length=${data?.aweme_list?.length || 0}`);
+
         // 解析 aweme_list 中的作品数据
         const awemeList = data?.aweme_list || [];
         logger.info(`[DirectAPI] Page ${pageCount}: got ${awemeList.length} works from aweme_list`);
@@ -2403,24 +2407,40 @@ class HeadlessBrowserService {
         }
 
         // 检查是否有更多数据
-        hasMore = data?.has_more === true;
-
-        // 更新游标:使用返回的 max_cursor 或基于最后一个作品的 create_time
-        if (data?.max_cursor) {
-          maxCursor = data.max_cursor;
+        // 注意: 抖音 API 返回的 has_more 可能不准确,我们额外判断
+        const apiHasMore = data?.has_more === true || data?.has_more === 1;
+
+        // 更新游标:使用返回的 max_cursor
+        if (data?.max_cursor !== undefined && data?.max_cursor !== null) {
+          // 只有当 max_cursor 有变化时才继续
+          if (data.max_cursor !== maxCursor) {
+            maxCursor = data.max_cursor;
+            hasMore = apiHasMore && awemeList.length > 0;
+          } else {
+            // max_cursor 没变化,说明到底了
+            hasMore = false;
+          }
         } else if (awemeList.length > 0) {
           // 如果没有 max_cursor,使用最后一个作品的 create_time 作为游标
           const lastAweme = awemeList[awemeList.length - 1];
           if (lastAweme.create_time) {
-            maxCursor = lastAweme.create_time * 1000; // 转换为毫秒
+            const newCursor = lastAweme.create_time;
+            if (newCursor !== maxCursor) {
+              maxCursor = newCursor;
+              hasMore = apiHasMore;
+            } else {
+              hasMore = false;
+            }
+          } else {
+            hasMore = false;
           }
-        }
-
-        // 如果没有获取到数据,停止循环
-        if (awemeList.length === 0) {
+        } else {
+          // 没有获取到数据,停止循环
           hasMore = false;
         }
 
+        logger.info(`[DirectAPI] Page ${pageCount} result: got ${awemeList.length} works, hasMore=${hasMore}, nextCursor=${maxCursor}`);
+
         // 稍微延迟,避免请求过快
         if (hasMore) {
           await new Promise(resolve => setTimeout(resolve, 500));