|
|
@@ -1048,7 +1048,34 @@ class HeadlessBrowserService {
|
|
|
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
|
});
|
|
|
|
|
|
- await context.addCookies(cookies);
|
|
|
+ // 规范化 cookies 的 sameSite 值,Playwright 只接受 Strict/Lax/None
|
|
|
+ const validSameSiteValues = ["Strict", "Lax", "None"];
|
|
|
+ const normalizedCookies = cookies.map(cookie => {
|
|
|
+ const sameSite = cookie.sameSite as string | undefined;
|
|
|
+ let normalizedSameSite: "Strict" | "Lax" | "None" | undefined = "Lax";
|
|
|
+
|
|
|
+ if (sameSite && validSameSiteValues.includes(sameSite)) {
|
|
|
+ normalizedSameSite = sameSite as "Strict" | "Lax" | "None";
|
|
|
+ } else if (sameSite) {
|
|
|
+ // 尝试大小写不敏感匹配
|
|
|
+ const lowerSameSite = sameSite.toLowerCase();
|
|
|
+ if (lowerSameSite === "strict") normalizedSameSite = "Strict";
|
|
|
+ else if (lowerSameSite === "lax") normalizedSameSite = "Lax";
|
|
|
+ else if (lowerSameSite === "none") normalizedSameSite = "None";
|
|
|
+ else {
|
|
|
+ // 无效值,使用默认 Lax
|
|
|
+ logger.warn("[Cookie] Invalid sameSite value: " + sameSite + ", defaulting to Lax");
|
|
|
+ normalizedSameSite = "Lax";
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return {
|
|
|
+ ...cookie,
|
|
|
+ sameSite: normalizedSameSite
|
|
|
+ };
|
|
|
+ });
|
|
|
+
|
|
|
+ await context.addCookies(normalizedCookies);
|
|
|
const page = await context.newPage();
|
|
|
|
|
|
let accountInfo: AccountInfo;
|
|
|
@@ -1114,6 +1141,11 @@ class HeadlessBrowserService {
|
|
|
short_id?: string; // 短ID
|
|
|
follower_count?: number;
|
|
|
};
|
|
|
+ dataOverview?: {
|
|
|
+ fans_count?: number;
|
|
|
+ total_works?: number;
|
|
|
+ total_play?: number;
|
|
|
+ };
|
|
|
worksList?: Array<{
|
|
|
awemeId: string;
|
|
|
title: string;
|
|
|
@@ -1203,6 +1235,18 @@ class HeadlessBrowserService {
|
|
|
const data = await response.json();
|
|
|
logger.info(`[Douyin API] Captured response from: ${url.split('?')[0]}`);
|
|
|
|
|
|
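+ // Handle the data/overview API (the check also matches any URL containing /creator-micro/data): capture fans count, total works and total plays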
|
|
|
+ if (url.includes('/data/overview') || url.includes('/creator-micro/data')) {
|
|
|
+ if (data?.data) {
|
|
|
+ capturedData.dataOverview = {
|
|
|
+ fans_count: data.data.fans_count || data.data.follower_count,
|
|
|
+ total_works: data.data.total_item_cnt || data.data.works_count || data.data.video_count,
|
|
|
+ total_play: data.data.total_play_cnt,
|
|
|
+ };
|
|
|
+ logger.info(`[Douyin API] Captured data overview: total_works=${capturedData.dataOverview.total_works}, fans_count=${capturedData.dataOverview.fans_count}`);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
// 尝试多种数据结构
|
|
|
const user = data?.user || data?.data?.user || data?.data || data;
|
|
|
if (user) {
|
|
|
@@ -1252,6 +1296,25 @@ class HeadlessBrowserService {
|
|
|
// 额外等待确保 API 响应被捕获
|
|
|
await page.waitForTimeout(2000);
|
|
|
|
|
|
+ // 访问数据中心页面,触发 data/overview API
|
|
|
+ logger.info('[Douyin] Navigating to data center to trigger data/overview API...');
|
|
|
+ try {
|
|
|
+ await page.goto('https://creator.douyin.com/creator-micro/data-center/operation', {
|
|
|
+ waitUntil: 'domcontentloaded',
|
|
|
+ timeout: 15000,
|
|
|
+ });
|
|
|
+ await page.waitForTimeout(3000);
|
|
|
+
|
|
|
+ // 检查是否获取到 dataOverview
|
|
|
+ if (capturedData.dataOverview?.total_works) {
|
|
|
+ logger.info(`[Douyin] Captured dataOverview from data center: total_works=${capturedData.dataOverview.total_works}`);
|
|
|
+ } else {
|
|
|
+ logger.warn('[Douyin] Failed to capture dataOverview from data center');
|
|
|
+ }
|
|
|
+ } catch (error) {
|
|
|
+ logger.warn('[Douyin] Failed to navigate to data center:', error);
|
|
|
+ }
|
|
|
+
|
|
|
// 检查登录状态 - 如果没有从 API 获取到,通过 URL 判断
|
|
|
if (!isLoggedIn) {
|
|
|
const currentUrl = page.url();
|
|
|
@@ -1433,6 +1496,12 @@ class HeadlessBrowserService {
|
|
|
avatarUrl = capturedData.userInfo?.avatar || accountData.avatar || avatarUrl;
|
|
|
fansCount = capturedData.userInfo?.follower_count || accountData.fans || fansCount;
|
|
|
|
|
|
+ // 优先从 dataOverview 获取作品数(最准确)
|
|
|
+ if (capturedData.dataOverview?.total_works && capturedData.dataOverview.total_works > 0) {
|
|
|
+ worksCount = capturedData.dataOverview.total_works;
|
|
|
+ logger.info(`[Douyin] Using dataOverview.total_works as works count: ${worksCount}`);
|
|
|
+ }
|
|
|
+
|
|
|
// 通过 API 获取作品列表
|
|
|
logger.info('[Douyin] Fetching works via API...');
|
|
|
const apiResult = await this.fetchWorksDirectApi(page);
|
|
|
@@ -1440,9 +1509,14 @@ class HeadlessBrowserService {
|
|
|
logger.info(`[Douyin] fetchWorksDirectApi returned: works.length=${apiResult.works.length}, total=${apiResult.total}`);
|
|
|
|
|
|
if (apiResult.works.length > 0) {
|
|
|
- // 使用 items 累计数量作为作品数(apiResult.total 现在是累计的 items.length)
|
|
|
- // 如果 total 为 0,则使用 works 列表长度
|
|
|
- worksCount = apiResult.total > 0 ? apiResult.total : apiResult.works.length;
|
|
|
+ // Only take the count from the API result when worksCount is still 0; otherwise keep the value that was already set
|
|
|
+ if (worksCount === 0) {
|
|
|
+ // 使用 API 返回的总数,如果为 0 则使用实际获取到的作品列表长度
|
|
|
+ worksCount = apiResult.total > 0 ? apiResult.total : apiResult.works.length;
|
|
|
+ logger.info(`[Douyin] Using API result as works count: ${worksCount} (total=${apiResult.total}, works.length=${apiResult.works.length})`);
|
|
|
+ } else {
|
|
|
+ logger.info(`[Douyin] Already have works count from dataOverview: ${worksCount}, skipping API result`);
|
|
|
+ }
|
|
|
worksList = apiResult.works.map(w => ({
|
|
|
videoId: w.awemeId,
|
|
|
title: w.title,
|
|
|
@@ -1461,9 +1535,14 @@ class HeadlessBrowserService {
|
|
|
} else {
|
|
|
logger.warn(`[Douyin] fetchWorksDirectApi returned 0 works`);
|
|
|
if (capturedData.worksList && capturedData.worksList.length > 0) {
|
|
|
+ // Only take the count from the intercepted data when worksCount is still 0; otherwise keep the value that was already set
|
|
|
+ if (worksCount === 0) {
|
|
|
// 如果直接 API 调用失败,使用监听到的数据
|
|
|
logger.info(`[Douyin] Falling back to intercepted API data: ${capturedData.worksList.length} works`);
|
|
|
worksCount = capturedData.total || capturedData.worksList.length;
|
|
|
+ } else {
|
|
|
+ logger.info(`[Douyin] Already have works count from dataOverview: ${worksCount}, skipping intercepted data`);
|
|
|
+ }
|
|
|
worksList = capturedData.worksList.map(w => ({
|
|
|
videoId: w.awemeId,
|
|
|
title: w.title,
|
|
|
@@ -3862,6 +3941,12 @@ class HeadlessBrowserService {
|
|
|
|
|
|
// 获取作品数
|
|
|
const awemeList = data?.aweme_list || [];
|
|
|
+
|
|
|
+ // Log the first-page API response (truncated to the first 1000 characters) for debugging
|
|
|
+ if (pageCount === 1) {
|
|
|
+ logger.info(`[DirectAPI] First page API response: ${JSON.stringify(data).substring(0, 1000)}`);
|
|
|
+ }
|
|
|
+
|
|
|
logger.info(`[DirectAPI] API response: status_code=${data?.status_code}, has_more=${data?.has_more}, max_cursor=${data?.max_cursor}, aweme_list_length=${awemeList.length}`);
|
|
|
|
|
|
// 检查 API 返回状态
|
|
|
@@ -3887,13 +3972,43 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
|
|
|
// 优先从第一个作品的 author.aweme_count 获取真实作品数(只在第一页获取)
|
|
|
- if (pageCount === 1 && awemeList.length > 0) {
|
|
|
+ if (pageCount === 1) {
|
|
|
const firstAweme = awemeList[0];
|
|
|
- const authorAwemeCount = firstAweme?.author?.aweme_count;
|
|
|
- if (authorAwemeCount !== undefined && authorAwemeCount > 0) {
|
|
|
- totalCount = authorAwemeCount;
|
|
|
+
|
|
|
+ // 方案1: 从 author.aweme_count 获取(最准确)
|
|
|
+ if (awemeList.length > 0 && firstAweme?.author?.aweme_count !== undefined && firstAweme.author.aweme_count > 0) {
|
|
|
+ totalCount = firstAweme.author.aweme_count;
|
|
|
logger.info(`[DirectAPI] Using author.aweme_count as total works: ${totalCount}`);
|
|
|
}
|
|
|
+
|
|
|
+ // 方案2: 如果 author.aweme_count 不存在,尝试从 API 响应的 total 字段获取
|
|
|
+ if (totalCount === 0 && data?.total !== undefined && data.total > 0) {
|
|
|
+ totalCount = data.total;
|
|
|
+ logger.info(`[DirectAPI] Using API response total field: ${totalCount}`);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 方案3: 如果前两个都没有,尝试从 author 字段的其他属性获取
|
|
|
+ if (totalCount === 0 && awemeList.length > 0 && firstAweme?.author) {
|
|
|
+ const author = firstAweme.author;
|
|
|
+ const possibleCountFields = ['aweme_count', 'work_count', 'video_count', 'item_count'];
|
|
|
+ for (const field of possibleCountFields) {
|
|
|
+ if (author[field] !== undefined && author[field] > 0) {
|
|
|
+ totalCount = author[field];
|
|
|
+ logger.info(`[DirectAPI] Using author.${field} as total works: ${totalCount}`);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
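+ // Option 4 (diagnostic only): if none of the above produced a count, log the author object (truncated to 300 characters) for debugging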
|
|
|
+ if (totalCount === 0 && awemeList.length > 0 && firstAweme?.author) {
|
|
|
+ logger.warn(`[DirectAPI] Could not find total works count. Author object: ${JSON.stringify(firstAweme.author).substring(0, 300)}`);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Option 5 (diagnostic only): if the first page returned no works, log the response (truncated to 500 characters) for debugging
|
|
|
+ if (totalCount === 0 && awemeList.length === 0) {
|
|
|
+ logger.warn(`[DirectAPI] First page returned 0 works. Full response: ${JSON.stringify(data).substring(0, 500)}`);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
// 解析 aweme_list 中的作品数据
|