|
|
@@ -36,37 +36,37 @@ const PLATFORM_API_CONFIG: Record<string, {
|
|
|
// 使用 appinfo 接口检查 Cookie 有效性
|
|
|
checkUrl: 'https://baijiahao.baidu.com/builder/app/appinfo',
|
|
|
isValidResponse: (data: unknown) => {
|
|
|
- const resp = data as {
|
|
|
- errno?: number;
|
|
|
+ const resp = data as {
|
|
|
+ errno?: number;
|
|
|
errmsg?: string;
|
|
|
- data?: {
|
|
|
- user?: {
|
|
|
- name?: string;
|
|
|
+ data?: {
|
|
|
+ user?: {
|
|
|
+ name?: string;
|
|
|
app_id?: string | number;
|
|
|
userid?: number;
|
|
|
status?: string;
|
|
|
- }
|
|
|
- }
|
|
|
+ }
|
|
|
+ }
|
|
|
};
|
|
|
-
|
|
|
+
|
|
|
logger.info(`[Baijiahao] API response: errno=${resp?.errno}, errmsg=${resp?.errmsg}, user.name=${resp?.data?.user?.name}, user.app_id=${resp?.data?.user?.app_id}, user.status=${resp?.data?.user?.status}`);
|
|
|
-
|
|
|
+
|
|
|
// errno 为 0 表示请求成功
|
|
|
const isErrnoOk = resp?.errno === 0;
|
|
|
-
|
|
|
+
|
|
|
// 必须有用户信息(name 或 app_id)
|
|
|
const hasUserInfo = !!(resp?.data?.user?.name || resp?.data?.user?.app_id);
|
|
|
-
|
|
|
+
|
|
|
// 用户状态不能是 'banned' 或其他异常状态
|
|
|
const userStatus = resp?.data?.user?.status;
|
|
|
const isStatusOk = !userStatus || userStatus === 'audit' || userStatus === 'pass' || userStatus === 'active';
|
|
|
-
|
|
|
+
|
|
|
const isValid = isErrnoOk && hasUserInfo && isStatusOk;
|
|
|
-
|
|
|
+
|
|
|
if (!isValid) {
|
|
|
logger.warn(`[Baijiahao] Cookie invalid: errno=${resp?.errno}, hasUserInfo=${hasUserInfo}, status=${userStatus}`);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
return isValid;
|
|
|
},
|
|
|
},
|
|
|
@@ -132,7 +132,7 @@ class HeadlessBrowserService {
|
|
|
*/
|
|
|
async checkCookieValid(platform: PlatformType, cookies: CookieData[]): Promise<boolean> {
|
|
|
logger.info(`[checkCookieValid] Checking cookie for ${platform}, cookie count: ${cookies.length}`);
|
|
|
-
|
|
|
+
|
|
|
// 优先使用 Python 服务检查(通过浏览器访问后台页面,检测是否被重定向到登录页)
|
|
|
const pythonAvailable = await this.checkPythonServiceAvailable();
|
|
|
if (pythonAvailable) {
|
|
|
@@ -241,10 +241,10 @@ class HeadlessBrowserService {
|
|
|
|
|
|
const data = await response.json();
|
|
|
logger.info(`[API] Raw response for ${platform}:`, JSON.stringify(data).substring(0, 500));
|
|
|
-
|
|
|
+
|
|
|
const isValid = apiConfig.isValidResponse(data);
|
|
|
- const statusCode = (data as { status_code?: number; errno?: number; ret?: { errno?: number } })?.status_code
|
|
|
- ?? (data as { errno?: number })?.errno
|
|
|
+ const statusCode = (data as { status_code?: number; errno?: number; ret?: { errno?: number } })?.status_code
|
|
|
+ ?? (data as { errno?: number })?.errno
|
|
|
?? (data as { ret?: { errno?: number } })?.ret?.errno;
|
|
|
|
|
|
logger.info(`API check cookie for ${platform}: valid=${isValid}, statusCode=${statusCode}`);
|
|
|
@@ -267,7 +267,7 @@ class HeadlessBrowserService {
|
|
|
// 百家号特殊处理:根据 errno 判断
|
|
|
if (platform === 'baijiahao') {
|
|
|
const errno = (data as { errno?: number })?.errno;
|
|
|
-
|
|
|
+
|
|
|
// errno 为 0 表示请求成功,但可能没有用户信息(已在 isValidResponse 中检查)
|
|
|
if (errno === 0) {
|
|
|
// 如果 isValid 为 false,说明虽然请求成功但没有用户信息,可能是 Cookie 无效
|
|
|
@@ -277,7 +277,7 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
return true;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// errno 非 0 表示请求失败,可能是 Cookie 无效
|
|
|
// 常见错误码:
|
|
|
// - 110: 未登录
|
|
|
@@ -441,7 +441,7 @@ class HeadlessBrowserService {
|
|
|
*/
|
|
|
async capturePageScreenshot(platform: PlatformType, cookies: CookieData[]): Promise<string | null> {
|
|
|
const browser = await chromium.launch({ headless: true });
|
|
|
-
|
|
|
+
|
|
|
try {
|
|
|
const context = await browser.newContext({
|
|
|
viewport: { width: 1920, height: 1080 },
|
|
|
@@ -449,41 +449,41 @@ class HeadlessBrowserService {
|
|
|
timezoneId: 'Asia/Shanghai',
|
|
|
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
|
});
|
|
|
-
|
|
|
+
|
|
|
await context.addCookies(cookies);
|
|
|
const page = await context.newPage();
|
|
|
-
|
|
|
+
|
|
|
const config = this.getPlatformConfig(platform);
|
|
|
-
|
|
|
+
|
|
|
logger.info(`[Screenshot] Navigating to ${platform} home page: ${config.homeUrl}`);
|
|
|
-
|
|
|
+
|
|
|
// 访问平台主页
|
|
|
await page.goto(config.homeUrl, {
|
|
|
waitUntil: 'domcontentloaded',
|
|
|
timeout: 30000,
|
|
|
});
|
|
|
-
|
|
|
+
|
|
|
// 等待页面加载
|
|
|
await page.waitForTimeout(3000);
|
|
|
-
|
|
|
+
|
|
|
const url = page.url();
|
|
|
logger.info(`[Screenshot] Current URL: ${url}`);
|
|
|
-
|
|
|
+
|
|
|
// 截图
|
|
|
const screenshotBuffer = await page.screenshot({
|
|
|
type: 'jpeg',
|
|
|
quality: 80,
|
|
|
fullPage: false,
|
|
|
});
|
|
|
-
|
|
|
+
|
|
|
const base64Screenshot = screenshotBuffer.toString('base64');
|
|
|
-
|
|
|
+
|
|
|
await page.close();
|
|
|
await context.close();
|
|
|
await browser.close();
|
|
|
-
|
|
|
+
|
|
|
logger.info(`[Screenshot] Captured screenshot for ${platform}, size: ${Math.round(base64Screenshot.length / 1024)}KB`);
|
|
|
-
|
|
|
+
|
|
|
return base64Screenshot;
|
|
|
} catch (error) {
|
|
|
logger.error(`[Screenshot] Failed to capture screenshot for ${platform}:`, error);
|
|
|
@@ -578,15 +578,10 @@ class HeadlessBrowserService {
|
|
|
*/
|
|
|
async fetchAccountInfo(platform: PlatformType, cookies: CookieData[]): Promise<AccountInfo> {
|
|
|
logger.info(`[fetchAccountInfo] Starting for platform: ${platform}`);
|
|
|
-
|
|
|
- // 百家号使用直接 API 获取账号信息和作品列表
|
|
|
- if (platform === 'baijiahao') {
|
|
|
- logger.info(`[fetchAccountInfo] Using direct API for baijiahao`);
|
|
|
- return this.fetchBaijiahaoAccountInfoDirectApi(cookies);
|
|
|
- }
|
|
|
|
|
|
// 对于支持的平台,尝试使用 Python API 获取作品列表和账号信息
|
|
|
- const supportedPlatforms: PlatformType[] = ['douyin', 'xiaohongshu', 'kuaishou', 'weixin_video'];
|
|
|
+ // 包括百家号,通过 Python API 执行,逻辑与 Node 服务端保持一致
|
|
|
+ const supportedPlatforms: PlatformType[] = ['douyin', 'xiaohongshu', 'kuaishou', 'weixin_video', 'baijiahao'];
|
|
|
|
|
|
if (supportedPlatforms.includes(platform)) {
|
|
|
const pythonAvailable = await this.checkPythonServiceAvailable();
|
|
|
@@ -1256,7 +1251,7 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 方法2: 通过 .finder-uniq-id 选择器获取
|
|
|
if (!result.finderId) {
|
|
|
const finderUniqIdEl = document.querySelector('.finder-uniq-id');
|
|
|
@@ -1274,7 +1269,7 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 方法3: 从页面文本中正则匹配
|
|
|
if (!result.finderId) {
|
|
|
const bodyText = document.body.innerText || '';
|
|
|
@@ -1291,11 +1286,11 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// ===== 2. 获取账号名称 =====
|
|
|
// 优先使用 h2.finder-nickname
|
|
|
- const nicknameEl = document.querySelector('h2.finder-nickname') ||
|
|
|
- document.querySelector('.finder-nickname');
|
|
|
+ const nicknameEl = document.querySelector('h2.finder-nickname') ||
|
|
|
+ document.querySelector('.finder-nickname');
|
|
|
if (nicknameEl) {
|
|
|
const text = nicknameEl.textContent?.trim();
|
|
|
if (text && text.length >= 2 && text.length <= 30) {
|
|
|
@@ -1303,7 +1298,7 @@ class HeadlessBrowserService {
|
|
|
console.log('[WeixinVideo] Found name from .finder-nickname:', result.name);
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 备选选择器
|
|
|
if (!result.name) {
|
|
|
const nameSelectors = [
|
|
|
@@ -1329,7 +1324,7 @@ class HeadlessBrowserService {
|
|
|
result.avatar = avatarEl.src;
|
|
|
console.log('[WeixinVideo] Found avatar from img.avatar:', result.avatar);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 备选选择器
|
|
|
if (!result.avatar) {
|
|
|
const avatarSelectors = [
|
|
|
@@ -1368,14 +1363,14 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
});
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 备选:从页面整体文本中匹配
|
|
|
if (result.fans === undefined || result.works === undefined) {
|
|
|
const bodyText = document.body.innerText || '';
|
|
|
-
|
|
|
+
|
|
|
if (result.fans === undefined) {
|
|
|
const fansMatch = bodyText.match(/关注者\s*(\d+(?:\.\d+)?[万wW]?)/) ||
|
|
|
- bodyText.match(/粉丝\s*(\d+(?:\.\d+)?[万wW]?)/);
|
|
|
+ bodyText.match(/粉丝\s*(\d+(?:\.\d+)?[万wW]?)/);
|
|
|
if (fansMatch) {
|
|
|
let count = parseFloat(fansMatch[1]);
|
|
|
if (fansMatch[1].includes('万') || fansMatch[1].toLowerCase().includes('w')) {
|
|
|
@@ -1385,10 +1380,10 @@ class HeadlessBrowserService {
|
|
|
console.log('[WeixinVideo] Found fans from text:', result.fans);
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
if (result.works === undefined) {
|
|
|
const worksMatch = bodyText.match(/视频\s*(\d+)/) ||
|
|
|
- bodyText.match(/作品\s*(\d+)/);
|
|
|
+ bodyText.match(/作品\s*(\d+)/);
|
|
|
if (worksMatch) {
|
|
|
result.works = parseInt(worksMatch[1], 10);
|
|
|
console.log('[WeixinVideo] Found works from text:', result.works);
|
|
|
@@ -1426,7 +1421,7 @@ class HeadlessBrowserService {
|
|
|
// 如果首页没有获取到视频号 ID,尝试访问账号设置页面
|
|
|
if (!finderId || finderId.length < 10) {
|
|
|
logger.info('[WeixinVideo] Finder ID not found on home page, trying account settings page...');
|
|
|
-
|
|
|
+
|
|
|
try {
|
|
|
// 访问账号设置页面
|
|
|
await page.goto('https://channels.weixin.qq.com/platform/account', {
|
|
|
@@ -1434,12 +1429,12 @@ class HeadlessBrowserService {
|
|
|
timeout: 30000,
|
|
|
});
|
|
|
await page.waitForTimeout(2000);
|
|
|
-
|
|
|
+
|
|
|
// 从账号设置页面提取视频号 ID
|
|
|
const settingsData = await page.evaluate(() => {
|
|
|
const result: { finderId?: string; name?: string } = {};
|
|
|
const bodyText = document.body.innerText || '';
|
|
|
-
|
|
|
+
|
|
|
// 尝试多种匹配模式
|
|
|
const patterns = [
|
|
|
/视频号ID[::\s]*([a-zA-Z0-9_]+)/,
|
|
|
@@ -1449,7 +1444,7 @@ class HeadlessBrowserService {
|
|
|
/finder_username[::\s]*([a-zA-Z0-9_]+)/i,
|
|
|
/唯一标识[::\s]*([a-zA-Z0-9_]+)/,
|
|
|
];
|
|
|
-
|
|
|
+
|
|
|
for (const pattern of patterns) {
|
|
|
const match = bodyText.match(pattern);
|
|
|
if (match && match[1]) {
|
|
|
@@ -1458,7 +1453,7 @@ class HeadlessBrowserService {
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 从元素中查找
|
|
|
if (!result.finderId) {
|
|
|
const idSelectors = [
|
|
|
@@ -1479,12 +1474,12 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
return result;
|
|
|
});
|
|
|
-
|
|
|
+
|
|
|
logger.info(`[WeixinVideo] Extracted data from settings page:`, settingsData);
|
|
|
-
|
|
|
+
|
|
|
if (settingsData.finderId) {
|
|
|
finderId = settingsData.finderId;
|
|
|
accountId = `weixin_video_${settingsData.finderId}`;
|
|
|
@@ -1696,7 +1691,7 @@ class HeadlessBrowserService {
|
|
|
const bodyText = await page.textContent('body');
|
|
|
// 匹配小红书号格式:小红书号:xxxxxxx
|
|
|
const xhsIdMatch = bodyText?.match(/小红书号[::]\s*([a-zA-Z0-9_]+)/) ||
|
|
|
- bodyText?.match(/红书号[::]\s*([a-zA-Z0-9_]+)/);
|
|
|
+ bodyText?.match(/红书号[::]\s*([a-zA-Z0-9_]+)/);
|
|
|
if (xhsIdMatch) {
|
|
|
accountId = `xiaohongshu_${xhsIdMatch[1]}`;
|
|
|
logger.info(`[Xiaohongshu] Found 小红书号 from page text: ${accountId}`);
|
|
|
@@ -2085,17 +2080,33 @@ class HeadlessBrowserService {
|
|
|
*/
|
|
|
private async fetchBaijiahaoAccountInfoDirectApi(cookies: CookieData[]): Promise<AccountInfo> {
|
|
|
logger.info(`[Baijiahao API] Fetching account info via direct API...`);
|
|
|
-
|
|
|
- const cookieString = cookies.map(c => `${c.name}=${c.value}`).join('; ');
|
|
|
- const headers = {
|
|
|
- 'Accept': 'application/json, text/plain, */*',
|
|
|
+
|
|
|
+ // 构建 Cookie 字符串,确保格式正确
|
|
|
+ const cookieString = cookies
|
|
|
+ .map(c => `${c.name.trim()}=${c.value.trim()}`)
|
|
|
+ .filter(c => c.includes('=')) // 过滤掉无效的 cookie
|
|
|
+ .join('; ');
|
|
|
+
|
|
|
+ logger.debug(`[Baijiahao API] Cookie string length: ${cookieString.length}, cookie count: ${cookies.length}`);
|
|
|
+
|
|
|
+ const headers: Record<string, string> = {
|
|
|
+ 'Accept': '*/*',
|
|
|
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
|
|
+ 'Accept-Encoding': 'gzip, deflate, br',
|
|
|
+ 'Connection': 'keep-alive',
|
|
|
'Cookie': cookieString,
|
|
|
- 'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
|
|
|
+ // 'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
|
|
|
+ // 'Origin': 'https://baijiahao.baidu.com',
|
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
|
+ // 'Sec-Fetch-Dest': 'empty',
|
|
|
+ // 'Sec-Fetch-Mode': 'cors',
|
|
|
+ // 'Sec-Fetch-Site': 'same-origin',
|
|
|
};
|
|
|
|
|
|
let accountInfo: AccountInfo = this.getDefaultAccountInfo('baijiahao');
|
|
|
+ // 标记哪些字段已成功获取
|
|
|
+ let fansCountFetched = false;
|
|
|
+ let worksCountFetched = false;
|
|
|
|
|
|
try {
|
|
|
// 1. 获取账号基本信息 (appinfo API)
|
|
|
@@ -2104,12 +2115,12 @@ class HeadlessBrowserService {
|
|
|
method: 'GET',
|
|
|
headers,
|
|
|
});
|
|
|
-
|
|
|
+
|
|
|
if (!appInfoResponse.ok) {
|
|
|
logger.error(`[Baijiahao API] appinfo request failed: ${appInfoResponse.status}`);
|
|
|
throw new Error(`appinfo request failed: ${appInfoResponse.status}`);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
const appInfoData = await appInfoResponse.json() as {
|
|
|
errno?: number;
|
|
|
errmsg?: string;
|
|
|
@@ -2123,26 +2134,26 @@ class HeadlessBrowserService {
|
|
|
};
|
|
|
};
|
|
|
};
|
|
|
-
|
|
|
+
|
|
|
logger.info(`[Baijiahao API] appinfo response: errno=${appInfoData.errno}, errmsg=${appInfoData.errmsg}`);
|
|
|
-
|
|
|
+
|
|
|
if (appInfoData.errno !== 0) {
|
|
|
logger.error(`[Baijiahao API] appinfo API error: errno=${appInfoData.errno}, errmsg=${appInfoData.errmsg}`);
|
|
|
throw new Error(`appinfo API error: ${appInfoData.errmsg || 'Unknown error'}`);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
if (!appInfoData.data?.user) {
|
|
|
logger.error(`[Baijiahao API] No user data in appinfo response`);
|
|
|
throw new Error('No user data in appinfo response');
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
const user = appInfoData.data.user;
|
|
|
accountInfo.accountId = user.app_id ? `bjh_${user.app_id}` : accountInfo.accountId;
|
|
|
accountInfo.accountName = user.name || accountInfo.accountName;
|
|
|
// 处理头像 URL(可能是相对路径)
|
|
|
if (user.avatar) {
|
|
|
- accountInfo.avatarUrl = user.avatar.startsWith('http')
|
|
|
- ? user.avatar
|
|
|
+ accountInfo.avatarUrl = user.avatar.startsWith('http')
|
|
|
+ ? user.avatar
|
|
|
: `https:${user.avatar}`;
|
|
|
}
|
|
|
logger.info(`[Baijiahao API] Got account info: name=${accountInfo.accountName}, id=${accountInfo.accountId}, avatar=${accountInfo.avatarUrl}`);
|
|
|
@@ -2154,7 +2165,7 @@ class HeadlessBrowserService {
|
|
|
method: 'GET',
|
|
|
headers,
|
|
|
});
|
|
|
-
|
|
|
+
|
|
|
if (growthInfoResponse.ok) {
|
|
|
const growthData = await growthInfoResponse.json() as {
|
|
|
errno?: number;
|
|
|
@@ -2163,12 +2174,18 @@ class HeadlessBrowserService {
|
|
|
total_fans?: number;
|
|
|
};
|
|
|
};
|
|
|
-
|
|
|
+
|
|
|
logger.info(`[Baijiahao API] growth info response: errno=${growthData.errno}`);
|
|
|
-
|
|
|
+
|
|
|
if (growthData.errno === 0 && growthData.data) {
|
|
|
- accountInfo.fansCount = growthData.data.total_fans || 0;
|
|
|
- logger.info(`[Baijiahao API] Got fans count: ${accountInfo.fansCount}`);
|
|
|
+ const fansCount = growthData.data.total_fans;
|
|
|
+ if (fansCount !== undefined && fansCount !== null) {
|
|
|
+ accountInfo.fansCount = fansCount;
|
|
|
+ fansCountFetched = true;
|
|
|
+ logger.info(`[Baijiahao API] Got fans count: ${accountInfo.fansCount}`);
|
|
|
+ } else {
|
|
|
+ logger.warn(`[Baijiahao API] growth info API returned no fans count`);
|
|
|
+ }
|
|
|
} else {
|
|
|
logger.warn(`[Baijiahao API] growth info API error: errno=${growthData.errno}, errmsg=${growthData.errmsg}`);
|
|
|
}
|
|
|
@@ -2182,68 +2199,162 @@ class HeadlessBrowserService {
|
|
|
|
|
|
// 3. 获取作品列表 (分页获取所有作品)
|
|
|
logger.info(`[Baijiahao API] Step 3: Fetching works list...`);
|
|
|
+ setTimeout(() => {
|
|
|
+ console.log('1000ms');
|
|
|
+ }, 1000);
|
|
|
const worksList: WorkItem[] = [];
|
|
|
let currentPage = 1;
|
|
|
const pageSize = 20;
|
|
|
let hasMore = true;
|
|
|
let totalWorks = 0;
|
|
|
+ let worksListError = false;
|
|
|
|
|
|
- while (hasMore) {
|
|
|
+ while (hasMore && !worksListError) {
|
|
|
try {
|
|
|
const listUrl = `https://baijiahao.baidu.com/pcui/article/lists?currentPage=${currentPage}&pageSize=${pageSize}&search=&type=&collection=&startDate=&endDate=&clearBeforeFetch=false&dynamic=0`;
|
|
|
-
|
|
|
+
|
|
|
logger.info(`[Baijiahao API] Fetching works page ${currentPage}...`);
|
|
|
-
|
|
|
+ logger.debug(`[Baijiahao API] Request headers include Cookie: ${!!headers.Cookie}, Cookie length: ${headers.Cookie?.length || 0}`);
|
|
|
+
|
|
|
const listResponse = await fetch(listUrl, {
|
|
|
method: 'GET',
|
|
|
headers,
|
|
|
});
|
|
|
|
|
|
if (!listResponse.ok) {
|
|
|
+ const errorText = await listResponse.text();
|
|
|
logger.warn(`[Baijiahao API] Failed to fetch works list page ${currentPage}: ${listResponse.status}`);
|
|
|
+ logger.warn(`[Baijiahao API] Error response body: ${errorText}`);
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
- const listData = await listResponse.json() as {
|
|
|
+ const responseText = await listResponse.text();
|
|
|
+ logger.info(`[Baijiahao API] ========== Works API Response (Page ${currentPage}) ==========`);
|
|
|
+ logger.info(`[Baijiahao API] Full response: ${responseText}`);
|
|
|
+ logger.info(`[Baijiahao API] ============================================================`);
|
|
|
+
|
|
|
+ const listData = JSON.parse(responseText) as {
|
|
|
errno?: number;
|
|
|
errmsg?: string;
|
|
|
data?: {
|
|
|
list?: Array<{
|
|
|
id?: string;
|
|
|
+ article_id?: string;
|
|
|
title?: string;
|
|
|
- cover_images?: string[];
|
|
|
+ cover_images?: string | string[];
|
|
|
+ created_at?: string;
|
|
|
create_time?: string;
|
|
|
status?: string;
|
|
|
+ read_amount?: number;
|
|
|
read_count?: number;
|
|
|
+ like_amount?: number;
|
|
|
like_count?: number;
|
|
|
+ comment_amount?: number;
|
|
|
comment_count?: number;
|
|
|
+ share_amount?: number;
|
|
|
share_count?: number;
|
|
|
}>;
|
|
|
- total?: number;
|
|
|
+ page?: {
|
|
|
+ currentPage?: number;
|
|
|
+ pageSize?: number;
|
|
|
+ totalCount?: number;
|
|
|
+ totalPage?: number;
|
|
|
+ };
|
|
|
+ total?: number; // 兼容旧格式
|
|
|
};
|
|
|
};
|
|
|
|
|
|
+ // 处理分散认证问题 (errno=10001402),重试一次
|
|
|
+ if (listData.errno === 10001402) {
|
|
|
+ logger.warn(`[Baijiahao API] Dispersed authentication issue (errno=10001402) on page ${currentPage}, retrying after 3 seconds...`);
|
|
|
+ logger.debug(`[Baijiahao API] Request URL: ${listUrl}`);
|
|
|
+ logger.debug(`[Baijiahao API] Cookie header present: ${!!headers.Cookie}, length: ${headers.Cookie?.length || 0}`);
|
|
|
+
|
|
|
+ await new Promise(resolve => setTimeout(resolve, 3000));
|
|
|
+
|
|
|
+ // 重试一次,确保 headers 包含 Cookie
|
|
|
+ const retryResponse = await fetch(listUrl, {
|
|
|
+ method: 'GET',
|
|
|
+ headers: {
|
|
|
+ ...headers,
|
|
|
+ 'Cookie': cookieString, // 确保 Cookie 被正确传递
|
|
|
+ },
|
|
|
+ });
|
|
|
+
|
|
|
+ if (retryResponse.ok) {
|
|
|
+ const retryResponseText = await retryResponse.text();
|
|
|
+ logger.info(`[Baijiahao API] ========== Works API Retry Response (Page ${currentPage}) ==========`);
|
|
|
+ logger.info(`[Baijiahao API] Full retry response: ${retryResponseText}`);
|
|
|
+ logger.info(`[Baijiahao API] ============================================================`);
|
|
|
+
|
|
|
+ const retryData = JSON.parse(retryResponseText) as typeof listData;
|
|
|
+ if (retryData.errno === 0) {
|
|
|
+ logger.info(`[Baijiahao API] Retry successful for page ${currentPage}`);
|
|
|
+ Object.assign(listData, retryData);
|
|
|
+ } else if (retryData.errno === 10001402) {
|
|
|
+ logger.error(`[Baijiahao API] Retry still failed with errno=10001402, cookie may be invalid or expired`);
|
|
|
+ logger.error(`[Baijiahao API] Retry response data: ${JSON.stringify(retryData, null, 2)}`);
|
|
|
+ // 如果重试仍然失败,可能是 Cookie 问题,记录详细信息
|
|
|
+ logger.error(`[Baijiahao API] Cookie info: ${cookieString.substring(0, 200)}...`);
|
|
|
+ // 标记错误,但不完全失败,继续返回已获取的账号信息
|
|
|
+ worksListError = true;
|
|
|
+ logger.warn(`[Baijiahao API] Works list fetch failed, but will return other account info (name, fans count)`);
|
|
|
+ break;
|
|
|
+ } else {
|
|
|
+ logger.warn(`[Baijiahao API] Retry still failed: errno=${retryData.errno}, errmsg=${retryData.errmsg}`);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ logger.warn(`[Baijiahao API] Retry request failed: ${retryResponse.status}`);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
if (listData.errno !== 0) {
|
|
|
logger.warn(`[Baijiahao API] API returned error on page ${currentPage}: errno=${listData.errno}, errmsg=${listData.errmsg}`);
|
|
|
+ logger.warn(`[Baijiahao API] Error response data: ${JSON.stringify(listData, null, 2)}`);
|
|
|
+ // 如果不是 10001402 错误,标记为错误但继续返回其他信息
|
|
|
+ if (listData.errno !== 10001402) {
|
|
|
+ worksListError = true;
|
|
|
+ logger.warn(`[Baijiahao API] Works list fetch failed with errno=${listData.errno}, but will return other account info`);
|
|
|
+ }
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
const list = listData.data?.list || [];
|
|
|
- totalWorks = listData.data?.total || 0;
|
|
|
+ // 优先使用 data.page.totalCount,如果没有则使用 data.total(兼容旧格式)
|
|
|
+ totalWorks = listData.data?.page?.totalCount || listData.data?.total || 0;
|
|
|
logger.info(`[Baijiahao API] Got ${list.length} works on page ${currentPage}, total: ${totalWorks}`);
|
|
|
|
|
|
for (const item of list) {
|
|
|
+ // 处理 cover_images 可能是字符串(JSON)或数组
|
|
|
+ let coverUrl = '';
|
|
|
+ if (item.cover_images) {
|
|
|
+ if (Array.isArray(item.cover_images)) {
|
|
|
+ coverUrl = item.cover_images[0] || '';
|
|
|
+ } else if (typeof item.cover_images === 'string') {
|
|
|
+ try {
|
|
|
+ const coverArray = JSON.parse(item.cover_images);
|
|
|
+ if (Array.isArray(coverArray) && coverArray.length > 0) {
|
|
|
+ coverUrl = typeof coverArray[0] === 'string' ? coverArray[0] : coverArray[0]?.src || coverArray[0]?.ori_src || '';
|
|
|
+ }
|
|
|
+ } catch {
|
|
|
+ coverUrl = item.cover_images;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
worksList.push({
|
|
|
- videoId: item.id || `bjh_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
|
|
+ videoId: item.id || item.article_id || `bjh_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
|
|
title: item.title || '',
|
|
|
- coverUrl: item.cover_images?.[0] || '',
|
|
|
+ coverUrl: coverUrl,
|
|
|
duration: '00:00',
|
|
|
- publishTime: item.create_time || new Date().toISOString(),
|
|
|
+ publishTime: item.created_at || item.create_time || new Date().toISOString(),
|
|
|
status: item.status || 'published',
|
|
|
- playCount: item.read_count || 0,
|
|
|
- likeCount: item.like_count || 0,
|
|
|
- commentCount: item.comment_count || 0,
|
|
|
- shareCount: item.share_count || 0,
|
|
|
+ playCount: item.read_amount || item.read_count || 0,
|
|
|
+ likeCount: item.like_amount || item.like_count || 0,
|
|
|
+ commentCount: item.comment_amount || item.comment_count || 0,
|
|
|
+ shareCount: item.share_amount || item.share_count || 0,
|
|
|
});
|
|
|
}
|
|
|
|
|
|
@@ -2266,8 +2377,23 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
|
|
|
accountInfo.worksList = worksList;
|
|
|
- accountInfo.worksCount = worksList.length;
|
|
|
- logger.info(`[Baijiahao API] Successfully fetched account info: name=${accountInfo.accountName}, fans=${accountInfo.fansCount}, works=${accountInfo.worksCount}`);
|
|
|
+ // 使用 API 返回的 total 字段作为作品总数,而不是已获取的作品列表长度
|
|
|
+ // 因为可能只获取了部分作品(最多 10 页),但 total 是真实的总数
|
|
|
+ if (totalWorks > 0) {
|
|
|
+ accountInfo.worksCount = totalWorks;
|
|
|
+ worksCountFetched = true;
|
|
|
+ } else if (worksList.length > 0) {
|
|
|
+ // 如果 API 没有返回 total,但获取到了作品列表,使用列表长度
|
|
|
+ accountInfo.worksCount = worksList.length;
|
|
|
+ worksCountFetched = true;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (worksListError) {
|
|
|
+ logger.warn(`[Baijiahao API] Works list fetch encountered errors, but returning partial account info`);
|
|
|
+ logger.info(`[Baijiahao API] Account info (partial): name=${accountInfo.accountName}, fans=${accountInfo.fansCount} (fetched: ${fansCountFetched}), works=${accountInfo.worksCount} (fetched: ${worksCountFetched})`);
|
|
|
+ } else {
|
|
|
+ logger.info(`[Baijiahao API] Successfully fetched account info: name=${accountInfo.accountName}, fans=${accountInfo.fansCount} (fetched: ${fansCountFetched}), works=${accountInfo.worksCount} (fetched: ${worksCountFetched}, API total: ${totalWorks}, fetched list: ${worksList.length})`);
|
|
|
+ }
|
|
|
|
|
|
return accountInfo;
|
|
|
} catch (error) {
|
|
|
@@ -3339,7 +3465,7 @@ class HeadlessBrowserService {
|
|
|
// 首先导航到作品管理页面,确保 API 有正确的上下文和权限
|
|
|
const contentManageUrl = 'https://creator.douyin.com/creator-micro/content/manage';
|
|
|
const currentUrl = page.url();
|
|
|
-
|
|
|
+
|
|
|
if (!currentUrl.includes('/content/manage')) {
|
|
|
logger.info(`[DirectAPI] Navigating to content manage page...`);
|
|
|
await page.goto(contentManageUrl, {
|
|
|
@@ -3347,7 +3473,7 @@ class HeadlessBrowserService {
|
|
|
timeout: 30000,
|
|
|
});
|
|
|
await page.waitForTimeout(2000);
|
|
|
-
|
|
|
+
|
|
|
// 检查是否需要登录
|
|
|
const newUrl = page.url();
|
|
|
if (newUrl.includes('login') || newUrl.includes('passport')) {
|
|
|
@@ -3384,7 +3510,7 @@ class HeadlessBrowserService {
|
|
|
// 获取作品数
|
|
|
const awemeList = data?.aweme_list || [];
|
|
|
logger.info(`[DirectAPI] API response: status_code=${data?.status_code}, has_more=${data?.has_more}, max_cursor=${data?.max_cursor}, aweme_list_length=${awemeList.length}`);
|
|
|
-
|
|
|
+
|
|
|
// 检查 API 返回状态
|
|
|
if (data?.status_code !== 0 && data?.status_code !== undefined) {
|
|
|
logger.warn(`[DirectAPI] API returned error status_code: ${data.status_code}`);
|