|
|
@@ -535,7 +535,9 @@ class HeadlessBrowserService {
|
|
|
try {
|
|
|
const accountInfo = await this.fetchAccountInfoWithPlaywright(platform, cookies);
|
|
|
accountInfo.worksList = worksList;
|
|
|
+ // 直接使用 Python API 获取的作品数量(最准确,排除了已删除/私密视频)
|
|
|
accountInfo.worksCount = worksList.length;
|
|
|
+ logger.info(`[fetchAccountInfo] Using Python API works count for ${platform}: ${accountInfo.worksCount}`);
|
|
|
return accountInfo;
|
|
|
} catch (playwrightError) {
|
|
|
logger.warn(`[Playwright] Failed to get account info for ${platform}:`, playwrightError);
|
|
|
@@ -593,6 +595,9 @@ class HeadlessBrowserService {
|
|
|
case 'xiaohongshu':
|
|
|
accountInfo = await this.fetchXiaohongshuAccountInfo(page, context, cookies);
|
|
|
break;
|
|
|
+ case 'weixin_video':
|
|
|
+ accountInfo = await this.fetchWeixinVideoAccountInfo(page, context, cookies);
|
|
|
+ break;
|
|
|
default:
|
|
|
accountInfo = this.getDefaultAccountInfo(platform);
|
|
|
}
|
|
|
@@ -672,10 +677,22 @@ class HeadlessBrowserService {
|
|
|
// 监听 work_list 接口 - 获取作品列表
|
|
|
if (url.includes('/work_list') || url.includes('/janus/douyin/creator/pc/work_list')) {
|
|
|
const data = await response.json();
|
|
|
- if (data?.aweme_list) {
|
|
|
- // 获取总数
|
|
|
- if (data.total !== undefined) {
|
|
|
- capturedData.total = data.total;
|
|
|
+ if (data?.aweme_list && data.aweme_list.length > 0) {
|
|
|
+ // 优先从 author.aweme_count 获取真实的作品数(最准确)
|
|
|
+ const firstAweme = data.aweme_list[0];
|
|
|
+ const authorAwemeCount = firstAweme?.author?.aweme_count;
|
|
|
+ if (authorAwemeCount !== undefined && authorAwemeCount > 0) {
|
|
|
+ capturedData.total = authorAwemeCount;
|
|
|
+ logger.info(`[Douyin API] Using author.aweme_count as works count: ${authorAwemeCount}`);
|
|
|
+ } else {
|
|
|
+ // 备用方案:使用 items 数组长度
|
|
|
+ const itemsCount = data?.items?.length || 0;
|
|
|
+ if (itemsCount > 0) {
|
|
|
+ capturedData.total = (capturedData.total || 0) + itemsCount;
|
|
|
+ } else {
|
|
|
+ // 如果没有 items,使用 aweme_list 长度
|
|
|
+ capturedData.total = (capturedData.total || 0) + data.aweme_list.length;
|
|
|
+ }
|
|
|
}
|
|
|
// 解析作品列表
|
|
|
capturedData.worksList = data.aweme_list.map((aweme: Record<string, unknown>) => {
|
|
|
@@ -697,7 +714,7 @@ class HeadlessBrowserService {
|
|
|
},
|
|
|
};
|
|
|
});
|
|
|
- logger.info(`[Douyin API] work_list: total=${capturedData.total}, items=${capturedData.worksList?.length}`);
|
|
|
+ logger.info(`[Douyin API] work_list: itemsCount=${capturedData.total}, aweme_list_length=${capturedData.worksList?.length}`);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -943,11 +960,13 @@ class HeadlessBrowserService {
|
|
|
|
|
|
// 通过 API 获取作品列表
|
|
|
logger.info('[Douyin] Fetching works via API...');
|
|
|
- const apiWorks = await this.fetchWorksDirectApi(page);
|
|
|
+ const apiResult = await this.fetchWorksDirectApi(page);
|
|
|
|
|
|
- if (apiWorks.length > 0) {
|
|
|
- worksCount = apiWorks.length;
|
|
|
- worksList = apiWorks.map(w => ({
|
|
|
+ if (apiResult.works.length > 0) {
|
|
|
+ // 使用 items 累计数量作为作品数(apiResult.total 现在是累计的 items.length)
|
|
|
+ // 如果 total 为 0,则使用 works 列表长度
|
|
|
+ worksCount = apiResult.total > 0 ? apiResult.total : apiResult.works.length;
|
|
|
+ worksList = apiResult.works.map(w => ({
|
|
|
videoId: w.awemeId,
|
|
|
title: w.title,
|
|
|
coverUrl: w.coverUrl,
|
|
|
@@ -959,7 +978,7 @@ class HeadlessBrowserService {
|
|
|
commentCount: w.commentCount,
|
|
|
shareCount: 0,
|
|
|
}));
|
|
|
- logger.info(`[Douyin] Got ${worksCount} works from API`);
|
|
|
+ logger.info(`[Douyin] Got ${apiResult.works.length} works from API, total count: ${worksCount}`);
|
|
|
} else if (capturedData.worksList && capturedData.worksList.length > 0) {
|
|
|
// 如果直接 API 调用失败,使用监听到的数据
|
|
|
worksCount = capturedData.total || capturedData.worksList.length;
|
|
|
@@ -1099,6 +1118,180 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
+ * Fetches account information for a WeChat Channels (视频号) account.
|
|
|
+ *
|
|
|
+ * Flow, as implemented below:
|
|
|
+ * 1. Start from placeholder values; the account id is taken from the wxuin or uin cookie when available, else it is timestamp-based.
|
|
|
+ * 2. Navigate to the creator platform home page and pause for a fixed 3 seconds.
|
|
|
+ * 3. If the resulting URL contains login or passport, log a warning and return the values gathered so far.
|
|
|
+ * 4. Otherwise scrape avatar, nickname, Channels ID, fans count and works count from the DOM and use whatever was found.
|
|
|
+ *
|
|
|
+ * Errors raised inside the try block are logged with logger.warn and not rethrown.
|
|
|
+ *
|
|
|
+ * @param page Page used for navigation and DOM scraping.
|
|
|
+ * @param _context Not used by this method.
|
|
|
+ * @param cookies Cookies searched for a wxuin or uin entry.
|
|
|
+ * @returns accountId, accountName, avatarUrl, fansCount and worksCount (defaults where nothing was found).
|
|
|
+ */
|
|
|
+ private async fetchWeixinVideoAccountInfo(
|
|
|
+ page: Page,
|
|
|
+ _context: BrowserContext,
|
|
|
+ cookies: CookieData[]
|
|
|
+ ): Promise<AccountInfo> {
|
|
|
+ let accountId = `weixin_video_${Date.now()}`;
|
|
|
+ let accountName = '视频号账号';
|
|
|
+ let avatarUrl = '';
|
|
|
+ let fansCount = 0;
|
|
|
+ let worksCount = 0;
|
|
|
+
|
|
|
+ try {
|
|
|
+ // Derive the account id from the wxuin or uin cookie when one is present; otherwise the timestamp-based default above is kept
|
|
|
+ const uinCookie = cookies.find(c => c.name === 'wxuin' || c.name === 'uin');
|
|
|
+ if (uinCookie?.value) {
|
|
|
+ accountId = `weixin_video_${uinCookie.value}`;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Open the Channels creator platform home page
|
|
|
+ await page.goto('https://channels.weixin.qq.com/platform/home', {
|
|
|
+ waitUntil: 'domcontentloaded',
|
|
|
+ timeout: 30000,
|
|
|
+ });
|
|
|
+
|
|
|
+ await page.waitForTimeout(3000); // fixed 3 second pause before the page is inspected
|
|
|
+
|
|
|
+ // A URL containing login or passport after navigation is treated as an expired session: return the values gathered so far
|
|
|
+ const currentUrl = page.url();
|
|
|
+ if (currentUrl.includes('login') || currentUrl.includes('passport')) {
|
|
|
+ logger.warn('[WeixinVideo] Cookie expired, needs login');
|
|
|
+ return { accountId, accountName, avatarUrl, fansCount, worksCount };
|
|
|
+ }
|
|
|
+
|
|
|
+ // Scrape account details from the DOM; the callback works on document, i.e. it executes inside the browser page
|
|
|
+ const accountData = await page.evaluate(() => {
|
|
|
+ const result: { name?: string; avatar?: string; fans?: number; works?: number; finderId?: string } = {};
|
|
|
+
|
|
|
+ try {
|
|
|
+ // Avatar: try the selectors in order and keep the first image whose src starts with http
|
|
|
+ const avatarSelectors = [
|
|
|
+ '.finder-avatar img',
|
|
|
+ '.account-avatar img',
|
|
|
+ '.user-avatar img',
|
|
|
+ '[class*="avatar"] img',
|
|
|
+ '[class*="Avatar"] img',
|
|
|
+ 'img[class*="avatar"]',
|
|
|
+ '.header-user img',
|
|
|
+ '.header img[src*="wx.qlogo"]',
|
|
|
+ '.header img[src*="mmbiz"]',
|
|
|
+ 'img[src*="wx.qlogo"]',
|
|
|
+ 'img[src*="mmbiz.qpic"]',
|
|
|
+ ];
|
|
|
+
|
|
|
+ for (const selector of avatarSelectors) {
|
|
|
+ const el = document.querySelector(selector) as HTMLImageElement;
|
|
|
+ if (el?.src && el.src.startsWith('http')) {
|
|
|
+ result.avatar = el.src;
|
|
|
+ console.log('[WeixinVideo] Found avatar:', el.src);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Nickname: keep the first selector whose trimmed text is 2 to 30 characters long
|
|
|
+ const nameSelectors = [
|
|
|
+ '.finder-nickname',
|
|
|
+ '.account-name',
|
|
|
+ '.user-name',
|
|
|
+ '[class*="nickname"]',
|
|
|
+ '[class*="userName"]',
|
|
|
+ '[class*="user-name"]',
|
|
|
+ '.header-user-name',
|
|
|
+ 'h2.name',
|
|
|
+ '.name-text',
|
|
|
+ ];
|
|
|
+
|
|
|
+ for (const selector of nameSelectors) {
|
|
|
+ const el = document.querySelector(selector);
|
|
|
+ const text = el?.textContent?.trim();
|
|
|
+ if (text && text.length >= 2 && text.length <= 30) {
|
|
|
+ result.name = text;
|
|
|
+ console.log('[WeixinVideo] Found name:', text);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Channels ID: the alphanumeric/underscore token that follows the 视频号ID label in the page text
|
|
|
+ const bodyText = document.body.innerText || '';
|
|
|
+ const finderIdMatch = bodyText.match(/视频号ID[::]\s*([a-zA-Z0-9_]+)/);
|
|
|
+ if (finderIdMatch) {
|
|
|
+ result.finderId = finderIdMatch[1];
|
|
|
+ }
|
|
|
+
|
|
|
+ // Best-effort extraction of fans and works counts from elements whose class name contains stat, count or number
|
|
|
+ const statsTexts = document.querySelectorAll('[class*="stat"], [class*="count"], [class*="number"]');
|
|
|
+ statsTexts.forEach(el => {
|
|
|
+ const text = el.textContent || '';
|
|
|
+ const parent = el.parentElement?.textContent || '';
|
|
|
+
|
|
|
+ // Fans: a trailing 万, w or W multiplies the number by 10000; every matching element overwrites the previous value
|
|
|
+ if (parent.includes('粉丝') || text.includes('粉丝')) {
|
|
|
+ const match = text.match(/(\d+(?:\.\d+)?[万wW]?)/);
|
|
|
+ if (match) {
|
|
|
+ let count = parseFloat(match[1]);
|
|
|
+ if (match[1].includes('万') || match[1].toLowerCase().includes('w')) {
|
|
|
+ count = count * 10000;
|
|
|
+ }
|
|
|
+ result.fans = Math.floor(count);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Works: first integer in the element text, last match wins. NOTE(review): any parent text containing 视频 also matches, which may pick up unrelated counters - confirm against the real page
|
|
|
+ if (parent.includes('作品') || parent.includes('视频') || text.includes('作品')) {
|
|
|
+ const match = text.match(/(\d+)/);
|
|
|
+ if (match) {
|
|
|
+ result.works = parseInt(match[1], 10);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ // Fallback when no selector produced a nickname: scan span, div and h1-h3 elements in document order
|
|
|
+ if (!result.name) {
|
|
|
+ const allElements = document.querySelectorAll('span, div, h1, h2, h3');
|
|
|
+ for (const el of allElements) {
|
|
|
+ const text = el.textContent?.trim();
|
|
|
+ const rect = (el as HTMLElement).getBoundingClientRect();
|
|
|
+ // Accept the first element with non-zero width in the top 200px whose text is 2 to 20 characters, contains a CJK or Latin letter, and has none of the excluded UI words
|
|
|
+ if (text && rect.top < 200 && rect.width > 0 &&
|
|
|
+ text.length >= 2 && text.length <= 20 &&
|
|
|
+ /[\u4e00-\u9fa5a-zA-Z]/.test(text) &&
|
|
|
+ !/粉丝|关注|作品|视频|数据|登录|注册|设置|首页/.test(text)) {
|
|
|
+ result.name = text;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ } catch (e) {
|
|
|
+ console.error('[WeixinVideo] Extract error:', e);
|
|
|
+ }
|
|
|
+
|
|
|
+ return result;
|
|
|
+ });
|
|
|
+
|
|
|
+ logger.info(`[WeixinVideo] Extracted account data:`, accountData);
|
|
|
+
|
|
|
+ // Override the defaults with whatever the page yielded
|
|
|
+ if (accountData.name) {
|
|
|
+ accountName = accountData.name;
|
|
|
+ }
|
|
|
+ if (accountData.avatar) {
|
|
|
+ avatarUrl = accountData.avatar;
|
|
|
+ }
|
|
|
+ if (accountData.fans !== undefined) {
|
|
|
+ fansCount = accountData.fans;
|
|
|
+ }
|
|
|
+ if (accountData.works !== undefined) {
|
|
|
+ worksCount = accountData.works;
|
|
|
+ }
|
|
|
+ if (accountData.finderId) {
|
|
|
+ accountId = `weixin_video_${accountData.finderId}`; // replaces the cookie-derived or timestamp-based id
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info(`[WeixinVideo] Account info: id=${accountId}, name=${accountName}, avatar=${avatarUrl ? 'yes' : 'no'}, fans=${fansCount}`); // worksCount is not part of this log line
|
|
|
+
|
|
|
+ } catch (error) { // best effort: log and fall through to return whatever was gathered
|
|
|
+ logger.warn('Failed to fetch WeixinVideo account info:', error);
|
|
|
+ }
|
|
|
+
|
|
|
+ return { accountId, accountName, avatarUrl, fansCount, worksCount };
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
* 获取小红书账号信息 - 通过 API 方式获取
|
|
|
*/
|
|
|
private async fetchXiaohongshuAccountInfo(
|
|
|
@@ -1578,13 +1771,14 @@ class HeadlessBrowserService {
|
|
|
|
|
|
logger.info(`[Xiaohongshu] Fetched ${worksList.length} works via API`);
|
|
|
|
|
|
- // 更新作品数:优先使用从 API tags 获取的总数
|
|
|
- if (totalNotesCount > 0) {
|
|
|
- worksCount = totalNotesCount;
|
|
|
- logger.info(`[Xiaohongshu] Using total notes count from API: ${worksCount}`);
|
|
|
- } else if (worksList.length > 0) {
|
|
|
+ // 更新作品数:直接使用获取到的 notes 数量(更准确)
|
|
|
+ // 只有当 notes 为空时才使用 tags 中的 notes_count
|
|
|
+ if (worksList.length > 0) {
|
|
|
worksCount = worksList.length;
|
|
|
- logger.info(`[Xiaohongshu] Using works list length: ${worksCount}`);
|
|
|
+ logger.info(`[Xiaohongshu] Using actual notes count: ${worksCount}`);
|
|
|
+ } else if (totalNotesCount > 0) {
|
|
|
+ worksCount = totalNotesCount;
|
|
|
+ logger.info(`[Xiaohongshu] Using notes count from tags: ${worksCount}`);
|
|
|
}
|
|
|
} catch (worksError) {
|
|
|
logger.warn('[Xiaohongshu] Failed to fetch works list:', worksError);
|
|
|
@@ -2695,14 +2889,20 @@ class HeadlessBrowserService {
|
|
|
/**
|
|
|
* 直接调用抖音 API 获取作品列表
|
|
|
* 使用新的 work_list 接口,支持分页加载
|
|
|
+ * 返回作品列表和总作品数
|
|
|
+ *
|
|
|
+ * 注意:需要先导航到作品管理页面才能正确调用 API
|
|
|
*/
|
|
|
- private async fetchWorksDirectApi(page: Page): Promise<Array<{
|
|
|
- awemeId: string;
|
|
|
- title: string;
|
|
|
- coverUrl: string;
|
|
|
- commentCount: number;
|
|
|
- createTime?: number;
|
|
|
- }>> {
|
|
|
+ private async fetchWorksDirectApi(page: Page): Promise<{
|
|
|
+ works: Array<{
|
|
|
+ awemeId: string;
|
|
|
+ title: string;
|
|
|
+ coverUrl: string;
|
|
|
+ commentCount: number;
|
|
|
+ createTime?: number;
|
|
|
+ }>;
|
|
|
+ total: number;
|
|
|
+ }> {
|
|
|
const works: Array<{
|
|
|
awemeId: string;
|
|
|
title: string;
|
|
|
@@ -2710,8 +2910,29 @@ class HeadlessBrowserService {
|
|
|
commentCount: number;
|
|
|
createTime?: number;
|
|
|
}> = [];
|
|
|
+ let totalCount = 0; // 从 API 获取的总作品数
|
|
|
|
|
|
try {
|
|
|
+ // 首先导航到作品管理页面,确保 API 有正确的上下文和权限
|
|
|
+ const contentManageUrl = 'https://creator.douyin.com/creator-micro/content/manage';
|
|
|
+ const currentUrl = page.url();
|
|
|
+
|
|
|
+ if (!currentUrl.includes('/content/manage')) {
|
|
|
+ logger.info(`[DirectAPI] Navigating to content manage page...`);
|
|
|
+ await page.goto(contentManageUrl, {
|
|
|
+ waitUntil: 'domcontentloaded',
|
|
|
+ timeout: 30000,
|
|
|
+ });
|
|
|
+ await page.waitForTimeout(2000);
|
|
|
+
|
|
|
+ // 检查是否需要登录
|
|
|
+ const newUrl = page.url();
|
|
|
+ if (newUrl.includes('login') || newUrl.includes('passport')) {
|
|
|
+ logger.warn('[DirectAPI] Not logged in, cannot fetch works');
|
|
|
+ return { works, total: 0 };
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
let hasMore = true;
|
|
|
let maxCursor = 0;
|
|
|
let pageCount = 0;
|
|
|
@@ -2723,9 +2944,9 @@ class HeadlessBrowserService {
|
|
|
|
|
|
const data = await page.evaluate(async (cursor: number) => {
|
|
|
// 使用新的 work_list API 接口
|
|
|
- // status: 不传或传空表示获取全部状态的作品
|
|
|
- // count: 每页获取数量,增加到20减少请求次数
|
|
|
- const url = `https://creator.douyin.com/janus/douyin/creator/pc/work_list?scene=star_atlas&device_platform=android&count=20&max_cursor=${cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai&aid=1128`;
|
|
|
+ // status: 0 表示获取全部已发布的作品
|
|
|
+ // count: 每页获取数量
|
|
|
+ const url = `https://creator.douyin.com/janus/douyin/creator/pc/work_list?status=0&scene=star_atlas&device_platform=android&count=20&max_cursor=${cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai&aid=1128`;
|
|
|
|
|
|
const resp = await fetch(url, {
|
|
|
credentials: 'include',
|
|
|
@@ -2737,10 +2958,31 @@ class HeadlessBrowserService {
|
|
|
return resp.json();
|
|
|
}, maxCursor);
|
|
|
|
|
|
- logger.info(`[DirectAPI] API response: has_more=${data?.has_more}, max_cursor=${data?.max_cursor}, aweme_list_length=${data?.aweme_list?.length || 0}`);
|
|
|
+ // 获取作品数
|
|
|
+ const awemeList = data?.aweme_list || [];
|
|
|
+ logger.info(`[DirectAPI] API response: status_code=${data?.status_code}, has_more=${data?.has_more}, max_cursor=${data?.max_cursor}, aweme_list_length=${awemeList.length}`);
|
|
|
+
|
|
|
+ // 检查 API 返回状态
|
|
|
+ if (data?.status_code !== 0 && data?.status_code !== undefined) {
|
|
|
+ logger.warn(`[DirectAPI] API returned error status_code: ${data.status_code}`);
|
|
|
+ // status_code: 8 表示未授权,可能需要重新登录
|
|
|
+ if (data.status_code === 8) {
|
|
|
+ logger.warn('[DirectAPI] status_code 8: Not authorized, may need re-login');
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 优先从第一个作品的 author.aweme_count 获取真实作品数(只在第一页获取)
|
|
|
+ if (pageCount === 1 && awemeList.length > 0) {
|
|
|
+ const firstAweme = awemeList[0];
|
|
|
+ const authorAwemeCount = firstAweme?.author?.aweme_count;
|
|
|
+ if (authorAwemeCount !== undefined && authorAwemeCount > 0) {
|
|
|
+ totalCount = authorAwemeCount;
|
|
|
+ logger.info(`[DirectAPI] Using author.aweme_count as total works: ${totalCount}`);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
// 解析 aweme_list 中的作品数据
|
|
|
- const awemeList = data?.aweme_list || [];
|
|
|
logger.info(`[DirectAPI] Page ${pageCount}: got ${awemeList.length} works from aweme_list`);
|
|
|
|
|
|
for (const aweme of awemeList) {
|
|
|
@@ -2817,12 +3059,12 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- logger.info(`[DirectAPI] Total fetched ${works.length} works from ${pageCount} pages`);
|
|
|
+ logger.info(`[DirectAPI] Total fetched ${works.length} works from ${pageCount} pages, items count: ${totalCount}`);
|
|
|
} catch (e) {
|
|
|
logger.warn('[DirectAPI] Failed to fetch works:', e);
|
|
|
}
|
|
|
|
|
|
- return works;
|
|
|
+ return { works, total: totalCount };
|
|
|
}
|
|
|
|
|
|
/**
|