|
|
@@ -693,28 +693,96 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
print(f"[{self.platform_name}] 当前页面: {current_url}", flush=True)
|
|
|
if "login" in current_url:
|
|
|
raise Exception("Cookie 已过期,请重新登录")
|
|
|
+
|
|
|
+ # 等待页面完全加载,确保签名函数可用
|
|
|
+ print(f"[{self.platform_name}] 等待页面完全加载和签名函数初始化...", flush=True)
|
|
|
+ await asyncio.sleep(3)
|
|
|
+
|
|
|
+ # 检查签名函数是否可用
|
|
|
+ sign_check_attempts = 0
|
|
|
+ max_sign_check_attempts = 10
|
|
|
+ while sign_check_attempts < max_sign_check_attempts:
|
|
|
+ sign_available = await self.page.evaluate("""() => {
|
|
|
+ return typeof window !== 'undefined' && typeof window._webmsxyw === 'function';
|
|
|
+ }""")
|
|
|
+ if sign_available:
|
|
|
+ print(f"[{self.platform_name}] ✓ 签名函数 _webmsxyw 已可用", flush=True)
|
|
|
+ break
|
|
|
+ sign_check_attempts += 1
|
|
|
+ print(f"[{self.platform_name}] ⏳ 等待签名函数... ({sign_check_attempts}/{max_sign_check_attempts})", flush=True)
|
|
|
+ await asyncio.sleep(1)
|
|
|
+
|
|
|
+ if sign_check_attempts >= max_sign_check_attempts:
|
|
|
+ print(f"[{self.platform_name}] ⚠️ 警告: 签名函数 _webmsxyw 在 {max_sign_check_attempts} 次检查后仍不可用", flush=True)
|
|
|
+ print(f"[{self.platform_name}] 继续尝试,但 API 调用可能会失败", flush=True)
|
|
|
|
|
|
async def fetch_notes_page(p):
|
|
|
+ # 再次检查签名函数(每次调用前都检查)
|
|
|
+ sign_available = await self.page.evaluate("""() => {
|
|
|
+ return typeof window !== 'undefined' && typeof window._webmsxyw === 'function';
|
|
|
+ }""")
|
|
|
+
|
|
|
+ if not sign_available:
|
|
|
+ print(f"[{self.platform_name}] ⚠️ 签名函数 _webmsxyw 不可用,等待...", flush=True)
|
|
|
+ await asyncio.sleep(2)
|
|
|
+
|
|
|
return await self.page.evaluate(
|
|
|
"""async (pageNum) => {
|
|
|
try {
|
|
|
- const url = `https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=${pageNum}`;
|
|
|
- const headers = { 'Accept': 'application/json' };
|
|
|
+ // 使用正确的 API 端点:/api/galaxy/v2/creator/note/user/posted
|
|
|
+ const url = `/api/galaxy/v2/creator/note/user/posted?tab=0&page=${pageNum}`;
|
|
|
+ const headers = {
|
|
|
+ 'Accept': 'application/json, text/plain, */*',
|
|
|
+ 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
|
|
|
+ 'Referer': 'https://creator.xiaohongshu.com/new/note-manager',
|
|
|
+ 'Sec-Fetch-Dest': 'empty',
|
|
|
+ 'Sec-Fetch-Mode': 'cors',
|
|
|
+ 'Sec-Fetch-Site': 'same-origin'
|
|
|
+ };
|
|
|
+
|
|
|
+ // 尝试获取签名
|
|
|
+ let signResult = { hasSign: false, x_s: '', x_t: '', x_s_common: '', error: '' };
|
|
|
if (typeof window !== 'undefined' && typeof window._webmsxyw === 'function') {
|
|
|
try {
|
|
|
const sign = window._webmsxyw(url, '');
|
|
|
headers['x-s'] = sign['X-s'];
|
|
|
headers['x-t'] = String(sign['X-t']);
|
|
|
+ // 检查是否有 x-s-common
|
|
|
+ if (sign['X-s-common']) {
|
|
|
+ headers['x-s-common'] = sign['X-s-common'];
|
|
|
+ }
|
|
|
+ signResult = {
|
|
|
+ hasSign: true,
|
|
|
+ x_s: sign['X-s'] ? sign['X-s'].substring(0, 50) + '...' : '',
|
|
|
+ x_t: String(sign['X-t']),
|
|
|
+ x_s_common: sign['X-s-common'] ? sign['X-s-common'].substring(0, 50) + '...' : '',
|
|
|
+ error: ''
|
|
|
+ };
|
|
|
+ console.log('签名生成成功:', signResult);
|
|
|
} catch (e) {
|
|
|
- // ignore sign errors and fallback
|
|
|
+ signResult.error = e.toString();
|
|
|
+ console.error('签名生成失败:', e);
|
|
|
}
|
|
|
+ } else {
|
|
|
+ signResult.error = '_webmsxyw function not found';
|
|
|
+ console.error('签名函数不存在');
|
|
|
}
|
|
|
+
|
|
|
const res = await fetch(url, {
|
|
|
method: 'GET',
|
|
|
credentials: 'include',
|
|
|
headers
|
|
|
});
|
|
|
- return await res.json();
|
|
|
+
|
|
|
+ const responseData = await res.json();
|
|
|
+ return {
|
|
|
+ ...responseData,
|
|
|
+ _debug: {
|
|
|
+ signResult: signResult,
|
|
|
+ status: res.status,
|
|
|
+ statusText: res.statusText
|
|
|
+ }
|
|
|
+ };
|
|
|
} catch (e) {
|
|
|
return { success: false, error: e.toString() };
|
|
|
}
|
|
|
@@ -765,13 +833,30 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
resp = None
|
|
|
for attempt in range(1, 4):
|
|
|
resp = await fetch_notes_page(page)
|
|
|
+
|
|
|
+ # 打印调试信息
|
|
|
+ if resp and isinstance(resp, dict) and resp.get('_debug'):
|
|
|
+ debug_info = resp.get('_debug', {})
|
|
|
+ sign_result = debug_info.get('signResult', {})
|
|
|
+ print(f"[{self.platform_name}] 🔍 调试信息: 签名可用: {sign_result.get('hasSign', False)}, X-S: {sign_result.get('x_s', '')}, X-T: {sign_result.get('x_t', '')}, X-S-Common: {sign_result.get('x_s_common', '')}, 签名错误: {sign_result.get('error', '')}, HTTP 状态: {debug_info.get('status', 'N/A')}", flush=True)
|
|
|
+ resp.pop('_debug', None)
|
|
|
+
|
|
|
if resp and (resp.get('success') or resp.get('code') == 0) and resp.get('data'):
|
|
|
break
|
|
|
print(f"[{self.platform_name}] 拉取作品列表失败,重试 {attempt}/3: {str(resp)[:200]}", flush=True)
|
|
|
await asyncio.sleep(1.2 * attempt)
|
|
|
|
|
|
if not resp or not (resp.get('success') or resp.get('code') == 0) or not resp.get('data'):
|
|
|
- raise Exception(f"无法获取作品列表数据: {resp.get('msg') if isinstance(resp, dict) else resp}")
|
|
|
+ error_msg = resp.get('msg') if isinstance(resp, dict) else str(resp)
|
|
|
+ # 打印详细的错误信息
|
|
|
+ if isinstance(resp, dict):
|
|
|
+ if resp.get('msg'):
|
|
|
+ print(f"[{self.platform_name}] 错误消息: {resp.get('msg')}", flush=True)
|
|
|
+ if resp.get('message'):
|
|
|
+ print(f"[{self.platform_name}] 错误消息: {resp.get('message')}", flush=True)
|
|
|
+ if resp.get('error'):
|
|
|
+ print(f"[{self.platform_name}] 错误: {resp.get('error')}", flush=True)
|
|
|
+ raise Exception(f"无法获取作品列表数据: {error_msg}")
|
|
|
|
|
|
data = resp.get('data', {}) or {}
|
|
|
notes = data.get('notes', []) or []
|
|
|
@@ -858,36 +943,112 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
|
|
|
print(f"[{self.platform_name}] 访问笔记管理页面...", flush=True)
|
|
|
try:
|
|
|
- await self.page.goto("https://creator.xiaohongshu.com/new/note-manager", wait_until="domcontentloaded", timeout=30000)
|
|
|
+ await self.page.goto("https://creator.xiaohongshu.com/new/note-manager", wait_until="domcontentloaded", timeout=60000)
|
|
|
+ print(f"[{self.platform_name}] 页面加载成功", flush=True)
|
|
|
except Exception as nav_error:
|
|
|
print(f"[{self.platform_name}] 导航超时,但继续尝试: {nav_error}", flush=True)
|
|
|
+ # 即使超时也检查当前页面状态
|
|
|
+ try:
|
|
|
+ await asyncio.sleep(2)
|
|
|
+ current_url = self.page.url
|
|
|
+ print(f"[{self.platform_name}] 超时后当前页面: {current_url}", flush=True)
|
|
|
+ except Exception as e:
|
|
|
+ print(f"[{self.platform_name}] 检查页面状态时出错: {e}", flush=True)
|
|
|
|
|
|
current_url = self.page.url
|
|
|
print(f"[{self.platform_name}] 当前页面: {current_url}", flush=True)
|
|
|
if "login" in current_url:
|
|
|
raise Exception("Cookie 已过期,请重新登录")
|
|
|
+
|
|
|
+ # 等待页面完全加载,确保签名函数可用
|
|
|
+ print(f"[{self.platform_name}] 等待页面完全加载和签名函数初始化...", flush=True)
|
|
|
+ await asyncio.sleep(3)
|
|
|
+
|
|
|
+ # 检查签名函数是否可用
|
|
|
+ sign_check_attempts = 0
|
|
|
+ max_sign_check_attempts = 10
|
|
|
+ while sign_check_attempts < max_sign_check_attempts:
|
|
|
+ sign_available = await self.page.evaluate("""() => {
|
|
|
+ return typeof window !== 'undefined' && typeof window._webmsxyw === 'function';
|
|
|
+ }""")
|
|
|
+ if sign_available:
|
|
|
+ print(f"[{self.platform_name}] ✓ 签名函数 _webmsxyw 已可用", flush=True)
|
|
|
+ break
|
|
|
+ sign_check_attempts += 1
|
|
|
+ print(f"[{self.platform_name}] ⏳ 等待签名函数... ({sign_check_attempts}/{max_sign_check_attempts})", flush=True)
|
|
|
+ await asyncio.sleep(1)
|
|
|
+
|
|
|
+ if sign_check_attempts >= max_sign_check_attempts:
|
|
|
+ print(f"[{self.platform_name}] ⚠️ 警告: 签名函数 _webmsxyw 在 {max_sign_check_attempts} 次检查后仍不可用", flush=True)
|
|
|
+ print(f"[{self.platform_name}] 继续尝试,但 API 调用可能会失败", flush=True)
|
|
|
|
|
|
async def fetch_notes_page(p):
|
|
|
+ # 再次检查签名函数(每次调用前都检查)
|
|
|
+ sign_available = await self.page.evaluate("""() => {
|
|
|
+ return typeof window !== 'undefined' && typeof window._webmsxyw === 'function';
|
|
|
+ }""")
|
|
|
+
|
|
|
+ if not sign_available:
|
|
|
+ print(f"[{self.platform_name}] ⚠️ 签名函数 _webmsxyw 不可用,等待...", flush=True)
|
|
|
+ await asyncio.sleep(2)
|
|
|
+
|
|
|
return await self.page.evaluate(
|
|
|
"""async (pageNum) => {
|
|
|
try {
|
|
|
- const url = `https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=${pageNum}`;
|
|
|
- const headers = { 'Accept': 'application/json' };
|
|
|
+ // 使用正确的 API 端点:/api/galaxy/v2/creator/note/user/posted
|
|
|
+ const url = `/api/galaxy/v2/creator/note/user/posted?tab=0&page=${pageNum}`;
|
|
|
+ const headers = {
|
|
|
+ 'Accept': 'application/json, text/plain, */*',
|
|
|
+ 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
|
|
|
+ 'Referer': 'https://creator.xiaohongshu.com/new/note-manager',
|
|
|
+ 'Sec-Fetch-Dest': 'empty',
|
|
|
+ 'Sec-Fetch-Mode': 'cors',
|
|
|
+ 'Sec-Fetch-Site': 'same-origin'
|
|
|
+ };
|
|
|
+
|
|
|
+ // 尝试获取签名
|
|
|
+ let signResult = { hasSign: false, x_s: '', x_t: '', x_s_common: '', error: '' };
|
|
|
if (typeof window !== 'undefined' && typeof window._webmsxyw === 'function') {
|
|
|
try {
|
|
|
const sign = window._webmsxyw(url, '');
|
|
|
headers['x-s'] = sign['X-s'];
|
|
|
headers['x-t'] = String(sign['X-t']);
|
|
|
+ // 检查是否有 x-s-common
|
|
|
+ if (sign['X-s-common']) {
|
|
|
+ headers['x-s-common'] = sign['X-s-common'];
|
|
|
+ }
|
|
|
+ signResult = {
|
|
|
+ hasSign: true,
|
|
|
+ x_s: sign['X-s'] ? sign['X-s'].substring(0, 50) + '...' : '',
|
|
|
+ x_t: String(sign['X-t']),
|
|
|
+ x_s_common: sign['X-s-common'] ? sign['X-s-common'].substring(0, 50) + '...' : '',
|
|
|
+ error: ''
|
|
|
+ };
|
|
|
+ console.log('签名生成成功:', signResult);
|
|
|
} catch (e) {
|
|
|
- // ignore sign errors and fallback
|
|
|
+ signResult.error = e.toString();
|
|
|
+ console.error('签名生成失败:', e);
|
|
|
}
|
|
|
+ } else {
|
|
|
+ signResult.error = '_webmsxyw function not found';
|
|
|
+ console.error('签名函数不存在');
|
|
|
}
|
|
|
+
|
|
|
const res = await fetch(url, {
|
|
|
method: 'GET',
|
|
|
credentials: 'include',
|
|
|
headers
|
|
|
});
|
|
|
- return await res.json();
|
|
|
+
|
|
|
+ const responseData = await res.json();
|
|
|
+ return {
|
|
|
+ ...responseData,
|
|
|
+ _debug: {
|
|
|
+ signResult: signResult,
|
|
|
+ status: res.status,
|
|
|
+ statusText: res.statusText
|
|
|
+ }
|
|
|
+ };
|
|
|
} catch (e) {
|
|
|
return { success: false, error: e.toString() };
|
|
|
}
|
|
|
@@ -945,7 +1106,7 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
async def handle_response(response):
|
|
|
nonlocal captured_total
|
|
|
url = response.url
|
|
|
- if "edith.xiaohongshu.com" not in url or "creator/note/user/posted" not in url:
|
|
|
+ if ("creator.xiaohongshu.com" not in url and "edith.xiaohongshu.com" not in url) or "creator/note/user/posted" not in url:
|
|
|
return
|
|
|
try:
|
|
|
json_data = await response.json()
|
|
|
@@ -1000,9 +1161,22 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
|
|
|
try:
|
|
|
try:
|
|
|
- await self.page.goto("https://creator.xiaohongshu.com/new/note-manager", wait_until="networkidle", timeout=60000)
|
|
|
+ # 使用更宽松的等待条件,避免超时
|
|
|
+ await self.page.goto("https://creator.xiaohongshu.com/new/note-manager", wait_until="domcontentloaded", timeout=90000)
|
|
|
+ print(f"[{self.platform_name}] 页面加载成功", flush=True)
|
|
|
except Exception as nav_error:
|
|
|
print(f"[{self.platform_name}] 导航异常(继续):{nav_error}", flush=True)
|
|
|
+ # 即使超时也继续尝试,可能页面已经部分加载
|
|
|
+ try:
|
|
|
+ await asyncio.sleep(3)
|
|
|
+ current_url = self.page.url
|
|
|
+ print(f"[{self.platform_name}] 超时后当前页面: {current_url}", flush=True)
|
|
|
+ if "login" in current_url:
|
|
|
+ raise Exception("Cookie 已过期,请重新登录")
|
|
|
+ except Exception as e:
|
|
|
+ if "Cookie" in str(e):
|
|
|
+ raise
|
|
|
+ print(f"[{self.platform_name}] 检查页面状态时出错: {e}", flush=True)
|
|
|
|
|
|
await asyncio.sleep(2.0)
|
|
|
|
|
|
@@ -1085,25 +1259,95 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
except Exception:
|
|
|
pass
|
|
|
|
|
|
+ # 添加请求监听,捕获请求头信息
|
|
|
+ captured_requests = []
|
|
|
+ async def handle_request(request):
|
|
|
+ url = request.url
|
|
|
+ if ("creator.xiaohongshu.com" in url or "edith.xiaohongshu.com" in url) and "creator/note/user/posted" in url:
|
|
|
+ headers = request.headers
|
|
|
+ captured_requests.append({
|
|
|
+ "url": url,
|
|
|
+ "method": request.method,
|
|
|
+ "headers": dict(headers),
|
|
|
+ "timestamp": asyncio.get_event_loop().time()
|
|
|
+ })
|
|
|
+ # 打印关键头部信息
|
|
|
+ x_s = headers.get('x-s', '')
|
|
|
+ x_t = headers.get('x-t', '')
|
|
|
+ x_s_common = headers.get('x-s-common', '')
|
|
|
+ print(f"[{self.platform_name}] 📡 API 请求: {url}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] Method: {request.method}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] X-S: {x_s[:50] if x_s else '(none)'}...", flush=True)
|
|
|
+ print(f"[{self.platform_name}] X-T: {x_t}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] X-S-Common: {x_s_common[:50] if x_s_common else '(none)'}...", flush=True)
|
|
|
+ print(f"[{self.platform_name}] Cookie: {headers.get('cookie', '')[:100]}...", flush=True)
|
|
|
+
|
|
|
+ self.page.on("request", handle_request)
|
|
|
+
|
|
|
iters = 0
|
|
|
+ page_count = 0 # 统计实际获取到的页数
|
|
|
+ print(f"[{self.platform_name}] ========== 开始自动分页获取作品 ==========", flush=True)
|
|
|
+ print(f"[{self.platform_name}] 最大迭代次数: {max_iters}, 每页大小: {api_page_size}", flush=True)
|
|
|
+
|
|
|
while iters < max_iters:
|
|
|
iters += 1
|
|
|
+ print(f"\n[{self.platform_name}] ---------- 第 {iters} 次请求 (cursor={cursor}) ----------", flush=True)
|
|
|
resp = await fetch_notes_page(cursor)
|
|
|
+
|
|
|
+ # 打印调试信息
|
|
|
+ if resp and isinstance(resp, dict) and resp.get('_debug'):
|
|
|
+ debug_info = resp.get('_debug', {})
|
|
|
+ sign_result = debug_info.get('signResult', {})
|
|
|
+ print(f"[{self.platform_name}] 🔍 调试信息:", flush=True)
|
|
|
+ print(f"[{self.platform_name}] 签名可用: {sign_result.get('hasSign', False)}", flush=True)
|
|
|
+ if sign_result.get('x_s'):
|
|
|
+ print(f"[{self.platform_name}] X-S: {sign_result.get('x_s', '')}", flush=True)
|
|
|
+ if sign_result.get('x_t'):
|
|
|
+ print(f"[{self.platform_name}] X-T: {sign_result.get('x_t', '')}", flush=True)
|
|
|
+ if sign_result.get('error'):
|
|
|
+ print(f"[{self.platform_name}] 签名错误: {sign_result.get('error', '')}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] HTTP 状态: {debug_info.get('status', 'N/A')} {debug_info.get('statusText', '')}", flush=True)
|
|
|
+ # 移除调试信息,避免影响后续处理
|
|
|
+ resp.pop('_debug', None)
|
|
|
+
|
|
|
if not resp or not isinstance(resp, dict):
|
|
|
- print(f"[{self.platform_name}] 第 {iters} 次拉取无响应,cursor={cursor}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] ❌ 第 {iters} 次拉取无响应,cursor={cursor}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] 响应类型: {type(resp)}, 响应内容: {str(resp)[:500]}", flush=True)
|
|
|
break
|
|
|
if not (resp.get('success') or resp.get('code') == 0) or not resp.get('data'):
|
|
|
- print(f"[{self.platform_name}] 拉取失败 cursor={cursor}: {str(resp)[:200]}", flush=True)
|
|
|
+ error_msg = str(resp)[:500]
|
|
|
+ print(f"[{self.platform_name}] ❌ 拉取失败 cursor={cursor}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] 响应详情: {error_msg}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] success={resp.get('success')}, code={resp.get('code')}, has_data={bool(resp.get('data'))}", flush=True)
|
|
|
+ # 打印详细的错误信息
|
|
|
+ if resp.get('msg'):
|
|
|
+ print(f"[{self.platform_name}] 错误消息: {resp.get('msg')}", flush=True)
|
|
|
+ if resp.get('message'):
|
|
|
+ print(f"[{self.platform_name}] 错误消息: {resp.get('message')}", flush=True)
|
|
|
+ if resp.get('error'):
|
|
|
+ print(f"[{self.platform_name}] 错误: {resp.get('error')}", flush=True)
|
|
|
+ # 打印调试信息
|
|
|
+ if resp.get('_debug'):
|
|
|
+ debug_info = resp.get('_debug', {})
|
|
|
+ print(f"[{self.platform_name}] HTTP 状态: {debug_info.get('status', 'N/A')} {debug_info.get('statusText', '')}", flush=True)
|
|
|
+ sign_result = debug_info.get('signResult', {})
|
|
|
+ if sign_result.get('error'):
|
|
|
+ print(f"[{self.platform_name}] 签名错误: {sign_result.get('error')}", flush=True)
|
|
|
if iters == 1:
|
|
|
+ print(f"[{self.platform_name}] 第一次请求失败,切换到滚动模式", flush=True)
|
|
|
return await collect_by_scrolling()
|
|
|
break
|
|
|
|
|
|
data = resp.get('data', {}) or {}
|
|
|
notes = data.get('notes', []) or []
|
|
|
if not notes:
|
|
|
- print(f"[{self.platform_name}] cursor={cursor} 无作品,停止", flush=True)
|
|
|
+ print(f"[{self.platform_name}] ⚠️ cursor={cursor} 无作品,停止分页", flush=True)
|
|
|
break
|
|
|
|
|
|
+ # 统计页数
|
|
|
+ page_count += 1
|
|
|
+ print(f"[{self.platform_name}] ✅ 第 {page_count} 页获取成功,本页作品数: {len(notes)}", flush=True)
|
|
|
+
|
|
|
tags = data.get('tags', []) or []
|
|
|
if tags:
|
|
|
preferred = 0
|
|
|
@@ -1113,13 +1357,19 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
break
|
|
|
if preferred:
|
|
|
total = max(total, int(preferred))
|
|
|
+ print(f"[{self.platform_name}] 📊 从 tags 获取总数: {total} (preferred)", flush=True)
|
|
|
else:
|
|
|
- total = max(total, max([int(t.get('notes_count', 0) or t.get('notesCount', 0) or t.get('count', 0) or 0) for t in tags] + [0]))
|
|
|
+ tag_total = max([int(t.get('notes_count', 0) or t.get('notesCount', 0) or t.get('count', 0) or 0) for t in tags] + [0])
|
|
|
+ total = max(total, tag_total)
|
|
|
+ if tag_total > 0:
|
|
|
+ print(f"[{self.platform_name}] 📊 从 tags 获取总数: {total}", flush=True)
|
|
|
if not total:
|
|
|
t2 = int(data.get('total', 0) or data.get('total_count', 0) or data.get('totalCount', 0) or 0)
|
|
|
if not t2 and isinstance(data.get('page', {}), dict):
|
|
|
t2 = int(data.get('page', {}).get('total', 0) or data.get('page', {}).get('totalCount', 0) or 0)
|
|
|
total = max(total, t2)
|
|
|
+ if t2 > 0:
|
|
|
+ print(f"[{self.platform_name}] 📊 从 data.total 获取总数: {total}", flush=True)
|
|
|
|
|
|
parsed = parse_notes(notes)
|
|
|
new_items = []
|
|
|
@@ -1129,14 +1379,17 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
new_items.append(w)
|
|
|
works.extend(new_items)
|
|
|
|
|
|
- print(f"[{self.platform_name}] cursor={cursor} got={len(notes)}, new={len(new_items)}, total_now={len(works)}, declared_total={total}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] 📈 累计统计: 本页新作品={len(new_items)}, 累计作品数={len(works)}, 声明总数={total}", flush=True)
|
|
|
|
|
|
if total and len(works) >= total:
|
|
|
+ print(f"[{self.platform_name}] ✅ 已获取全部作品 (累计={len(works)} >= 总数={total}),停止分页", flush=True)
|
|
|
break
|
|
|
if len(new_items) == 0:
|
|
|
+ print(f"[{self.platform_name}] ⚠️ 本页无新作品,停止分页", flush=True)
|
|
|
break
|
|
|
|
|
|
next_page = data.get('page', "")
|
|
|
+ old_cursor = cursor
|
|
|
if next_page == cursor:
|
|
|
next_page = ""
|
|
|
if next_page == -1 or str(next_page) == "-1":
|
|
|
@@ -1146,26 +1399,57 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
cursor = cursor + 1
|
|
|
else:
|
|
|
cursor = len(works) // api_page_size
|
|
|
+ print(f"[{self.platform_name}] 🔄 下一页 cursor: {old_cursor} -> {cursor} (自动递增)", flush=True)
|
|
|
else:
|
|
|
cursor = next_page
|
|
|
+ print(f"[{self.platform_name}] 🔄 下一页 cursor: {old_cursor} -> {cursor} (API返回)", flush=True)
|
|
|
|
|
|
await asyncio.sleep(0.5)
|
|
|
+
|
|
|
+ # 移除请求监听器
|
|
|
+ try:
|
|
|
+ self.page.remove_listener("request", handle_request)
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+
|
|
|
+ print(f"\n[{self.platform_name}] ========== 分页完成 ==========", flush=True)
|
|
|
+ print(f"[{self.platform_name}] 📊 分页统计: 总请求次数={iters}, 成功获取页数={page_count}, 累计作品数={len(works)}, 声明总数={total}", flush=True)
|
|
|
+ if captured_requests:
|
|
|
+ print(f"[{self.platform_name}] 📡 捕获到 {len(captured_requests)} 个 API 请求", flush=True)
|
|
|
+ for i, req in enumerate(captured_requests[:3], 1): # 只显示前3个
|
|
|
+ print(f"[{self.platform_name}] 请求 {i}: {req['method']} {req['url']}", flush=True)
|
|
|
+ if 'x-s' in req['headers']:
|
|
|
+ print(f"[{self.platform_name}] X-S: {req['headers']['x-s'][:50]}...", flush=True)
|
|
|
+ if 'x-t' in req['headers']:
|
|
|
+ print(f"[{self.platform_name}] X-T: {req['headers']['x-t']}", flush=True)
|
|
|
+ print(f"[{self.platform_name}] ========================================\n", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
import traceback
|
|
|
+ error_trace = traceback.format_exc()
|
|
|
print(f"[{self.platform_name}] 发生异常: {e}", flush=True)
|
|
|
traceback.print_exc()
|
|
|
- return WorksResult(success=False, platform=self.platform_name, error=str(e))
|
|
|
+ return WorksResult(
|
|
|
+ success=False,
|
|
|
+ platform=self.platform_name,
|
|
|
+ error=str(e),
|
|
|
+ debug_info=f"异常详情: {error_trace[:500]}"
|
|
|
+ )
|
|
|
finally:
|
|
|
await self.close_browser()
|
|
|
|
|
|
+ debug_info = f"总请求次数={iters}, 成功获取页数={page_count}, 累计作品数={len(works)}, 声明总数={total}"
|
|
|
+ if len(works) == 0:
|
|
|
+ debug_info += " | 警告: 没有获取到任何作品,可能原因: Cookie失效、API调用失败、或账号无作品"
|
|
|
+
|
|
|
return WorksResult(
|
|
|
success=True,
|
|
|
platform=self.platform_name,
|
|
|
works=works,
|
|
|
total=total or len(works),
|
|
|
has_more=False,
|
|
|
- next_page=-1
|
|
|
+ next_page=-1,
|
|
|
+ debug_info=debug_info
|
|
|
)
|
|
|
|
|
|
async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
|