|
|
@@ -18,7 +18,7 @@ from .base import (
|
|
|
)
|
|
|
from playwright.async_api import async_playwright
|
|
|
|
|
|
-
|
|
|
+stored_cookies = None
|
|
|
|
|
|
# 添加 matrix 项目路径,用于导入签名脚本
|
|
|
MATRIX_PATH = Path(__file__).parent.parent.parent.parent / "matrix"
|
|
|
@@ -1639,7 +1639,7 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
browser = None
|
|
|
print(222222222222222222222222222222222222)
|
|
|
print(work_id)
|
|
|
-
|
|
|
+ global stored_cookies
|
|
|
try:
|
|
|
# --- Step 1: 初始化浏览器和 Cookie ---
|
|
|
cookie_list = self.parse_cookies(cookies)
|
|
|
@@ -1649,7 +1649,11 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
viewport={"width": 1400, "height": 900},
|
|
|
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
|
|
)
|
|
|
- await context.add_cookies(cookie_list)
|
|
|
+ if os.path.exists("cookies.json"):
|
|
|
+ with open("cookies.json", "r") as f:
|
|
|
+ stored_cookies = json.load(f)
|
|
|
+ if stored_cookies:
|
|
|
+ await context.add_cookies(stored_cookies)
|
|
|
page = await context.new_page()
|
|
|
|
|
|
# --- Step 2: 打开小红书主页 ---
|
|
|
@@ -1660,6 +1664,12 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
try:
|
|
|
if await page.is_visible(".login-container", timeout=3000):
|
|
|
await page.wait_for_selector(".login-container", state="hidden", timeout=120000)
|
|
|
+ stored_cookies = await context.cookies()
|
|
|
+ with open("xiaohongshu_cookies.json", "w") as f:
|
|
|
+ json.dump(stored_cookies, f)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
except Exception as e:
|
|
|
pass # 忽略超时,继续执行
|
|
|
|
|
|
@@ -1700,6 +1710,18 @@ class XiaohongshuPublisher(BasePublisher):
|
|
|
# --- Step 6 & 7: 依次点击封面图,捕获评论并结构化 ---
|
|
|
for i, img in enumerate(note_imgs):
|
|
|
try:
|
|
|
+ # >>> 新增:从 img 提取 note_id 并与 work_id 比较 <<<
|
|
|
+ note_id = await img.evaluate('''el => {
|
|
|
+ const item = el.closest('.note-item');
|
|
|
+ if (!item) return null;
|
|
|
+ const link = item.querySelector('a[href^="/explore/"]');
|
|
|
+ return link ? link.href.split('/').pop() : null;
|
|
|
+ }''')
|
|
|
+ if note_id != work_id:
|
|
|
+ print(f"note_id {note_id} 与目标 work_id {work_id} 不匹配,跳出循环")
|
|
|
+ continue
|
|
|
+ # <<< 新增结束 >>>
|
|
|
+
|
|
|
await img.scroll_into_view_if_needed()
|
|
|
await asyncio.sleep(0.5)
|
|
|
|