Jelajahi Sumber

Merge branch 'main' of http://gitlab.pubdata.cn/hlm/multi-platform-media-manage

Ethanfly 1 hari lalu
induk
melakukan
51d50af414

+ 25 - 3
server/python/platforms/xiaohongshu.py

@@ -18,7 +18,7 @@ from .base import (
 )
 from playwright.async_api import async_playwright
 
-
+stored_cookies = None
 
 # 添加 matrix 项目路径,用于导入签名脚本
 MATRIX_PATH = Path(__file__).parent.parent.parent.parent / "matrix"
@@ -1639,7 +1639,7 @@ class XiaohongshuPublisher(BasePublisher):
         browser = None
         print(222222222222222222222222222222222222)
         print(work_id)
-
+        global stored_cookies
         try:
             # --- Step 1: 初始化浏览器和 Cookie ---
             cookie_list = self.parse_cookies(cookies)
@@ -1649,7 +1649,11 @@ class XiaohongshuPublisher(BasePublisher):
                 viewport={"width": 1400, "height": 900},
                 user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
             )
-            await context.add_cookies(cookie_list)
+            if os.path.exists("cookies.json"):
+                with open("cookies.json", "r") as f:
+                        stored_cookies = json.load(f)
+            if stored_cookies:
+                await context.add_cookies(stored_cookies)
             page = await context.new_page()
 
             # --- Step 2: 打开小红书主页 ---
@@ -1660,6 +1664,12 @@ class XiaohongshuPublisher(BasePublisher):
             try:
                 if await page.is_visible(".login-container", timeout=3000):
                     await page.wait_for_selector(".login-container", state="hidden", timeout=120000)
+                    stored_cookies = await context.cookies()
+                    with open("xiaohongshu_cookies.json", "w") as f:
+                        json.dump(stored_cookies, f)
+                    
+
+
             except Exception as e:
                 pass  # 忽略超时,继续执行
 
@@ -1700,6 +1710,18 @@ class XiaohongshuPublisher(BasePublisher):
             # --- Step 6 & 7: 依次点击封面图,捕获评论并结构化 ---
             for i, img in enumerate(note_imgs):
                 try:
+                    # >>> 新增:从 img 提取 note_id 并与 work_id 比较 <<<
+                    note_id = await img.evaluate('''el => {
+                        const item = el.closest('.note-item');
+                        if (!item) return null;
+                        const link = item.querySelector('a[href^="/explore/"]');
+                        return link ? link.href.split('/').pop() : null;
+                    }''')
+                    if note_id != work_id:
+                        print(f"note_id {note_id} 与目标 work_id {work_id} 不匹配,跳出循环")
+                        continue
+                    # <<< 新增结束 >>>
+
                     await img.scroll_into_view_if_needed()
                     await asyncio.sleep(0.5)
 

+ 0 - 0
server/python/xiaohongshu_cookies.json