|
@@ -5,6 +5,7 @@
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
import asyncio
|
|
import asyncio
|
|
|
|
|
+import json
|
|
|
import os
|
|
import os
|
|
|
from datetime import datetime
|
|
from datetime import datetime
|
|
|
from typing import List
|
|
from typing import List
|
|
@@ -969,19 +970,97 @@ class WeixinPublisher(BasePublisher):
|
|
|
status='need_action'
|
|
status='need_action'
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
async def _get_works_fallback_dom(self, page_size: int) -> tuple:
    """Scrape the works list from the current page's DOM when the API call fails.

    Fallback path (e.g. for new accounts or a different entry page). Reads at
    most ``page_size`` cards matching ``div.post-feed-item`` (or, if none are
    found, ``[class*='post-feed']``) from ``self.page``.

    Args:
        page_size: Maximum number of cards to parse.

    Returns:
        A 4-tuple ``(works, total, has_more, next_page)``: ``works`` is the list
        of parsed ``WorkItem`` objects, ``total`` is ``len(works)``,
        ``has_more`` is True when the page holds more cards than ``page_size``,
        and ``next_page`` is always ``""`` (the DOM offers no paging cursor).

    Any ``Exception`` is caught and logged; whatever was collected before the
    failure is returned.
    """
    import hashlib

    # Icon selector -> WorkItem counter it labels. Order matters: the first
    # matching icon wins, mirroring an if/elif chain.
    icon_fields = (
        ("span.weui-icon-outlined-eyes-on", "play_count"),
        ("span.weui-icon-outlined-like", "like_count"),
        ("span.weui-icon-outlined-comment", "comment_count"),
        ("use[xlink\\:href='#icon-share']", "share_count"),
        ("use[xlink\\:href='#icon-thumb']", "collect_count"),
    )

    def stable_hash(text: str) -> str:
        """Return a short digest that is identical across interpreter runs."""
        return hashlib.sha256(text.encode("utf-8")).hexdigest()[:12]

    async def text_of(locator) -> str:
        """Return the stripped text of ``locator``, or "" when it matches nothing."""
        if await locator.count() == 0:
            return ""
        return (await locator.text_content() or "").strip()

    async def src_of(locator) -> str:
        """Return the ``src`` attribute of ``locator``, or "" when absent."""
        if await locator.count() == 0:
            return ""
        return await locator.get_attribute("src") or ""

    works: List[WorkItem] = []
    total = 0
    has_more = False
    try:
        # Wait until any plausible feed container has rendered. The broad
        # selectors are only used as a readiness probe, not as item lists.
        for selector in (
            "div.post-feed-item",
            "[class*='post-feed']",
            "[class*='feed-item']",
            "div[class*='post']",
        ):
            try:
                await self.page.wait_for_selector(selector, timeout=8000)
            except Exception:
                continue
            break

        post_items = self.page.locator("div.post-feed-item")
        item_count = await post_items.count()
        if item_count == 0:
            post_items = self.page.locator("[class*='post-feed']")
            item_count = await post_items.count()

        for i in range(min(item_count, page_size)):
            try:
                item = post_items.nth(i)

                # Prefer the dedicated thumbnail, else any image in the card.
                cover_url = await src_of(item.locator("div.media img.thumb").first)
                if not cover_url:
                    cover_url = await src_of(item.locator("img").first)

                title = await text_of(item.locator("div.post-title").first)
                publish_time = await text_of(item.locator("div.post-time span").first)

                counts = {field: 0 for _, field in icon_fields}
                data_items = item.locator("div.post-data div.data-item")
                for j in range(await data_items.count()):
                    data_item = data_items.nth(j)
                    # Guarded read: a missing/duplicated span.count must not
                    # discard the whole card.
                    count_text = await text_of(data_item.locator("span.count").first) or "0"
                    for icon_selector, field in icon_fields:
                        if await data_item.locator(icon_selector).count() > 0:
                            counts[field] = self._parse_count(count_text)
                            break

                # The DOM exposes no real id, so synthesize one. Built-in hash()
                # is salted per process for str, which would change the id on
                # every restart; use a content digest instead.
                work_id = f"weixin_{i}_{stable_hash(title)}_{stable_hash(publish_time)}"

                works.append(WorkItem(
                    work_id=work_id,
                    title=title or "无标题",
                    cover_url=cover_url,
                    duration=0,
                    status="published",
                    publish_time=publish_time,
                    **counts,
                ))
            except Exception as e:
                # Best effort: skip the broken card and keep going.
                print(f"[{self.platform_name}] DOM 解析作品 {i} 失败: {e}", flush=True)
                continue

        total = len(works)
        has_more = item_count > page_size
        print(f"[{self.platform_name}] DOM 回退获取 {len(works)} 条", flush=True)
    except Exception as e:
        print(f"[{self.platform_name}] DOM 回退失败: {e}", flush=True)
    return (works, total, has_more, "")
|
|
|
|
|
+
|
|
|
async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
|
|
async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- print(f"1111111111111111111")
|
|
|
|
|
- """获取视频号作品列表"""
|
|
|
|
|
|
|
+ """获取视频号作品列表(调用 post_list 接口)
|
|
|
|
|
+ page: 页码从 0 开始,或上一页返回的 rawKeyBuff/lastBuff 字符串
|
|
|
|
|
+ """
|
|
|
|
|
+ # 分页:首页 currentPage=1/rawKeyBuff=null,下一页用 currentPage 递增或 rawKeyBuff
|
|
|
|
|
+ if page is None or page == "" or (isinstance(page, int) and page == 0):
|
|
|
|
|
+ current_page = 1
|
|
|
|
|
+ raw_key_buff = None
|
|
|
|
|
+ elif isinstance(page, int):
|
|
|
|
|
+ current_page = page + 1
|
|
|
|
|
+ raw_key_buff = None
|
|
|
|
|
+ else:
|
|
|
|
|
+ current_page = 1
|
|
|
|
|
+ raw_key_buff = str(page)
|
|
|
|
|
+ ts_ms = str(int(time.time() * 1000))
|
|
|
print(f"\n{'='*60}")
|
|
print(f"\n{'='*60}")
|
|
|
- print(f"[{self.platform_name}] 获取作品列表")
|
|
|
|
|
- print(f"[{self.platform_name}] page={page}, page_size={page_size}")
|
|
|
|
|
|
|
+ print(f"[{self.platform_name}] 获取作品列表 currentPage={current_page}, pageSize={page_size}, rawKeyBuff={raw_key_buff[:40] if raw_key_buff else 'null'}...")
|
|
|
print(f"{'='*60}")
|
|
print(f"{'='*60}")
|
|
|
|
|
|
|
|
works: List[WorkItem] = []
|
|
works: List[WorkItem] = []
|
|
|
total = 0
|
|
total = 0
|
|
|
has_more = False
|
|
has_more = False
|
|
|
|
|
+ next_page = ""
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
await self.init_browser()
|
|
await self.init_browser()
|
|
@@ -991,131 +1070,136 @@ class WeixinPublisher(BasePublisher):
|
|
|
if not self.page:
|
|
if not self.page:
|
|
|
raise Exception("Page not initialized")
|
|
raise Exception("Page not initialized")
|
|
|
|
|
|
|
|
- # 访问视频号创作者中心
|
|
|
|
|
- await self.page.goto("https://channels.weixin.qq.com/platform/post/list")
|
|
|
|
|
- await asyncio.sleep(5)
|
|
|
|
|
- print(f"1111111111111111")
|
|
|
|
|
- # 检查登录状态
|
|
|
|
|
|
|
+ await self.page.goto("https://channels.weixin.qq.com/micro/content/post/list", timeout=30000)
|
|
|
|
|
+ await asyncio.sleep(3)
|
|
|
|
|
+
|
|
|
current_url = self.page.url
|
|
current_url = self.page.url
|
|
|
if "login" in current_url:
|
|
if "login" in current_url:
|
|
|
- print(f"2111111111111111")
|
|
|
|
|
- raise Exception("Cookie 已过期,请重新登录")
|
|
|
|
|
|
|
+ raise Exception("Cookie 已过期,请重新登录")
|
|
|
|
|
|
|
|
- # 视频号使用页面爬取方式获取作品列表
|
|
|
|
|
- # 等待作品列表加载(增加等待时间,并添加截图调试)
|
|
|
|
|
- try:
|
|
|
|
|
- await self.page.wait_for_selector('div.post-feed-item', timeout=15000)
|
|
|
|
|
- except:
|
|
|
|
|
- # 超时后打印当前 URL 和截图
|
|
|
|
|
- current_url = self.page.url
|
|
|
|
|
- print(f"[{self.platform_name}] 等待超时,当前 URL: {current_url}")
|
|
|
|
|
- # 截图保存
|
|
|
|
|
- screenshot_path = f"weixin_timeout_{int(asyncio.get_event_loop().time())}.png"
|
|
|
|
|
- await self.page.screenshot(path=screenshot_path)
|
|
|
|
|
- print(f"[{self.platform_name}] 截图已保存: {screenshot_path}")
|
|
|
|
|
- raise Exception(f"页面加载超时,当前 URL: {current_url}")
|
|
|
|
|
|
|
+ api_url = "https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_list"
|
|
|
|
|
+ req_body = {
|
|
|
|
|
+ "pageSize": page_size,
|
|
|
|
|
+ "currentPage": current_page,
|
|
|
|
|
+ "userpageType": 11,
|
|
|
|
|
+ "stickyOrder": True,
|
|
|
|
|
+ "timestamp": ts_ms,
|
|
|
|
|
+ "_log_finder_uin": "",
|
|
|
|
|
+ "_log_finder_id": "",
|
|
|
|
|
+ "rawKeyBuff": raw_key_buff,
|
|
|
|
|
+ "pluginSessionId": None,
|
|
|
|
|
+ "scene": 7,
|
|
|
|
|
+ "reqScene": 7,
|
|
|
|
|
+ }
|
|
|
|
|
+ body_str = json.dumps(req_body)
|
|
|
|
|
|
|
|
- # 打印 DOM 结构
|
|
|
|
|
- page_html = await self.page.content()
|
|
|
|
|
- print(f"[{self.platform_name}] ========== 页面 DOM 开始 ==========")
|
|
|
|
|
- print(page_html[:5000]) # 打印前5000个字符
|
|
|
|
|
- print(f"[{self.platform_name}] ========== 页面 DOM 结束 ==========")
|
|
|
|
|
|
|
+ response = await self.page.evaluate("""
|
|
|
|
|
+ async ([url, bodyStr]) => {
|
|
|
|
|
+ try {
|
|
|
|
|
+ const resp = await fetch(url, {
|
|
|
|
|
+ method: 'POST',
|
|
|
|
|
+ credentials: 'include',
|
|
|
|
|
+ headers: {
|
|
|
|
|
+ 'Content-Type': 'application/json',
|
|
|
|
|
+ 'Accept': '*/*',
|
|
|
|
|
+ 'Referer': 'https://channels.weixin.qq.com/micro/content/post/list'
|
|
|
|
|
+ },
|
|
|
|
|
+ body: bodyStr
|
|
|
|
|
+ });
|
|
|
|
|
+ return await resp.json();
|
|
|
|
|
+ } catch (e) {
|
|
|
|
|
+ return { error: e.toString() };
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ """, [api_url, body_str])
|
|
|
|
|
|
|
|
- # 获取所有作品项
|
|
|
|
|
- post_items = self.page.locator('div.post-feed-item')
|
|
|
|
|
- item_count = await post_items.count()
|
|
|
|
|
|
|
+ is_first_page = current_page == 1 and raw_key_buff is None
|
|
|
|
|
+ if response.get("error"):
|
|
|
|
|
+ print(f"[{self.platform_name}] API 请求失败: {response.get('error')}", flush=True)
|
|
|
|
|
+ if is_first_page:
|
|
|
|
|
+ works, total, has_more, next_page = await self._get_works_fallback_dom(page_size)
|
|
|
|
|
+ if works:
|
|
|
|
|
+ return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more, next_page=next_page)
|
|
|
|
|
+ return WorksResult(success=False, platform=self.platform_name, error=response.get("error", "API 请求失败"))
|
|
|
|
|
|
|
|
- print(f"[{self.platform_name}] 找到 {item_count} 个作品项")
|
|
|
|
|
|
|
+ err_code = response.get("errCode", -1)
|
|
|
|
|
+ if err_code != 0:
|
|
|
|
|
+ err_msg = response.get("errMsg", "unknown")
|
|
|
|
|
+ print(f"[{self.platform_name}] API errCode={err_code}, errMsg={err_msg}, 完整响应(前800字): {json.dumps(response, ensure_ascii=False)[:800]}", flush=True)
|
|
|
|
|
+ if is_first_page:
|
|
|
|
|
+ works, total, has_more, next_page = await self._get_works_fallback_dom(page_size)
|
|
|
|
|
+ if works:
|
|
|
|
|
+ return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more, next_page=next_page)
|
|
|
|
|
+ return WorksResult(success=False, platform=self.platform_name, error=f"errCode={err_code}, errMsg={err_msg}")
|
|
|
|
|
|
|
|
- for i in range(min(item_count, page_size)):
|
|
|
|
|
|
|
+ data = response.get("data") or {}
|
|
|
|
|
+ raw_list = data.get("list") or []
|
|
|
|
|
+ total = int(data.get("totalCount") or 0)
|
|
|
|
|
+ has_more = bool(data.get("continueFlag", False))
|
|
|
|
|
+ next_page = (data.get("lastBuff") or "").strip()
|
|
|
|
|
+
|
|
|
|
|
+ print(f"[{self.platform_name}] API 响应: list_len={len(raw_list)}, totalCount={total}, continueFlag={has_more}, lastBuff={next_page[:50] if next_page else ''}...")
|
|
|
|
|
+
|
|
|
|
|
+ if is_first_page and len(raw_list) == 0:
|
|
|
|
|
+ works_fb, total_fb, has_more_fb, _ = await self._get_works_fallback_dom(page_size)
|
|
|
|
|
+ if works_fb:
|
|
|
|
|
+ return WorksResult(success=True, platform=self.platform_name, works=works_fb, total=total_fb, has_more=has_more_fb, next_page="")
|
|
|
|
|
+
|
|
|
|
|
+ for item in raw_list:
|
|
|
try:
|
|
try:
|
|
|
- item = post_items.nth(i)
|
|
|
|
|
-
|
|
|
|
|
- # 获取封面
|
|
|
|
|
- cover_el = item.locator('div.media img.thumb').first
|
|
|
|
|
- cover_url = ''
|
|
|
|
|
- if await cover_el.count() > 0:
|
|
|
|
|
- cover_url = await cover_el.get_attribute('src') or ''
|
|
|
|
|
|
|
+ work_id = str(item.get("objectId") or item.get("id") or "").strip()
|
|
|
|
|
+ if not work_id:
|
|
|
|
|
+ work_id = f"weixin_{hash(item.get('createTime',0))}_{hash(item.get('desc', {}).get('description',''))}"
|
|
|
|
|
|
|
|
- # 获取标题
|
|
|
|
|
- title_el = item.locator('div.post-title').first
|
|
|
|
|
- title = ''
|
|
|
|
|
- if await title_el.count() > 0:
|
|
|
|
|
- title = await title_el.text_content() or ''
|
|
|
|
|
- title = title.strip()
|
|
|
|
|
|
|
+ desc = item.get("desc") or {}
|
|
|
|
|
+ title = (desc.get("description") or "").strip() or "无标题"
|
|
|
|
|
+ cover_url = ""
|
|
|
|
|
+ duration = 0
|
|
|
|
|
+ media_list = desc.get("media") or []
|
|
|
|
|
+ if media_list and isinstance(media_list[0], dict):
|
|
|
|
|
+ m = media_list[0]
|
|
|
|
|
+ cover_url = (m.get("coverUrl") or m.get("thumbUrl") or "").strip()
|
|
|
|
|
+ duration = int(m.get("videoPlayLen") or 0)
|
|
|
|
|
|
|
|
- # 获取发布时间
|
|
|
|
|
- time_el = item.locator('div.post-time span').first
|
|
|
|
|
- publish_time = ''
|
|
|
|
|
- if await time_el.count() > 0:
|
|
|
|
|
- publish_time = await time_el.text_content() or ''
|
|
|
|
|
- publish_time = publish_time.strip()
|
|
|
|
|
-
|
|
|
|
|
- # 获取统计数据
|
|
|
|
|
- import re
|
|
|
|
|
- data_items = item.locator('div.post-data div.data-item')
|
|
|
|
|
- data_count = await data_items.count()
|
|
|
|
|
-
|
|
|
|
|
- play_count = 0
|
|
|
|
|
- like_count = 0
|
|
|
|
|
- comment_count = 0
|
|
|
|
|
- share_count = 0
|
|
|
|
|
- collect_count = 0
|
|
|
|
|
-
|
|
|
|
|
- for j in range(data_count):
|
|
|
|
|
- data_item = data_items.nth(j)
|
|
|
|
|
- count_text = await data_item.locator('span.count').text_content() or '0'
|
|
|
|
|
- count_text = count_text.strip()
|
|
|
|
|
-
|
|
|
|
|
- # 判断图标类型
|
|
|
|
|
- if await data_item.locator('span.weui-icon-outlined-eyes-on').count() > 0:
|
|
|
|
|
- # 播放量
|
|
|
|
|
- play_count = self._parse_count(count_text)
|
|
|
|
|
- elif await data_item.locator('span.weui-icon-outlined-like').count() > 0:
|
|
|
|
|
- # 点赞
|
|
|
|
|
- like_count = self._parse_count(count_text)
|
|
|
|
|
- elif await data_item.locator('span.weui-icon-outlined-comment').count() > 0:
|
|
|
|
|
- # 评论
|
|
|
|
|
- comment_count = self._parse_count(count_text)
|
|
|
|
|
- elif await data_item.locator('use[xlink\\:href="#icon-share"]').count() > 0:
|
|
|
|
|
- # 分享
|
|
|
|
|
- share_count = self._parse_count(count_text)
|
|
|
|
|
- elif await data_item.locator('use[xlink\\:href="#icon-thumb"]').count() > 0:
|
|
|
|
|
- # 收藏
|
|
|
|
|
- collect_count = self._parse_count(count_text)
|
|
|
|
|
|
|
+ create_ts = item.get("createTime") or 0
|
|
|
|
|
+ if isinstance(create_ts, (int, float)) and create_ts:
|
|
|
|
|
+ publish_time = datetime.fromtimestamp(create_ts).strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
+ else:
|
|
|
|
|
+ publish_time = str(create_ts) if create_ts else ""
|
|
|
|
|
|
|
|
- # 生成临时 work_id
|
|
|
|
|
- work_id = f"weixin_{i}_{hash(title)}_{hash(publish_time)}"
|
|
|
|
|
|
|
+ read_count = int(item.get("readCount") or 0)
|
|
|
|
|
+ like_count = int(item.get("likeCount") or 0)
|
|
|
|
|
+ comment_count = int(item.get("commentCount") or 0)
|
|
|
|
|
+ forward_count = int(item.get("forwardCount") or 0)
|
|
|
|
|
+ fav_count = int(item.get("favCount") or 0)
|
|
|
|
|
|
|
|
works.append(WorkItem(
|
|
works.append(WorkItem(
|
|
|
work_id=work_id,
|
|
work_id=work_id,
|
|
|
- title=title or '无标题',
|
|
|
|
|
|
|
+ title=title,
|
|
|
cover_url=cover_url,
|
|
cover_url=cover_url,
|
|
|
- duration=0,
|
|
|
|
|
- status='published',
|
|
|
|
|
|
|
+ duration=duration,
|
|
|
|
|
+ status="published",
|
|
|
publish_time=publish_time,
|
|
publish_time=publish_time,
|
|
|
- play_count=play_count,
|
|
|
|
|
|
|
+ play_count=read_count,
|
|
|
like_count=like_count,
|
|
like_count=like_count,
|
|
|
comment_count=comment_count,
|
|
comment_count=comment_count,
|
|
|
- share_count=share_count,
|
|
|
|
|
- collect_count=collect_count,
|
|
|
|
|
|
|
+ share_count=forward_count,
|
|
|
|
|
+ collect_count=fav_count,
|
|
|
))
|
|
))
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
- print(f"[{self.platform_name}] 解析作品 {i} 失败: {e}")
|
|
|
|
|
- import traceback
|
|
|
|
|
- traceback.print_exc()
|
|
|
|
|
|
|
+ print(f"[{self.platform_name}] 解析作品项失败: {e}", flush=True)
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- total = len(works)
|
|
|
|
|
- has_more = item_count > page_size
|
|
|
|
|
- print(f"[{self.platform_name}] 获取到 {total} 个作品")
|
|
|
|
|
|
|
+ if total == 0 and works:
|
|
|
|
|
+ total = len(works)
|
|
|
|
|
+ print(f"[{self.platform_name}] 本页获取 {len(works)} 条,totalCount={total}, next_page={bool(next_page)}")
|
|
|
|
|
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
import traceback
|
|
import traceback
|
|
|
traceback.print_exc()
|
|
traceback.print_exc()
|
|
|
return WorksResult(success=False, platform=self.platform_name, error=str(e))
|
|
return WorksResult(success=False, platform=self.platform_name, error=str(e))
|
|
|
|
|
|
|
|
- return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more)
|
|
|
|
|
|
|
+ return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more, next_page=next_page)
|
|
|
|
|
|
|
|
async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
|
|
async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
|
|
|
"""获取视频号作品评论"""
|
|
"""获取视频号作品评论"""
|