|
@@ -63,6 +63,7 @@ class XhsBrowser(BaseBrowser):
|
|
def login(self):
    """Open the XHS home page and keep it open so the user can log in by hand.

    Starts the browser via ``__init_browser__``, navigates to ``XHS_URL``,
    waits 60 seconds, then stops Playwright. Playwright is stopped even if
    navigation or the wait raises, so the driver is not left running.
    """
    self.__init_browser__()
    try:
        # Manual login for now: load the site and leave the page open for
        # 60 seconds so the user can complete the login themselves.
        self.page.goto(XHS_URL, wait_until='domcontentloaded')
        self.page.wait_for_timeout(60_000)
    finally:
        # Always release the driver, including when the steps above raise.
        self.playwright.stop()
|
|
|
|
|
|
@@ -79,13 +80,14 @@ class XhsBrowser(BaseBrowser):
|
|
# note_info = self.xhs_client.get_note_by_id(note_id=note_id)
|
|
# note_info = self.xhs_client.get_note_by_id(note_id=note_id)
|
|
note_info = self.get_note(note_id=note_id)
|
|
note_info = self.get_note(note_id=note_id)
|
|
huitun_note['authorInfo'] = note_info.get('user')
|
|
huitun_note['authorInfo'] = note_info.get('user')
|
|
- huitun_note['imageList'] = [img.get('url_default') for img in note_info.get('image_list')]
|
|
|
|
|
|
+ if note_info.get('imageList'):
|
|
|
|
+ huitun_note['imageList'] = [img.get('urlDefault') for img in note_info.get('imageList')]
|
|
sleep(2)
|
|
sleep(2)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
utils.logger.error(f"爬取小红书异常 {e}")
|
|
utils.logger.error(f"爬取小红书异常 {e}")
|
|
|
|
|
|
def get_note(self, note_id: str):
    """Load a note's explore page and return its note object as a dict.

    The note is read from the page's ``window.__INITIAL_STATE__`` object
    (``note.noteDetailMap[note_id].note``), serialized to JSON inside the
    page and parsed back here.

    Args:
        note_id: Identifier of the note; used both in the URL and as the
            key into ``noteDetailMap``.

    Returns:
        The parsed note object.

    Raises:
        ValueError: If the page exposes no note data for ``note_id``.
    """
    self.page.goto(f'{XHS_URL}/explore/{note_id}', wait_until='domcontentloaded')
    # Guard every level of the lookup so that a missing state object or a
    # missing map entry yields null instead of throwing inside the page.
    data = self.page.evaluate(
        '''noteId => {
            const state = window.__INITIAL_STATE__;
            const detailMap = state && state.note && state.note.noteDetailMap;
            const entry = detailMap && detailMap[noteId];
            return entry && entry.note ? JSON.stringify(entry.note) : null;
        }''',
        note_id,
    )
    if not data:
        # Fail with a clear message rather than letting json.loads(None)
        # raise an opaque TypeError.
        raise ValueError(f'note {note_id!r} not found in window.__INITIAL_STATE__')
    return json.loads(data)
|