|
@@ -8,6 +8,7 @@ from playwright.sync_api import sync_playwright, Page, Playwright
|
|
|
import api
|
|
|
from browser import BaseBrowser
|
|
|
from util.lock_util import LockManager
|
|
|
+from util.url_util import get_id_by_url
|
|
|
from instagram.data_handler import *
|
|
|
|
|
|
IG_URL = 'https://www.instagram.com/'
|
|
@@ -44,7 +45,8 @@ class InstagramBrowser(BaseBrowser):
|
|
|
api.assert_not_none(url, 'url不能为空')
|
|
|
self.result = None
|
|
|
self.map_result = {}
|
|
|
- self.id = get_post_id(url)
|
|
|
+ self.id = get_id_by_url(url)
|
|
|
+ api.assert_not_none(self.id, 'cannot get post id from url')
|
|
|
self.browser.on('response', self.blog_info_handler)
|
|
|
self.page.goto(url)
|
|
|
self.page.wait_for_timeout(1000)
|
|
@@ -62,11 +64,12 @@ class InstagramBrowser(BaseBrowser):
|
|
|
def blog_info_handler(self, response):
|
|
|
if response is None or response.status != 200:
|
|
|
return
|
|
|
- if '/info' in response.url:
|
|
|
+ content_type = response.headers.get('content-type', '')
|
|
|
+ if '/info' in response.url and 'application/json' in content_type:
|
|
|
info = get_blog_by_rsp(response)
|
|
|
if info is not None:
|
|
|
self.result = info
|
|
|
- elif self.id in response.url:
|
|
|
+ elif self.id in response.url and 'text/html' in content_type:
|
|
|
logging.info(f'get {self.id} blog response')
|
|
|
doc = get_blog_by_doc(response)
|
|
|
if doc is not None:
|