|
@@ -15,6 +15,13 @@ IG_URL = 'https://www.instagram.com/'
|
|
|
# LockManager instance created once when this statement runs.
# NOTE(review): presumably shared by the browser classes below to serialize
# access to a shared resource — confirm against LockManager's callers.
lock_manager = LockManager()
|
|
|
|
|
|
|
|
|
# Matches ``instagram.com/p/<id>`` and ``instagram.com/<segment>/p/<id>``.
# The id stops at the next ``/``, ``?`` or ``#`` so that neither a query
# string nor a URL fragment leaks into the captured id.
_POST_ID_RE = re.compile(r'instagram\.com/(?:[^/]+/)?p/([^/?#]+)')


def get_post_id(url: str) -> 'str | None':
    """Extract the post id from an Instagram post URL.

    Args:
        url: URL expected to contain ``instagram.com/p/<id>``, optionally with
            one extra path segment before ``p/``.

    Returns:
        The post id, or ``None`` when ``url`` is empty/``None`` or does not
        contain a ``/p/<id>`` path.
    """
    if not url:
        # Treat a missing URL as "no id" instead of letting the regex call
        # raise TypeError on None.
        return None
    found = _POST_ID_RE.search(url)
    return found.group(1) if found else None
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
class InstagramBrowser(BaseBrowser):
|
|
|
|
|
|
def __init__(self, account: str, playwright=None):
|
|
@@ -37,7 +44,7 @@ class InstagramBrowser(BaseBrowser):
|
|
|
api.assert_not_none(url, 'url不能为空')
|
|
|
self.result = None
|
|
|
self.map_result = {}
|
|
|
- self.id = url.lstrip('/').split('/')[-1]
|
|
|
+ self.id = get_post_id(url)
|
|
|
self.browser.on('response', self.blog_info_handler)
|
|
|
self.page.goto(url)
|
|
|
self.page.wait_for_timeout(1000)
|
|
@@ -55,9 +62,16 @@ class InstagramBrowser(BaseBrowser):
|
|
|
def blog_info_handler(self, response):
    """Capture blog data from a network response into ``self.result``.

    Responses that are ``None`` or whose status is not 200 are ignored. A
    response whose URL contains ``'/info'`` is parsed with
    ``get_blog_by_rsp``; otherwise a response whose URL contains the post id
    (``self.id``) is parsed with ``get_blog_by_doc``. ``self.result`` is only
    overwritten when the parser returns something other than ``None``, so a
    response that yields nothing never clobbers a value captured earlier.

    Args:
        response: Response object exposing ``status`` and ``url``.
    """
    if response is None or response.status != 200:
        return

    url = response.url
    if '/info' in url:
        parsed = get_blog_by_rsp(response)
    elif self.id and self.id in url:
        # ``self.id`` may be None when no post id could be extracted from the
        # page URL; ``None in url`` would raise TypeError, and an empty id
        # would match every URL, so both are skipped.
        logging.info(f'get {self.id} blog response')
        parsed = get_blog_by_doc(response)
    else:
        return

    if parsed is not None:
        self.result = parsed
|
|
|
+
|
|
|
|
|
|
def user_info_handler(self, response):
|
|
|
if response is None or response.status != 200:
|