"""
Helpers that extract blog and user records from response objects and trim
them down to a fixed set of required fields.
"""
- import json
- import re
- import jsonpath
- from util import json_util
# Top-level keys of a blog item that are kept by get_blog_by_doc; every other
# key is dropped when the item is trimmed.
blog_require_fields = [
    'code',
    'pk',
    'id',
    'taken_at',
    'image_versions2',
    'user',
    'media_type',
    'carousel_media',
    'carousel_media_count',
    'like_count',
    'comment_count',
    'caption',
    'caption_is_edited',
]
# Keys of a user object that are kept when a user record is trimmed; used for
# both the user embedded in a blog item and the standalone user response.
user_require_fields = [
    'pk',
    'id',
    'username',
    'full_name',
    'profile_pic_url',
    'latest_reel_media',
    'follower_count',
    'following_count',
    'media_count',
]
def get_blog_by_doc(response):
    """Build a trimmed blog record from a page response.

    The page HTML is obtained by calling ``response.text()`` and handed to
    ``get_blog_json2`` to locate the blog item.

    Args:
        response: Object exposing a ``text()`` method that returns the page
            HTML as a string.

    Returns:
        A dict holding only the keys listed in ``blog_require_fields``, with:
          * ``'cover'`` set to the URL of the first ``image_versions2``
            candidate,
          * ``'image_versions2'`` replaced by ``None``,
          * ``'user'`` reduced to the keys in ``user_require_fields``.
        ``None`` when no blog item could be extracted from the page.

    Raises:
        KeyError, IndexError, TypeError: If the extracted item lacks
            ``image_versions2`` candidates or a ``user`` mapping.
    """
    raw_item = get_blog_json2(response.text())
    if raw_item is None:
        # The page did not contain the expected payload; previously this
        # fell through and failed with AttributeError on ``None.items()``.
        return None

    item = {k: v for k, v in raw_item.items() if k in blog_require_fields}
    item['cover'] = item['image_versions2']['candidates'][0]['url']
    # Only the cover URL is needed; the full image list is blanked out.
    item['image_versions2'] = None
    item['user'] = {k: v for k, v in item['user'].items() if k in user_require_fields}
    return item
def get_blog_json(html_content):
    """Reassemble and parse a JSON object that follows the web_info marker.

    The HTML is scanned line by line. A line containing
    ``"xdt_api__v1__media__shortcode__web_info"`` (re)starts a buffer with a
    single ``{``; the rest of that marker line is discarded. Each following
    stripped line is appended, and once the buffer contains both
    ``"items": [`` and ``]`` a parse is attempted after every appended line.

    Args:
        html_content: The HTML text to scan.

    Returns:
        The first successfully parsed buffer, or ``None`` if the marker is
        never seen or the buffer never becomes valid JSON.
    """
    marker = '"xdt_api__v1__media__shortcode__web_info"'
    collecting = False
    buffer = ""

    for raw_line in html_content.splitlines():
        text = raw_line.strip()

        if marker in text:
            # Start over with a fresh object; content on the marker line
            # itself is not kept.
            buffer, collecting = '{', True
            continue

        if not collecting:
            continue

        buffer += text

        # Skip the parse attempt until the buffer at least looks complete.
        if '"items": [' not in buffer or ']' not in buffer:
            continue

        try:
            return json.loads(buffer)
        except json.JSONDecodeError:
            # Still incomplete: keep appending lines and try again.
            pass

    return None
def get_blog_json2(html_content):
    """Extract the first web_info item from embedded JSON script tags.

    Every line containing ``"xdt_api__v1__media__shortcode__web_info"`` is
    searched for ``<script type="application/json" ...>`` elements. Each
    script payload on such a line is parsed as JSON and queried with the
    JSONPath ``$..xdt_api__v1__media__shortcode__web_info.items[0]``.

    Payloads that are not valid JSON, and marker lines without any matching
    script tag, are skipped instead of raising.

    Args:
        html_content: The HTML text to scan.

    Returns:
        The first value matched by the JSONPath expression, or ``None`` when
        no line yields a match.
    """
    marker = '"xdt_api__v1__media__shortcode__web_info"'
    jsonpath_expr = '$..xdt_api__v1__media__shortcode__web_info.items[0]'
    # Compiled once here rather than on every matching line.
    script_pattern = re.compile(
        r'<script\s+type="application/json"[^>]*>(.*?)</script>', re.DOTALL)

    for line in html_content.splitlines():
        line = line.strip()
        if marker not in line:
            continue

        # Try every script on the line: the marker is not guaranteed to sit
        # in the first one, and there may be none at all.
        for payload in script_pattern.findall(line):
            try:
                data = json.loads(payload)
            except json.JSONDecodeError:
                continue

            matches = jsonpath.jsonpath(data, jsonpath_expr)
            # The result is only used when truthy, as in the original code.
            if matches:
                return matches[0]

    return None
def get_user_by_request(response):
    """Return the trimmed user record carried by a JSON response.

    Args:
        response: Object exposing a ``json()`` method that returns the
            decoded response body.

    Returns:
        A dict with only the keys listed in ``user_require_fields`` when the
        body has ``status == 'ok'`` and ``data.user`` is a mapping;
        otherwise ``None``.
    """
    payload = response.json()
    if payload.get('status') != 'ok':
        return None

    data = payload.get('data')
    # ``data`` or ``data['user']`` may be absent or null; treat that the
    # same as "no user" instead of raising.
    user = data.get('user') if isinstance(data, dict) else None
    if not isinstance(user, dict):
        return None

    return {k: v for k, v in user.items() if k in user_require_fields}
|