123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- """
- """
- import json
- import logging
- import re
- import jsonpath
- from util import json_util
# Keys retained when an actor/user dict is trimmed down by the helpers below.
user_require_fields = [
    'id',
    'name',
    'profile_picture',
    'profile_url',
]
def get_post_by_doc(response):
    """Build a flat summary dict for the post embedded in a page response.

    The page text is obtained via ``response.text()`` and handed to
    ``get_post_json``; the ``'node'`` entry of the first element of that
    result is the tree every lookup below runs against.

    Args:
        response: Object exposing a callable ``text()`` that returns the page
            source as a string.

    Returns:
        dict with the keys ``text``, ``attachments``, ``post_id``, ``actor``,
        ``creation_time``, ``id``, ``reaction_count``, ``comment_count`` and
        ``share_count``. ``actor`` only keeps the keys listed in
        ``user_require_fields``; ``attachments`` is the un-indexed result of
        the jsonpath query.

    NOTE(review): nothing here guards against ``get_post_json`` returning
    None or a jsonpath query finding no match (the jsonpath package appears
    to return False in that case - confirm); either would raise on the
    subsequent subscript.
    """
    data = get_post_json(response.text())[0]['node']

    def first_match(expression):
        # Every scalar field below is the first hit of a jsonpath query.
        return jsonpath.jsonpath(data, expression)[0]

    content_story = first_match('comet_sections.content.story')
    logging.info('获取成功')

    raw_actor = first_match(
        '$..comet_sections.context_layout.story.comet_sections.actor_photo.story.actors[0]'
    )
    actor = {
        field: value
        for field, value in raw_actor.items()
        if field in user_require_fields
    }

    return {
        'text': content_story['message']['text'],
        'attachments': jsonpath.jsonpath(data, '$..styles.attachment.all_subattachments.nodes'),
        'post_id': content_story['post_id'],
        'actor': actor,
        'creation_time': first_match(
            '$..comet_sections.context_layout.story.comet_sections.metadata[0].story.creation_time'
        ),
        'id': content_story['id'],
        'reaction_count': first_match(
            '$..comet_ufi_summary_and_actions_renderer.feedback.reaction_count.count'
        ),
        'comment_count': first_match(
            '$..comment_rendering_instance_for_feed_location.comments.total_count'
        ),
        'share_count': first_match(
            '$..comet_ufi_summary_and_actions_renderer.feedback.share_count.count'
        ),
    }
def get_blog_json(html_content):
    """Extract the ``xdt_api__v1__media__shortcode__web_info`` object from HTML.

    The page is scanned line by line. The line containing the marker key is
    discarded and replaced by an opening ``{``; every following (stripped)
    line is appended to a buffer. As soon as the buffer contains both
    ``"items": [`` and ``]`` a decode is attempted after each new line, and
    the first successfully decoded object is returned. A later occurrence of
    the marker restarts the buffer.

    Decoding uses ``JSONDecoder.raw_decode`` so that text following the
    object's closing brace on the same line (for example
    ``}, "extensions": {}``) no longer makes the buffer permanently
    unparseable.

    Args:
        html_content: Page source as a string.

    Returns:
        The decoded object (a dict), or None when the marker is absent or no
        complete object containing ``"items": [`` could be decoded.
    """
    marker = '"xdt_api__v1__media__shortcode__web_info"'
    items_open = '"items": ['
    decoder = json.JSONDecoder()

    inside_items = False
    items_buffer = ""
    for line in html_content.splitlines():
        line = line.strip()
        if marker in line:
            # The marker line itself is dropped; the object is assumed to
            # open on it, so the buffer restarts with a bare brace.
            items_buffer = '{'
            inside_items = True
            continue

        if not inside_items:
            continue

        items_buffer += line
        if items_open not in items_buffer or ']' not in items_buffer:
            continue

        try:
            data, end = decoder.raw_decode(items_buffer)
        except json.JSONDecodeError:
            # Object not complete yet; keep accumulating lines.
            continue

        # Only accept the object if the items array is part of what was
        # actually decoded, not of the trailing text that was ignored.
        consumed = items_buffer[:end]
        if items_open in consumed and ']' in consumed:
            return data
    return None
def get_post_json(html_content):
    """Locate the Comet content-query preloader payload in a page and parse it.

    Each stripped line is searched for an
    ``adp_Comet...ContentQueryRelayPreloader_...",{`` marker. In the text
    following the marker, everything between ``"data":{`` and the last ``}``
    is captured, prefixed with ``{`` and handed to
    ``json_util.parse_json_from_string``. The result for the first line that
    yields such a section is returned.

    Lines that contain the preloader marker but no ``"data":{...}`` section
    are skipped instead of raising ``AttributeError`` on the failed match.

    Args:
        html_content: Page source as a string.

    Returns:
        Whatever ``json_util.parse_json_from_string`` returns for the first
        matching line, or None when no line contains a usable payload.
    """
    # Compile once; the patterns do not change between lines.
    preloader_re = re.compile(
        r'adp_Comet\w+ContentQueryRelayPreloader_\w+",\{(.*)', re.DOTALL
    )
    data_re = re.compile(r'\"data\"\:\{(.*)}', re.DOTALL)

    for line in html_content.splitlines():
        preloader = preloader_re.search(line.strip())
        if preloader is None:
            continue

        payload = data_re.search(preloader.group(1))
        if payload is None:
            # Marker present but no data section on this line; keep looking.
            continue

        # The capture excludes the final closing brace matched by the
        # pattern; presumably the json_util helper tolerates that - confirm.
        return json_util.parse_json_from_string('{' + payload.group(1))
    return None
def get_user_by_request(response):
    """Return the trimmed user dict from a JSON API response.

    Args:
        response: Object exposing ``json()`` that returns the decoded body
            as a dict.

    Returns:
        The ``data.user`` mapping reduced to the keys listed in
        ``user_require_fields``, or None when the body's ``status`` is not
        ``'ok'`` or it has no ``data`` key.
    """
    payload = response.json()
    if payload.get('status') != 'ok' or 'data' not in payload:
        return None

    profile = payload['data']['user']
    return {
        field: value
        for field, value in profile.items()
        if field in user_require_fields
    }
|