123456789101112131415161718192021222324252627282930313233343536373839404142 |
- import re
- import requests
- from urllib.parse import urlparse
def get_expanded_url(url: str, timeout: float = 10.0) -> str:
    """Resolve a short link to the final URL it redirects to.

    Sends a HEAD request with redirects enabled. The lookup is best-effort:
    if the request fails, the input URL is returned unchanged.

    Args:
        url: The (possibly shortened) URL to resolve.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The URL of the final response after following redirects, or ``url``
        itself when the request fails.
    """
    try:
        # Without an explicit timeout, requests waits indefinitely on an
        # unresponsive host.
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.RequestException:
        # Network errors, timeouts and malformed URLs all fall back to the
        # caller's original URL.
        return url
    return response.url
# Matches Instagram share short links, e.g.
# https://www.instagram.com/share/_nFwInAGM
_SHORT_LINK_PATTERN = re.compile(r'http[s]?://www\.instagram\.com/share')


def get_id_by_url(url: str) -> str:
    """Extract an ID from a URL.

    If the URL is an Instagram share short link, it is first expanded via
    ``get_expanded_url``. The ID is the last non-empty segment of the URL
    path; the query string and fragment are ignored.

    Args:
        url: The input URL string.

    Returns:
        The last non-empty path segment, or an empty string when the path
        has no segments (e.g. ``https://www.instagram.com/``).
    """
    # Short links carry no usable ID themselves; resolve them first.
    if _SHORT_LINK_PATTERN.search(url):
        url = get_expanded_url(url)

    path = urlparse(url).path
    # Drop empty segments so a trailing slash ("/p/ABC123/") does not make
    # the result an empty string.
    segments = [segment for segment in path.split('/') if segment]
    return segments[-1] if segments else ''
|