# url_util.py

import logging
import re
from urllib.parse import urlparse

import requests


  4. def get_expanded_url(url: str) -> str:
  5. """
  6. 展开短链接到完整URL
  7. """
  8. try:
  9. response = requests.head(url, allow_redirects=True)
  10. return response.url
  11. except Exception as e:
  12. return url
  13. def get_id_by_url(url: str) -> str:
  14. """
  15. 从URL中提取ID
  16. Args:
  17. url: 输入的URL字符串
  18. Returns:
  19. 从URL路径中提取的最后一个部分作为ID
  20. """
  21. # 短链接匹配模式(这里假设使用类似的正则表达式)
  22. # https://www.instagram.com/share/_nFwInAGM
  23. short_link_pattern = re.compile(r'http[s]?://www\.instagram\.com/share')
  24. # 如果是短链接,先展开
  25. if short_link_pattern.search(url):
  26. url = get_expanded_url(url)
  27. # 解析URL并获取路径
  28. parsed_url = urlparse(url)
  29. path_parts = parsed_url.path.split('/')
  30. # 返回路径最后一个部分
  31. return path_parts[-1]