gaskrank.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. float_or_none,
  7. int_or_none,
  8. unified_strdate,
  9. )
  class GaskrankIE(InfoExtractor):
      """Information extractor for video pages on gaskrank.tv."""

      # Accepts both ``.htm`` and ``.html`` page URLs. The first path component
      # after ``/tv/`` is captured as ``categories`` and the page slug as ``id``
      # (the slug is only used as the display id; the real video id is scraped
      # from the media URL inside the page).
      _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
      _TESTS = [
          {
              'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
              'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
              'info_dict': {
                  'id': '201601/26955',
                  'ext': 'mp4',
                  'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
                  'thumbnail': r're:^https?://.*\.jpg$',
                  'categories': ['motorrad-fun'],
                  'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
                  'uploader_id': 'Bikefun',
                  'upload_date': '20170110',
                  'uploader_url': None,
              }
          },
          {
              'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
              'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
              'info_dict': {
                  'id': '201106/15920',
                  'ext': 'mp4',
                  'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
                  'thumbnail': r're:^https?://.*\.jpg$',
                  'categories': ['racing'],
                  'display_id': 'isle-of-man-tt-2011-michael-du-15920',
                  'uploader_id': 'IOM',
                  'upload_date': '20160506',
                  'uploader_url': 'www.iomtt.com',
              }
          }
      ]

      def _real_extract(self, url):
          """Extract formats and metadata from a gaskrank.tv video page.

          The page is downloaded once; the title, uploader, upload date,
          homepage, tags, view count and rating are scraped from it with
          regular expressions, and the formats come from the HTML5 media
          entries found in the page.

          :param url: page URL matching ``_VALID_URL``.
          :return: the first HTML5 media entry of the page, updated with the
              scraped metadata. Optional fields that are not present in the
              page are ``None`` (``tags`` is an empty list).
          """
          display_id = self._match_id(url)
          webpage = self._download_webpage(url, display_id)

          # The category is taken from the URL path, not from the page markup.
          categories = [re.match(self._VALID_URL, url).group('categories')]

          # Prefer the OpenGraph title; fall back to the <meta name="title">
          # tag, which is requested with fatal=True.
          title = self._og_search_title(
              webpage, default=None) or self._html_search_meta(
              'title', webpage, fatal=True)

          # Uploader and upload date are optional. Bind both names up front so
          # a page without the "Video von: ... | vom: dd.mm.yyyy" line yields
          # None values instead of an UnboundLocalError further down.
          uploader_id = upload_date = None
          mobj = re.search(
              r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
              webpage)
          if mobj is not None:
              uploader_id = mobj.group('uploader_id')
              upload_date = unified_strdate(mobj.group('upload_date'))

          uploader_url = self._search_regex(
              r'Homepage:\s*<[^>]*>(?P<uploader_url>[^<]*)',
              webpage, 'uploader_url', default=None)

          tags = re.findall(
              r'/tv/tags/[^/]+/"\s*>(?P<tag>[^<]*?)<',
              webpage)

          view_count = self._search_regex(
              r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
              webpage, 'view_count', default=None)
          if view_count:
              # The regex allows '.' inside the number (presumably a thousands
              # separator); strip it before converting.
              view_count = int_or_none(view_count.replace('.', ''))

          # The rating is optional metadata: use default=None so a page
          # without a rating does not abort the whole extraction.
          average_rating = self._search_regex(
              r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P<average_rating>[0-9,]+)',
              webpage, 'average_rating', default=None)
          if average_rating:
              # The regex allows ',' inside the number (presumably a decimal
              # comma); normalise it to '.' before converting.
              average_rating = float_or_none(average_rating.replace(',', '.'))

          # The video id is the part of the media file path on
          # movies.gaskrank.tv before the first '-' (or before '.mp4' when
          # there is no '-'). This lookup has no default, so it is mandatory.
          video_id = self._search_regex(
              r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
              webpage, 'video id')

          entry = self._parse_html5_media_entries(url, webpage, video_id)[0]
          entry.update({
              'id': video_id,
              'title': title,
              'categories': categories,
              'display_id': display_id,
              'uploader_id': uploader_id,
              'upload_date': upload_date,
              'uploader_url': uploader_url,
              'tags': tags,
              'view_count': view_count,
              'average_rating': average_rating,
          })
          self._sort_formats(entry['formats'])
          return entry
  91. return entry