Browse Source

[BehindKink] Add new extractor

5moufl 11 years ago
parent
commit
2bca84e345
2 changed files with 57 additions and 0 deletions
  1. 1 0
      youtube_dl/extractor/__init__.py
  2. 56 0
      youtube_dl/extractor/behindkink.py

+ 1 - 0
youtube_dl/extractor/__init__.py

@@ -25,6 +25,7 @@ from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .beeg import BeegIE
+from .behindkink import BehindKinkIE
 from .bilibili import BiliBiliIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bliptv import BlipTVIE, BlipTVUserIE

+ 56 - 0
youtube_dl/extractor/behindkink.py

@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import url_basename
+
+
+class BehindKinkIE(InfoExtractor):
+    """Information extractor for behindkink.com post pages (/YYYY/MM/DD/slug)."""
+
+    # Accepts http and https; the id stops at the first '/', '?', '#' or '_'.
+    _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/?#_]+)'
+    _TEST = {
+        'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
+        'md5': '41ad01222b8442089a55528fec43ec01',
+        'info_dict': {
+            'id': '36370',
+            'ext': 'mp4',
+            'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
+            'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
+            'upload_date': '20140814',
+            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video embedded in the post at *url*."""
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+        year, month, day = mobj.group('year', 'month', 'day')
+
+        # Always fetch the canonical http://www. form of the post URL.
+        webpage_url = 'http://www.behindkink.com/%s/%s/%s/%s' % (year, month, day, display_id)
+        webpage = self._download_webpage(webpage_url, display_id)
+
+        self.report_extraction(display_id)
+        video_url = self._search_regex(
+            r"'file':\s*'([^']+)'", webpage, 'URL base')
+
+        # The id is the part of the media file name before the first underscore.
+        video_id = url_basename(video_url).split('_')[0]
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': self._og_search_title(webpage),
+            'display_id': display_id,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage),
+            'upload_date': year + month + day,
+            'age_limit': 18,
+        }