Browse Source

Merge remote-tracking branch 'peugeot/beeg'

Philipp Hagemeister 11 years ago
parent
commit
2d7af09487
2 changed files with 49 additions and 0 deletions
  1. 1 0
      youtube_dl/extractor/__init__.py
  2. 48 0
      youtube_dl/extractor/beeg.py

+ 1 - 0
youtube_dl/extractor/__init__.py

@@ -24,6 +24,7 @@ from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .bbccouk import BBCCoUkIE
+from .beeg import BeegIE
 from .bilibili import BiliBiliIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bliptv import BlipTVIE, BlipTVUserIE

+ 48 - 0
youtube_dl/extractor/beeg.py

@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class BeegIE(InfoExtractor):
+    """Extractor for beeg.com video pages addressed by a numeric id."""
+    _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://beeg.com/5416503',
+        'md5': '634526ae978711f6b748fe0dd6c11f57',
+        'info_dict': {
+            'id': '5416503',
+            'ext': 'mp4',
+            'title': 'Sultry Striptease',
+            'description': 'md5:6db3c6177972822aaba18652ff59c773',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the page at `url` and return the scraped info dict."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        # Only the value keyed '480p' in the page source is used as media URL.
+        video_url = self._html_search_regex(
+            r"'480p'\s*:\s*'([^']+)'", webpage, 'video URL')
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
+        description = self._html_search_regex(
+            r'<meta name="description" content="([^"]*)"', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'\'previewer\.url\'\s*:\s*"([^"]*)"', webpage, 'thumbnail', fatal=False)
+        # Looked up with fatal=False, so guard against None before splitting.
+        keywords = self._html_search_regex(
+            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
+        categories = keywords.split(',') if keywords else None
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+        }