فهرست منبع

InstagramIE: fix the extraction of the uploader_id and the title

The page title is now just 'Instagram', so the title is built from the uploader id instead.
Also extract the description.
Jaime Marquínez Ferrándiz 12 سال پیش
والد
کامیت
3f40217704
1 فایلهای تغییر یافته به همراه 10 افزوده شده و 12 حذف شده
  1. 10 12
      youtube_dl/extractor/instagram.py

+ 10 - 12
youtube_dl/extractor/instagram.py

@@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
         u'md5': u'0d2da106a9d2631273e192b372806516',
         u'md5': u'0d2da106a9d2631273e192b372806516',
         u'info_dict': {
         u'info_dict': {
             u"uploader_id": u"naomipq", 
             u"uploader_id": u"naomipq", 
-            u"title": u"Video by naomipq"
+            u"title": u"Video by naomipq",
+            u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
         }
         }
     }
     }
 
 
@@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group(1)
         video_id = mobj.group(1)
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
-        html_title = self._html_search_regex(
-            r'<title>(.+?)</title>',
-            webpage, u'title', flags=re.DOTALL)
-        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
-        uploader_id = self._html_search_regex(
-            r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
-            webpage, u'uploader id', fatal=False, flags=re.DOTALL)
-        ext = 'mp4'
+        uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
+            webpage, u'uploader id', fatal=False)
+        desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
+            fatal=False)
 
 
         return [{
         return [{
             'id':        video_id,
             'id':        video_id,
             'url':       self._og_search_video_url(webpage),
             'url':       self._og_search_video_url(webpage),
-            'ext':       ext,
-            'title':     title,
+            'ext':       'mp4',
+            'title':     u'Video by %s' % uploader_id,
             'thumbnail': self._og_search_thumbnail(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader_id' : uploader_id
+            'uploader_id' : uploader_id,
+            'description': desc,
         }]
         }]