فهرست منبع

Changed spaces to tabs (per youtube-dl conventions) and fixed bugs, but it still won't download. Need to figure out how the whole process works in order to integrate this correctly.

Kevin Ngo 14 سال پیش
والد
کامیت
b20d4f8626
1 فایلهای تغییر یافته به همراه 50 افزوده شده و 43 حذف شده
  1. 50 43
      youtube-dl

+ 50 - 43
youtube-dl

@@ -3481,20 +3481,20 @@ class XVideosIE(InfoExtractor):
 			self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
 
 
-class SoundcloudIE(InformationExtractor):
+class SoundcloudIE(InfoExtractor):
 	"""Information extractor for soundcloud.com
-       To access the media, the uid of the song and a stream token
-       must be extracted from the page source and the script must make
-       a request to media.soundcloud.com/crossdomain.xml. Then
-       the media can be grabbed by requesting from an url composed
-       of the stream token and uid
-     """
+	   To access the media, the uid of the song and a stream token
+	   must be extracted from the page source and the script must make
+	   a request to media.soundcloud.com/crossdomain.xml. Then
+	   the media can be grabbed by requesting from an url composed
+	   of the stream token and uid
+	 """
 
 	_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
 	IE_NAME = u'soundcloud'
 
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
+	def __init__(self, downloader=None):
+		InfoExtractor.__init__(self, downloader)
 
 	def report_webpage(self, video_id):
 		"""Report information extraction."""
@@ -3504,8 +3504,8 @@ class SoundcloudIE(InformationExtractor):
 		"""Report information extraction."""
 		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
 
-    def _real_initialize(self):
-        return
+	def _real_initialize(self):
+		return
 
 	def _real_extract(self, url):
 		htmlParser = HTMLParser.HTMLParser()
@@ -3515,10 +3515,10 @@ class SoundcloudIE(InformationExtractor):
 			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 			return
 
-        # extract uploader (which is in the url)
-        uploader = mobj.group(3).decode('utf-8')
-        # extract simple title (uploader + slug of song title)
-		slug_title =  mobj.group(4).decode('utf-8')
+		# extract uploader (which is in the url)
+		uploader = mobj.group(1).decode('utf-8')
+		# extract simple title (uploader + slug of song title)
+		slug_title =  mobj.group(2).decode('utf-8')
 		simple_title = uploader + '-' + slug_title
 
 		self.report_webpage('%s/%s' % (uploader, slug_title))
@@ -3532,32 +3532,36 @@ class SoundcloudIE(InformationExtractor):
 
 		self.report_extraction('%s/%s' % (uploader, slug_title))
 
-        # extract uid and access token
-        mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', page)   
-        if mobj:
-            video_id = match.group(1)
-            stream_token = match.group(2)
-
-        # construct media url (with uid/token) to request song
-        mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
-        mediaURL = mediaURL % (video_id, stream_token)
-
-        # description
-        description = u'No description available'
-        mobj = re.search('track-description-value"><p>(.*?)</p>', page)
-        if mobj:
-            description = mobj.group(1)
-        
-        # upload date
-        mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", page)
-        if mobj:
-            try:
-    		    upload_date = datetime.datetime.strptime(match.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
-            except:
-                pass
-
-        try:
-            self._download.process_info({
+		# extract uid and access token
+		mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', page)   
+		if mobj:
+			video_id = match.group(1)
+			stream_token = match.group(2)
+
+		# construct media url (with uid/token) to request song
+		mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
+		mediaURL = mediaURL % (video_id, stream_token)
+
+		# description
+		description = u'No description available'
+		mobj = re.search('track-description-value"><p>(.*?)</p>', page)
+		if mobj:
+			description = mobj.group(1)
+		
+		# upload date
+		mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", page)
+		if mobj:
+			try:
+				upload_date = datetime.datetime.strptime(match.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
+			except:
+				pass
+
+		# for soundcloud, a request must be made to a cross domain to establish
+		# needed cookies
+		request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
+
+		try:
+			self._downloader.process_info({
 				'id':		video_id,
 				'url':		video_url,
 				'uploader':	uploader,
@@ -3567,8 +3571,10 @@ class SoundcloudIE(InformationExtractor):
 				'ext':		u'mp3',
 				'format':	u'NA',
 				'player_url':	None,
-                'description': description
-            })
+				'description': description
+			})
+		except UnavailableVideoError:
+			self._downloader.trouble(u'\nERROR: unable to download video')
 
 class PostProcessor(object):
 	"""Post Processor class.
@@ -3966,6 +3972,7 @@ def gen_extractors():
 		EscapistIE(),
 		CollegeHumorIE(),
 		XVideosIE(),
+        SoundcloudIE(),
 
 		GenericIE()
 	]