Browse Source

Merge remote-tracking branch 'upstream/master'

Allan Zhou 12 years ago
parent
commit
591078babf

+ 2 - 2
devscripts/youtube_genalgo.py

@@ -14,9 +14,9 @@ tests = [
     # 89 
     # 89 
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
      "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
      "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
-    # 88
+    # 88 - vflapUV9V 2013/08/28
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
-     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
+     "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
     # 87
     # 87
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
      "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
      "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),

+ 17 - 11
youtube_dl/FileDownloader.py

@@ -63,6 +63,17 @@ class FileDownloader(object):
         converted = float(bytes) / float(1024 ** exponent)
         converted = float(bytes) / float(1024 ** exponent)
         return '%.2f%s' % (converted, suffix)
         return '%.2f%s' % (converted, suffix)
 
 
+    @staticmethod
+    def format_seconds(seconds):
+        (mins, secs) = divmod(seconds, 60)
+        (hours, eta_mins) = divmod(mins, 60)
+        if hours > 99:
+            return '--:--:--'
+        if hours == 0:
+            return '%02d:%02d' % (mins, secs)
+        else:
+            return '%02d:%02d:%02d' % (hours, mins, secs)
+
     @staticmethod
     @staticmethod
     def calc_percent(byte_counter, data_len):
     def calc_percent(byte_counter, data_len):
         if data_len is None:
         if data_len is None:
@@ -78,14 +89,7 @@ class FileDownloader(object):
             return '--:--'
             return '--:--'
         rate = float(current) / dif
         rate = float(current) / dif
         eta = int((float(total) - float(current)) / rate)
         eta = int((float(total) - float(current)) / rate)
-        (eta_mins, eta_secs) = divmod(eta, 60)
-        (eta_hours, eta_mins) = divmod(eta_mins, 60)
-        if eta_hours > 99:
-            return '--:--:--'
-        if eta_hours == 0:
-            return '%02d:%02d' % (eta_mins, eta_secs)
-        else:
-            return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
+        return FileDownloader.format_seconds(eta)
 
 
     @staticmethod
     @staticmethod
     def calc_speed(start, now, bytes):
     def calc_speed(start, now, bytes):
@@ -234,12 +238,14 @@ class FileDownloader(object):
         """Report it was impossible to resume download."""
         """Report it was impossible to resume download."""
         self.to_screen(u'[download] Unable to resume')
         self.to_screen(u'[download] Unable to resume')
 
 
-    def report_finish(self):
+    def report_finish(self, data_len_str, tot_time):
         """Report download finished."""
         """Report download finished."""
         if self.params.get('noprogress', False):
         if self.params.get('noprogress', False):
             self.to_screen(u'[download] Download completed')
             self.to_screen(u'[download] Download completed')
         else:
         else:
-            self.to_screen(u'')
+            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
+                (clear_line, data_len_str, self.format_seconds(tot_time)))
 
 
     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
         self.report_destination(filename)
         self.report_destination(filename)
@@ -542,7 +548,7 @@ class FileDownloader(object):
             self.report_error(u'Did not get any data blocks')
             self.report_error(u'Did not get any data blocks')
             return False
             return False
         stream.close()
         stream.close()
-        self.report_finish()
+        self.report_finish(data_len_str, (time.time() - start))
         if data_len is not None and byte_counter != data_len:
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)
         self.try_rename(tmpfilename, filename)

+ 2 - 2
youtube_dl/PostProcessor.py

@@ -137,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         try:
         try:
             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
         except FFmpegPostProcessorError as err:
         except FFmpegPostProcessorError as err:
-            raise AudioConversionError(err.message)
+            raise AudioConversionError(err.msg)
 
 
     def run(self, information):
     def run(self, information):
         path = information['filepath']
         path = information['filepath']
@@ -207,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         except:
         except:
             etype,e,tb = sys.exc_info()
             etype,e,tb = sys.exc_info()
             if isinstance(e, AudioConversionError):
             if isinstance(e, AudioConversionError):
-                msg = u'audio conversion failed: ' + e.message
+                msg = u'audio conversion failed: ' + e.msg
             else:
             else:
                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
             raise PostProcessingError(msg)
             raise PostProcessingError(msg)

+ 17 - 4
youtube_dl/YoutubeDL.py

@@ -97,6 +97,7 @@ class YoutubeDL(object):
     def __init__(self, params):
     def __init__(self, params):
         """Create a FileDownloader object with the given options."""
         """Create a FileDownloader object with the given options."""
         self._ies = []
         self._ies = []
+        self._ies_instances = {}
         self._pps = []
         self._pps = []
         self._progress_hooks = []
         self._progress_hooks = []
         self._download_retcode = 0
         self._download_retcode = 0
@@ -111,8 +112,21 @@ class YoutubeDL(object):
     def add_info_extractor(self, ie):
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
         """Add an InfoExtractor object to the end of the list."""
         self._ies.append(ie)
         self._ies.append(ie)
+        self._ies_instances[ie.ie_key()] = ie
         ie.set_downloader(self)
         ie.set_downloader(self)
 
 
+    def get_info_extractor(self, ie_key):
+        """
+        Get an instance of an IE with name ie_key, it will try to get one from
+        the _ies list, if there's no instance it will create a new one and add
+        it to the extractor list.
+        """
+        ie = self._ies_instances.get(ie_key)
+        if ie is None:
+            ie = get_info_extractor(ie_key)()
+            self.add_info_extractor(ie)
+        return ie
+
     def add_default_info_extractors(self):
     def add_default_info_extractors(self):
         """
         """
         Add the InfoExtractors returned by gen_extractors to the end of the list
         Add the InfoExtractors returned by gen_extractors to the end of the list
@@ -294,9 +308,7 @@ class YoutubeDL(object):
          '''
          '''
         
         
         if ie_key:
         if ie_key:
-            ie = get_info_extractor(ie_key)()
-            ie.set_downloader(self)
-            ies = [ie]
+            ies = [self.get_info_extractor(ie_key)]
         else:
         else:
             ies = self._ies
             ies = self._ies
 
 
@@ -448,7 +460,8 @@ class YoutubeDL(object):
         if self.params.get('forceid', False):
         if self.params.get('forceid', False):
             compat_print(info_dict['id'])
             compat_print(info_dict['id'])
         if self.params.get('forceurl', False):
         if self.params.get('forceurl', False):
-            compat_print(info_dict['url'])
+            # For RTMP URLs, also include the playpath
+            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
             compat_print(info_dict['thumbnail'])
             compat_print(info_dict['thumbnail'])
         if self.params.get('forcedescription', False) and 'description' in info_dict:
         if self.params.get('forcedescription', False) and 'description' in info_dict:

+ 19 - 4
youtube_dl/__init__.py

@@ -45,6 +45,7 @@ import sys
 import warnings
 import warnings
 import platform
 import platform
 
 
+
 from .utils import *
 from .utils import *
 from .update import update_self
 from .update import update_self
 from .version import __version__
 from .version import __version__
@@ -99,6 +100,16 @@ def parseOpts(overrideArguments=None):
             pass
             pass
         return None
         return None
 
 
+    def _hide_login_info(opts):
+        opts = list(opts)
+        for private_opt in ['-p', '--password', '-u', '--username']:
+            try:
+                i = opts.index(private_opt)
+                opts[i+1] = '<PRIVATE>'
+            except ValueError:
+                pass
+        return opts
+
     max_width = 80
     max_width = 80
     max_help_position = 80
     max_help_position = 80
 
 
@@ -357,9 +368,9 @@ def parseOpts(overrideArguments=None):
         argv = systemConf + userConf + commandLineConf
         argv = systemConf + userConf + commandLineConf
         opts, args = parser.parse_args(argv)
         opts, args = parser.parse_args(argv)
         if opts.verbose:
         if opts.verbose:
-            sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
-            sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
-            sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
+            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
 
 
     return parser, opts, args
     return parser, opts, args
 
 
@@ -430,6 +441,10 @@ def _real_main(argv=None):
     proxy_handler = compat_urllib_request.ProxyHandler(proxies)
     proxy_handler = compat_urllib_request.ProxyHandler(proxies)
     https_handler = make_HTTPS_handler(opts)
     https_handler = make_HTTPS_handler(opts)
     opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
     opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+    # Delete the default user-agent header, which would otherwise apply in
+    # cases where our custom HTTP handler doesn't come into play
+    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+    opener.addheaders =[]
     compat_urllib_request.install_opener(opener)
     compat_urllib_request.install_opener(opener)
     socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
     socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
 
 
@@ -607,7 +622,7 @@ def _real_main(argv=None):
                 sys.exc_clear()
                 sys.exc_clear()
             except:
             except:
                 pass
                 pass
-        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
+        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
         sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
         sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
 
 
     ydl.add_default_info_extractors()
     ydl.add_default_info_extractors()

+ 202 - 0
youtube_dl/aes.py

@@ -0,0 +1,202 @@
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
+
+import base64
+from math import ceil
+
+from .utils import bytes_to_intlist, intlist_to_bytes
+
+BLOCK_SIZE_BYTES = 16
+
+def aes_ctr_decrypt(data, key, counter):
+    """
+    Decrypt with aes in counter mode
+    
+    @param {int[]} data        cipher
+    @param {int[]} key         16/24/32-Byte cipher key
+    @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
+                               returns the next counter block
+    @returns {int[]}           decrypted data
+    """
+    expanded_key = key_expansion(key)
+    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+    
+    decrypted_data=[]
+    for i in range(block_count):
+        counter_block = counter.next_value()
+        block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
+        block += [0]*(BLOCK_SIZE_BYTES - len(block))
+        
+        cipher_counter_block = aes_encrypt(counter_block, expanded_key)
+        decrypted_data += xor(block, cipher_counter_block)
+    decrypted_data = decrypted_data[:len(data)]
+    
+    return decrypted_data
+
+def key_expansion(data):
+    """
+    Generate key schedule
+    
+    @param {int[]} data  16/24/32-Byte cipher key
+    @returns {int[]}     176/208/240-Byte expanded key 
+    """
+    data = data[:] # copy
+    rcon_iteration = 1
+    key_size_bytes = len(data)
+    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+    
+    while len(data) < expanded_key_size_bytes:
+        temp = data[-4:]
+        temp = key_schedule_core(temp, rcon_iteration)
+        rcon_iteration += 1
+        data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        for _ in range(3):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        if key_size_bytes == 32:
+            temp = data[-4:]
+            temp = sub_bytes(temp)
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        for _ in range(3 if key_size_bytes == 32  else 2 if key_size_bytes == 24 else 0):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+    data = data[:expanded_key_size_bytes]
+    
+    return data
+
+def aes_encrypt(data, expanded_key):
+    """
+    Encrypt one block with aes
+    
+    @param {int[]} data          16-Byte state
+    @param {int[]} expanded_key  176/208/240-Byte expanded key 
+    @returns {int[]}             16-Byte cipher
+    """
+    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+    
+    data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+    for i in range(1, rounds+1):
+        data = sub_bytes(data)
+        data = shift_rows(data)
+        if i != rounds:
+            data = mix_columns(data)
+        data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
+    
+    return data
+
+def aes_decrypt_text(data, password, key_size_bytes):
+    """
+    Decrypt text
+    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
+      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
+    - Mode of operation is 'counter'
+    
+    @param {str} data                    Base64 encoded string
+    @param {str,unicode} password        Password (will be encoded with utf-8)
+    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
+    @returns {str}                       Decrypted data
+    """
+    NONCE_LENGTH_BYTES = 8
+    
+    data = bytes_to_intlist(base64.b64decode(data))
+    password = bytes_to_intlist(password.encode('utf-8'))
+    
+    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
+    key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
+    
+    nonce = data[:NONCE_LENGTH_BYTES]
+    cipher = data[NONCE_LENGTH_BYTES:]
+    
+    class Counter:
+        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+        def next_value(self):
+            temp = self.__value
+            self.__value = inc(self.__value)
+            return temp
+    
+    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
+    plaintext = intlist_to_bytes(decrypted_data)
+    
+    return plaintext
+
+RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
+SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
+MIX_COLUMN_MATRIX = ((2,3,1,1),
+                     (1,2,3,1),
+                     (1,1,2,3),
+                     (3,1,1,2))
+
+def sub_bytes(data):
+    return [SBOX[x] for x in data]
+
+def rotate(data):
+    return data[1:] + [data[0]]
+
+def key_schedule_core(data, rcon_iteration):
+    data = rotate(data)
+    data = sub_bytes(data)
+    data[0] = data[0] ^ RCON[rcon_iteration]
+    
+    return data
+
+def xor(data1, data2):
+    return [x^y for x, y in zip(data1, data2)]
+
+def mix_column(data):
+    data_mixed = []
+    for row in range(4):
+        mixed = 0
+        for column in range(4):
+            addend = data[column]
+            if MIX_COLUMN_MATRIX[row][column] in (2,3):
+                addend <<= 1
+                if addend > 0xff:
+                    addend &= 0xff
+                    addend ^= 0x1b
+                if MIX_COLUMN_MATRIX[row][column] == 3:
+                    addend ^= data[column]
+            mixed ^= addend & 0xff
+        data_mixed.append(mixed)
+    return data_mixed
+
+def mix_columns(data):
+    data_mixed = []
+    for i in range(4):
+        column = data[i*4 : (i+1)*4]
+        data_mixed += mix_column(column)
+    return data_mixed
+
+def shift_rows(data):
+    data_shifted = []
+    for column in range(4):
+        for row in range(4):
+            data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
+    return data_shifted
+
+def inc(data):
+    data = data[:] # copy
+    for i in range(len(data)-1,-1,-1):
+        if data[i] == 255:
+            data[i] = 0
+        else:
+            data[i] = data[i] + 1
+            break
+    return data

+ 10 - 2
youtube_dl/extractor/__init__.py

@@ -1,3 +1,5 @@
+from .appletrailers import AppleTrailersIE
+from .addanime import AddAnimeIE
 from .archiveorg import ArchiveOrgIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
 from .ard import ARDIE
 from .arte import ArteTvIE
 from .arte import ArteTvIE
@@ -6,7 +8,10 @@ from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .breakcom import BreakIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .brightcove import BrightcoveIE
+from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
 from .comedycentral import ComedyCentralIE
 from .condenast import CondeNastIE
 from .condenast import CondeNastIE
@@ -45,12 +50,14 @@ from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
 from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
 from .metacafe import MetacafeIE
+from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
 from .mtv import MTVIE
 from .muzu import MuzuTVIE
 from .muzu import MuzuTVIE
 from .myspass import MySpassIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .myvideo import MyVideoIE
 from .nba import NBAIE
 from .nba import NBAIE
+from .nbc import NBCNewsIE
 from .ooyala import OoyalaIE
 from .ooyala import OoyalaIE
 from .pbs import PBSIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .photobucket import PhotobucketIE
@@ -63,6 +70,7 @@ from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .rtlnow import RTLnowIE
 from .sina import SinaIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slashdot import SlashdotIE
+from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .stanfordoc import StanfordOpenClassroomIE
@@ -73,18 +81,18 @@ from .ted import TEDIE
 from .tf1 import TF1IE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
 from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
 from .tudou import TudouIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .tutv import TutvIE
-from .ustream import UstreamIE
 from .unistra import UnistraIE
 from .unistra import UnistraIE
+from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .vine import VineIE
-from .c56 import C56IE
 from .wat import WatIE
 from .wat import WatIE
 from .weibo import WeiboIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wimp import WimpIE

+ 75 - 0
youtube_dl/extractor/addanime.py

@@ -0,0 +1,75 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_HTTPError,
+    compat_str,
+    compat_urllib_parse,
+    compat_urllib_parse_urlparse,
+
+    ExtractorError,
+)
+
+
+class AddAnimeIE(InfoExtractor):
+
+    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
+    IE_NAME = u'AddAnime'
+    _TEST = {
+        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
+        u'file': u'24MR3YO5SAS9.flv',
+        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
+        u'info_dict': {
+            u"description": u"One Piece 606",
+            u"title": u"One Piece 606"
+        }
+    }
+
+    def _real_extract(self, url):
+        try:
+            mobj = re.match(self._VALID_URL, url)
+            video_id = mobj.group('video_id')
+            webpage = self._download_webpage(url, video_id)
+        except ExtractorError as ee:
+            if not isinstance(ee.cause, compat_HTTPError):
+                raise
+
+            redir_webpage = ee.cause.read().decode('utf-8')
+            action = self._search_regex(
+                r'<form id="challenge-form" action="([^"]+)"',
+                redir_webpage, u'Redirect form')
+            vc = self._search_regex(
+                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
+                redir_webpage, u'redirect vc value')
+            av = re.search(
+                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
+                redir_webpage)
+            if av is None:
+                raise ExtractorError(u'Cannot find redirect math task')
+            av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
+
+            parsed_url = compat_urllib_parse_urlparse(url)
+            av_val = av_res + len(parsed_url.netloc)
+            confirm_url = (
+                parsed_url.scheme + u'://' + parsed_url.netloc +
+                action + '?' +
+                compat_urllib_parse.urlencode({
+                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
+            self._download_webpage(
+                confirm_url, video_id,
+                note=u'Confirming after redirect')
+            webpage = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
+                                       webpage, u'video file URL')
+        video_title = self._og_search_title(webpage)
+        video_description = self._og_search_description(webpage)
+
+        return {
+            '_type': 'video',
+            'id':  video_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': video_title,
+            'description': video_description
+        }

+ 166 - 0
youtube_dl/extractor/appletrailers.py

@@ -0,0 +1,166 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+)
+
+
+class AppleTrailersIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _TEST = {
+        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
+        u"playlist": [
+            {
+                u"file": u"manofsteel-trailer4.mov",
+                u"md5": u"11874af099d480cc09e103b189805d5f",
+                u"info_dict": {
+                    u"duration": 111,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
+                    u"title": u"Trailer 4",
+                    u"upload_date": u"20130523",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-trailer3.mov",
+                u"md5": u"07a0a262aae5afe68120eed61137ab34",
+                u"info_dict": {
+                    u"duration": 182,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
+                    u"title": u"Trailer 3",
+                    u"upload_date": u"20130417",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-trailer.mov",
+                u"md5": u"e401fde0813008e3307e54b6f384cff1",
+                u"info_dict": {
+                    u"duration": 148,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
+                    u"title": u"Trailer",
+                    u"upload_date": u"20121212",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-teaser.mov",
+                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
+                u"info_dict": {
+                    u"duration": 93,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
+                    u"title": u"Teaser",
+                    u"upload_date": u"20120721",
+                    u"uploader_id": u"wb",
+                },
+            }
+        ]
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        movie = mobj.group('movie')
+        uploader_id = mobj.group('company')
+
+        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
+        playlist_snippet = self._download_webpage(playlist_url, movie)
+        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
+        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
+
+        size_cache = {}
+
+        doc = xml.etree.ElementTree.fromstring(playlist_html)
+        playlist = []
+        for li in doc.findall('./div/ul/li'):
+            title = li.find('.//h3').text
+            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
+            thumbnail = li.find('.//img').attrib['src']
+
+            date_el = li.find('.//p')
+            upload_date = None
+            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
+            if m:
+                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
+            runtime_el = date_el.find('./br')
+            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
+            duration = None
+            if m:
+                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+
+            formats = []
+            for formats_el in li.findall('.//a'):
+                if formats_el.attrib['class'] != 'OverlayPanel':
+                    continue
+                target = formats_el.attrib['target']
+
+                format_code = formats_el.text
+                if 'Automatic' in format_code:
+                    continue
+
+                size_q = formats_el.attrib['href']
+                size_id = size_q.rpartition('#videos-')[2]
+                if size_id not in size_cache:
+                    size_url = url + size_q
+                    sizepage_html = self._download_webpage(
+                        size_url, movie,
+                        note=u'Downloading size info %s' % size_id,
+                        errnote=u'Error while downloading size info %s' % size_id,
+                    )
+                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
+                    size_cache[size_id] = _doc
+
+                sizepage_doc = size_cache[size_id]
+                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
+                for vid_a in links:
+                    href = vid_a.get('href')
+                    if not href.endswith(target):
+                        continue
+                    detail_q = href.partition('#')[0]
+                    detail_url = url + '/' + detail_q
+
+                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
+                    detail_id = m.group('detail_id')
+
+                    detail_html = self._download_webpage(
+                        detail_url, movie,
+                        note=u'Downloading detail %s %s' % (detail_id, size_id),
+                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
+                    )
+                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
+                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
+                    assert movie_link_el.get('class') == 'movieLink'
+                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
+                    ext = determine_ext(movie_link)
+                    assert ext == 'mov'
+
+                    formats.append({
+                        'format': format_code,
+                        'ext': ext,
+                        'url': movie_link,
+                    })
+
+            info = {
+                '_type': 'video',
+                'id': video_id,
+                'title': title,
+                'formats': formats,
+                'title': title,
+                'duration': duration,
+                'thumbnail': thumbnail,
+                'upload_date': upload_date,
+                'uploader_id': uploader_id,
+                'user_agent': 'QuickTime compatible (youtube-dl)',
+            }
+            # TODO: Remove when #980 has been merged
+            info['url'] = formats[-1]['url']
+            info['ext'] = formats[-1]['ext']
+
+            playlist.append(info)
+
+        return {
+            '_type': 'playlist',
+            'id': movie,
+            'entries': playlist,
+        }

+ 35 - 0
youtube_dl/extractor/canalc2.py

@@ -0,0 +1,35 @@
+# coding: utf-8
+import re
+
+from .common import InfoExtractor
+
+
+class Canalc2IE(InfoExtractor):
+    _IE_NAME = 'canalc2.tv'
+    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
+
+    _TEST = {
+        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        u'file': u'12163.mp4',
+        u'md5': u'060158428b650f896c542dfbb3d6487f',
+        u'info_dict': {
+            u'title': u'Terrasses du Numérique'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, video_id)
+        file_name = self._search_regex(
+            r"so\.addVariable\('file','(.*?)'\);",
+            webpage, 'file name')
+        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
+
+        title = self._html_search_regex(
+            r'class="evenement8">(.*?)</a>', webpage, u'title')
+        
+        return {'id': video_id,
+                'ext': 'mp4',
+                'url': video_url,
+                'title': title,
+                }

+ 1 - 1
youtube_dl/extractor/canalplus.py

@@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..utils import unified_strdate
 from ..utils import unified_strdate
 
 
 class CanalplusIE(InfoExtractor):
 class CanalplusIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     IE_NAME = u'canalplus.fr'
     IE_NAME = u'canalplus.fr'
 
 

+ 58 - 0
youtube_dl/extractor/cnn.py

@@ -0,0 +1,58 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class CNNIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+
+    _TESTS = [{
+        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
+        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
+        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
+        u'info_dict': {
+            u'title': u'Nadal wins 8th French Open title',
+            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
+        },
+    },
+    {
+        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
+        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
+        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
+        u"info_dict": {
+            u"title": "Student's epic speech stuns new freshmen",
+            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        path = mobj.group('path')
+        page_title = mobj.group('title')
+        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
+        info_xml = self._download_webpage(info_url, page_title)
+        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+
+        formats = []
+        for f in info.findall('files/file'):
+            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
+            if mf is not None:
+                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
+        formats = sorted(formats)
+        (_,_,_, video_path) = formats[-1]
+        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
+
+        thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
+        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
+
+        return {'id': info.attrib['id'],
+                'title': info.find('headline').text,
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'thumbnail': thumbnails[-1][1],
+                'thumbnails': thumbs_dict,
+                'description': info.find('description').text,
+                }

+ 13 - 3
youtube_dl/extractor/common.py

@@ -114,6 +114,11 @@ class InfoExtractor(object):
         """Real extraction process. Redefine in subclasses."""
         """Real extraction process. Redefine in subclasses."""
         pass
         pass
 
 
+    @classmethod
+    def ie_key(cls):
+        """A string for getting the InfoExtractor with get_info_extractor"""
+        return cls.__name__[:-2]
+
     @property
     @property
     def IE_NAME(self):
     def IE_NAME(self):
         return type(self).__name__[:-2]
         return type(self).__name__[:-2]
@@ -129,7 +134,7 @@ class InfoExtractor(object):
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             if errnote is None:
             if errnote is None:
                 errnote = u'Unable to download webpage'
                 errnote = u'Unable to download webpage'
-            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
+            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
 
 
     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
         """ Returns a tuple (page content as string, URL handle) """
         """ Returns a tuple (page content as string, URL handle) """
@@ -140,12 +145,17 @@ class InfoExtractor(object):
 
 
         urlh = self._request_webpage(url_or_request, video_id, note, errnote)
         urlh = self._request_webpage(url_or_request, video_id, note, errnote)
         content_type = urlh.headers.get('Content-Type', '')
         content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
         if m:
             encoding = m.group(1)
             encoding = m.group(1)
         else:
         else:
-            encoding = 'utf-8'
-        webpage_bytes = urlh.read()
+            m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
+                          webpage_bytes[:1024])
+            if m:
+                encoding = m.group(1).decode('ascii')
+            else:
+                encoding = 'utf-8'
         if self._downloader.params.get('dump_intermediate_pages', False):
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
             try:
                 url = url_or_request.get_full_url()
                 url = url_or_request.get_full_url()

+ 2 - 5
youtube_dl/extractor/generic.py

@@ -7,8 +7,8 @@ from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     compat_urllib_error,
     compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_parse,
-    compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_request,
+    compat_urlparse,
 
 
     ExtractorError,
     ExtractorError,
 )
 )
@@ -163,10 +163,7 @@ class GenericIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
 
         video_url = compat_urllib_parse.unquote(mobj.group(1))
         video_url = compat_urllib_parse.unquote(mobj.group(1))
-        if video_url.startswith('//'):
-            video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
-        if '://' not in video_url:
-            video_url = url + ('' if url.endswith('/') else '/') + video_url
+        video_url = compat_urlparse.urljoin(url, video_url)
         video_id = os.path.basename(video_url)
         video_id = os.path.basename(video_url)
 
 
         # here's a fun little line of code for you:
         # here's a fun little line of code for you:

+ 2 - 2
youtube_dl/extractor/googleplus.py

@@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
             webpage, 'title', default=u'NA')
             webpage, 'title', default=u'NA')
 
 
         # Step 2, Simulate clicking the image box to launch video
         # Step 2, Simulate clicking the image box to launch video
-        DOMAIN = 'https://plus.google.com'
-        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
+        DOMAIN = 'https://plus.google.com/'
+        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
             webpage, u'video page URL')
             webpage, u'video page URL')
         if not video_page.startswith(DOMAIN):
         if not video_page.startswith(DOMAIN):
             video_page = DOMAIN + video_page
             video_page = DOMAIN + video_page

+ 12 - 10
youtube_dl/extractor/hark.py

@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 
 
 import re
 import re
+import json
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import determine_ext
 from ..utils import determine_ext
@@ -12,24 +13,25 @@ class HarkIE(InfoExtractor):
         u'file': u'mmbzyhkgny.mp3',
         u'file': u'mmbzyhkgny.mp3',
         u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
         u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
         u'info_dict': {
         u'info_dict': {
-            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
+            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
+            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
+            u'duration': 11,
         }
         }
     }
     }
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group(1)
         video_id = mobj.group(1)
-        embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
-        webpage = self._download_webpage(embed_url, video_id)
-
-        final_url = self._search_regex(r'src="(.+?).mp3"',
-                                webpage, 'video url')+'.mp3'
-        title = self._html_search_regex(r'<title>(.+?)</title>',
-                                webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
-                                'Sound Clip , Quote, MP3, and Ringtone - Hark','')
+        json_url = "http://www.hark.com/clips/%s.json" %(video_id)
+        info_json = self._download_webpage(json_url, video_id)
+        info = json.loads(info_json)
+        final_url = info['url']
 
 
         return {'id': video_id,
         return {'id': video_id,
                 'url' : final_url,
                 'url' : final_url,
-                'title': title,
+                'title': info['name'],
                 'ext': determine_ext(final_url),
                 'ext': determine_ext(final_url),
+                'description': info['description'],
+                'thumbnail': info['image_original'],
+                'duration': info['duration'],
                 }
                 }

+ 4 - 2
youtube_dl/extractor/kankan.py

@@ -21,8 +21,10 @@ class KankanIE(InfoExtractor):
         video_id = mobj.group('id')
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
 
 
-        title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
-        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
+        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
+        gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
+        gcid = gcids[-1]
 
 
         video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
         video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
                                                  video_id, u'Downloading video url info')
                                                  video_id, u'Downloading video url info')

+ 76 - 0
youtube_dl/extractor/mit.py

@@ -0,0 +1,76 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    get_element_by_id,
+)
+
+
+class TechTVMITIE(InfoExtractor):
+    IE_NAME = u'techtv.mit.edu'
+    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+        u'file': u'25418.mp4',
+        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
+        u'info_dict': {
+            u'title': u'MIT DNA Learning Center Set',
+            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(
+            'http://techtv.mit.edu/videos/%s' % video_id, video_id)
+        embed_page = self._download_webpage(
+            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
+            note=u'Downloading embed page')
+
+        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
+            embed_page, u'base url')
+        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
+            u'video formats')
+        formats = json.loads(formats_json)
+        formats = sorted(formats, key=lambda f: f['bitrate'])
+
+        title = get_element_by_id('edit-title', webpage)
+        description = clean_html(get_element_by_id('edit-description', webpage))
+        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
+            embed_page, u'thumbnail', flags=re.DOTALL)
+
+        return {'id': video_id,
+                'title': title,
+                'url': base_url + formats[-1]['url'].replace('mp4:', ''),
+                'ext': 'mp4',
+                'description': description,
+                'thumbnail': thumbnail,
+                }
+
+
+class MITIE(TechTVMITIE):
+    IE_NAME = u'video.mit.edu'
+    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
+
+    _TEST = {
+        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
+        u'file': u'21783.mp4',
+        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
+        u'info_dict': {
+            u'title': u'The Government is Profiling You',
+            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        page_title = mobj.group('title')
+        webpage = self._download_webpage(url, page_title)
+        self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
+        embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
+            u'embed url')
+        return self.url_result(embed_url, ie='TechTVMIT')

+ 33 - 0
youtube_dl/extractor/nbc.py

@@ -0,0 +1,33 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import find_xpath_attr, compat_str
+
+
+class NBCNewsIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
+        u'file': u'52753292.flv',
+        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
+        u'info_dict': {
+            u'title': u'Crew emerges after four-month Mars food study',
+            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
+        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
+
+        return {'id': video_id,
+                'title': info.find('headline').text,
+                'ext': 'flv',
+                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
+                'description': compat_str(info.find('caption').text),
+                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
+                }

+ 90 - 0
youtube_dl/extractor/sohu.py

@@ -0,0 +1,90 @@
+# encoding: utf-8
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SohuIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
+
+    _TEST = {
+        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
+        u'file': u'382479172.mp4',
+        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
+        u'info_dict': {
+            u'title': u'MV:Far East Movement《The Illest》',
+        },
+    }
+
+    def _real_extract(self, url):
+
+        def _fetch_data(vid_id):
+            base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+            data_url = base_data_url + str(vid_id)
+            data_json = self._download_webpage(
+                data_url, video_id,
+                note=u'Downloading JSON data for ' + str(vid_id))
+            return json.loads(data_json)
+
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
+                                            webpage, u'video title')
+        title = raw_title.partition('-')[0].strip()
+
+        vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
+                                      u'video path')
+        data = _fetch_data(vid)
+
+        QUALITIES = ('ori', 'super', 'high', 'nor')
+        vid_ids = [data['data'][q + 'Vid']
+                   for q in QUALITIES
+                   if data['data'][q + 'Vid'] != 0]
+        if not vid_ids:
+            raise ExtractorError(u'No formats available for this video')
+
+        # For now, we just pick the highest available quality
+        vid_id = vid_ids[-1]
+
+        format_data = data if vid == vid_id else _fetch_data(vid_id)
+        part_count = format_data['data']['totalBlocks']
+        allot = format_data['allot']
+        prot = format_data['prot']
+        clipsURL = format_data['data']['clipsURL']
+        su = format_data['data']['su']
+
+        playlist = []
+        for i in range(part_count):
+            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
+                        (allot, prot, clipsURL[i], su[i]))
+            part_str = self._download_webpage(
+                part_url, video_id,
+                note=u'Downloading part %d of %d' % (i+1, part_count))
+
+            part_info = part_str.split('|')
+            video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+            video_info = {
+                'id': '%s_part%02d' % (video_id, i + 1),
+                'title': title,
+                'url': video_url,
+                'ext': 'mp4',
+            }
+            playlist.append(video_info)
+
+        if len(playlist) == 1:
+            info = playlist[0]
+            info['id'] = video_id
+        else:
+            info = {
+                '_type': 'playlist',
+                'entries': playlist,
+                'id': video_id,
+            }
+
+        return info

+ 73 - 0
youtube_dl/extractor/trilulilu.py

@@ -0,0 +1,73 @@
+import json
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+
+
+class TriluliluIE(InfoExtractor):
+    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
+    _TEST = {
+        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
+        u'file': u"big-buck-bunny-1.mp4",
+        u'info_dict': {
+            u"title": u"Big Buck Bunny",
+            u"description": u":) pentru copilul din noi",
+        },
+        # Server ignores Range headers (--test)
+        u"params": {
+            u"skip_download": True
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+
+        log_str = self._search_regex(
+            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
+        log = json.loads(log_str)
+
+        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
+                      u'video-formats2' % log)
+        format_str = self._download_webpage(
+            format_url, video_id,
+            note=u'Downloading formats',
+            errnote=u'Error while downloading formats')
+
+        format_doc = xml.etree.ElementTree.fromstring(format_str)
+ 
+        video_url_template = (
+            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
+            u'&source=site&hash=%(hash)s&username=%(userid)s&'
+            u'key=ministhebest&format=%%s&sig=&exp=' %
+            log)
+        formats = [
+            {
+                'format': fnode.text,
+                'url': video_url_template % fnode.text,
+            }
+
+            for fnode in format_doc.findall('./formats/format')
+        ]
+
+        info = {
+            '_type': 'video',
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
+
+        # TODO: Remove when #980 has been merged
+        info['url'] = formats[-1]['url']
+        info['ext'] = formats[-1]['format'].partition('-')[0]
+
+        return info

+ 0 - 1
youtube_dl/extractor/wat.py

@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from .common import InfoExtractor
 
 
 from ..utils import (
 from ..utils import (
-    compat_urllib_parse,
     unified_strdate,
     unified_strdate,
 )
 )
 
 

+ 14 - 4
youtube_dl/extractor/youporn.py

@@ -12,14 +12,16 @@ from ..utils import (
     unescapeHTML,
     unescapeHTML,
     unified_strdate,
     unified_strdate,
 )
 )
-
+from ..aes import (
+    aes_decrypt_text
+)
 
 
 class YouPornIE(InfoExtractor):
 class YouPornIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
     _TEST = {
     _TEST = {
         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         u'file': u'505835.mp4',
         u'file': u'505835.mp4',
-        u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
+        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
         u'info_dict': {
         u'info_dict': {
             u"upload_date": u"20101221", 
             u"upload_date": u"20101221", 
             u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
             u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
@@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
         # Get all of the links from the page
         # Get all of the links from the page
         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
         links = re.findall(LINK_RE, download_list_html)
         links = re.findall(LINK_RE, download_list_html)
-        if(len(links) == 0):
+        
+        # Get link of hd video if available
+        mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
+        if mobj != None:
+            encrypted_video_url = mobj.group(u'encrypted_video_url')
+            video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
+            links = [video_url] + links
+        
+        if not links:
             raise ExtractorError(u'ERROR: no known formats available for video')
             raise ExtractorError(u'ERROR: no known formats available for video')
 
 
         self.to_screen(u'Links found: %d' % len(links))
         self.to_screen(u'Links found: %d' % len(links))
@@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
             self._print_formats(formats)
             self._print_formats(formats)
             return
             return
 
 
-        req_format = self._downloader.params.get('format', None)
+        req_format = self._downloader.params.get('format', 'best')
         self.to_screen(u'Format: %s' % req_format)
         self.to_screen(u'Format: %s' % req_format)
 
 
         if req_format is None or req_format == 'best':
         if req_format is None or req_format == 'best':

+ 1 - 1
youtube_dl/extractor/youtube.py

@@ -419,7 +419,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         elif len(s) == 89:
         elif len(s) == 89:
             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
         elif len(s) == 88:
         elif len(s) == 88:
-            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
+            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
         elif len(s) == 87:
         elif len(s) == 87:
             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
         elif len(s) == 86:
         elif len(s) == 86:

+ 64 - 23
youtube_dl/utils.py

@@ -1,19 +1,20 @@
 #!/usr/bin/env python
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 
 
+import datetime
+import email.utils
 import errno
 import errno
 import gzip
 import gzip
 import io
 import io
 import json
 import json
 import locale
 import locale
 import os
 import os
+import platform
 import re
 import re
+import socket
 import sys
 import sys
 import traceback
 import traceback
 import zlib
 import zlib
-import email.utils
-import socket
-import datetime
 
 
 try:
 try:
     import urllib.request as compat_urllib_request
     import urllib.request as compat_urllib_request
@@ -60,6 +61,11 @@ try:
 except ImportError: # Python 2
 except ImportError: # Python 2
     import httplib as compat_http_client
     import httplib as compat_http_client
 
 
+try:
+    from urllib.error import HTTPError as compat_HTTPError
+except ImportError:  # Python 2
+    from urllib2 import HTTPError as compat_HTTPError
+
 try:
 try:
     from subprocess import DEVNULL
     from subprocess import DEVNULL
     compat_subprocess_get_DEVNULL = lambda: DEVNULL
     compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@ -476,7 +482,7 @@ def formatSeconds(secs):
 def make_HTTPS_handler(opts):
 def make_HTTPS_handler(opts):
     if sys.version_info < (3,2):
     if sys.version_info < (3,2):
         # Python's 2.x handler is very simplistic
         # Python's 2.x handler is very simplistic
-        return YoutubeDLHandlerHTTPS()
+        return compat_urllib_request.HTTPSHandler()
     else:
     else:
         import ssl
         import ssl
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
@@ -485,11 +491,11 @@ def make_HTTPS_handler(opts):
         context.verify_mode = (ssl.CERT_NONE
         context.verify_mode = (ssl.CERT_NONE
                                if opts.no_check_certificate
                                if opts.no_check_certificate
                                else ssl.CERT_REQUIRED)
                                else ssl.CERT_REQUIRED)
-        return YoutubeDLHandlerHTTPS(context=context)
+        return compat_urllib_request.HTTPSHandler(context=context)
 
 
 class ExtractorError(Exception):
 class ExtractorError(Exception):
     """Error during info extraction."""
     """Error during info extraction."""
-    def __init__(self, msg, tb=None, expected=False):
+    def __init__(self, msg, tb=None, expected=False, cause=None):
         """ tb, if given, is the original traceback (so that it can be printed out).
         """ tb, if given, is the original traceback (so that it can be printed out).
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
         """
         """
@@ -502,6 +508,7 @@ class ExtractorError(Exception):
 
 
         self.traceback = tb
         self.traceback = tb
         self.exc_info = sys.exc_info()  # preserve original exception
         self.exc_info = sys.exc_info()  # preserve original exception
+        self.cause = cause
 
 
     def format_traceback(self):
     def format_traceback(self):
         if self.traceback is None:
         if self.traceback is None:
@@ -569,8 +576,7 @@ class ContentTooShortError(Exception):
         self.downloaded = downloaded
         self.downloaded = downloaded
         self.expected = expected
         self.expected = expected
 
 
-
-class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
+class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     """Handler for HTTP requests and responses.
     """Handler for HTTP requests and responses.
 
 
     This class, when installed with an OpenerDirector, automatically adds
     This class, when installed with an OpenerDirector, automatically adds
@@ -603,8 +609,8 @@ class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
         ret.code = code
         ret.code = code
         return ret
         return ret
 
 
-    def _http_request(self, req):
-        for h, v in std_headers.items():
+    def http_request(self, req):
+        for h,v in std_headers.items():
             if h in req.headers:
             if h in req.headers:
                 del req.headers[h]
                 del req.headers[h]
             req.add_header(h, v)
             req.add_header(h, v)
@@ -619,12 +625,27 @@ class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
             del req.headers['Youtubedl-user-agent']
             del req.headers['Youtubedl-user-agent']
         return req
         return req
 
 
-    def _http_response(self, req, resp):
+    def http_response(self, req, resp):
         old_resp = resp
         old_resp = resp
         # gzip
         # gzip
         if resp.headers.get('Content-encoding', '') == 'gzip':
         if resp.headers.get('Content-encoding', '') == 'gzip':
-            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
-            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+            content = resp.read()
+            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+            try:
+                uncompressed = io.BytesIO(gz.read())
+            except IOError as original_ioerror:
+                # There may be junk add the end of the file
+                # See http://stackoverflow.com/q/4928560/35070 for details
+                for i in range(1, 1024):
+                    try:
+                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+                        uncompressed = io.BytesIO(gz.read())
+                    except IOError:
+                        continue
+                    break
+                else:
+                    raise original_ioerror
+            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
             resp.msg = old_resp.msg
         # deflate
         # deflate
         if resp.headers.get('Content-encoding', '') == 'deflate':
         if resp.headers.get('Content-encoding', '') == 'deflate':
@@ -633,16 +654,8 @@ class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
             resp.msg = old_resp.msg
             resp.msg = old_resp.msg
         return resp
         return resp
 
 
-
-class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
-    http_request = YoutubeDLHandler_Template._http_request
-    http_response = YoutubeDLHandler_Template._http_response
-
-
-class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
-    https_request = YoutubeDLHandler_Template._http_request
-    https_response = YoutubeDLHandler_Template._http_response
-
+    https_request = http_request
+    https_response = http_response
 
 
 def unified_strdate(date_str):
 def unified_strdate(date_str):
     """Return a string with the date in the format YYYYMMDD"""
     """Return a string with the date in the format YYYYMMDD"""
@@ -720,3 +733,31 @@ class DateRange(object):
         return self.start <= date <= self.end
         return self.start <= date <= self.end
     def __str__(self):
     def __str__(self):
         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+
+
+def platform_name():
+    """ Returns the platform name as a compat_str """
+    res = platform.platform()
+    if isinstance(res, bytes):
+        res = res.decode(preferredencoding())
+
+    assert isinstance(res, compat_str)
+    return res
+
+
+def bytes_to_intlist(bs):
+    if not bs:
+        return []
+    if isinstance(bs[0], int):  # Python 3
+        return list(bs)
+    else:
+        return [ord(c) for c in bs]
+
+
+def intlist_to_bytes(xs):
+    if not xs:
+        return b''
+    if isinstance(chr(0), bytes):  # Python 2
+        return ''.join([chr(x) for x in xs])
+    else:
+        return bytes(xs)

+ 1 - 1
youtube_dl/version.py

@@ -1,2 +1,2 @@
 
 
-__version__ = '2013.08.23'
+__version__ = '2013.08.28'