Browse Source

Merge remote-tracking branch 'origin/master' into IE_cleanup

Conflicts:
	youtube_dl/FileDownloader.py
Filippo Valsorda 13 years ago
parent
commit
c63cc10ffa
12 changed files with 346 additions and 102 deletions
  1. 1 0
      .gitignore
  2. 1 1
      LATEST_VERSION
  3. 28 1
      README.md
  4. 1 1
      test/test_utils.py
  5. BIN
      youtube-dl
  6. 59 1
      youtube-dl.1
  7. 1 1
      youtube-dl.bash-completion
  8. BIN
      youtube-dl.exe
  9. 18 12
      youtube_dl/FileDownloader.py
  10. 196 59
      youtube_dl/InfoExtractors.py
  11. 22 6
      youtube_dl/__init__.py
  12. 19 20
      youtube_dl/utils.py

+ 1 - 0
.gitignore

@@ -3,3 +3,4 @@
 *~
 *~
 wine-py2exe/
 wine-py2exe/
 py2exe.log
 py2exe.log
+*.kate-swp

+ 1 - 1
LATEST_VERSION

@@ -1 +1 @@
-2012.11.28
+2012.11.29

+ 28 - 1
README.md

@@ -1,4 +1,4 @@
-% youtube-dl(1)
+% YOUTUBE-DL(1)
 
 
 # NAME
 # NAME
 youtube-dl
 youtube-dl
@@ -20,6 +20,11 @@ which means you can modify it, redistribute it or use it however you like.
     -i, --ignore-errors      continue on download errors
     -i, --ignore-errors      continue on download errors
     -r, --rate-limit LIMIT   download rate limit (e.g. 50k or 44.6m)
     -r, --rate-limit LIMIT   download rate limit (e.g. 50k or 44.6m)
     -R, --retries RETRIES    number of retries (default is 10)
     -R, --retries RETRIES    number of retries (default is 10)
+    --buffer-size SIZE       size of download buffer (e.g. 1024 or 16k) (default
+                             is 1024)
+    --no-resize-buffer       do not automatically adjust the buffer size. By
+                             default, the buffer size is automatically resized
+                             from an initial value of SIZE.
     --dump-user-agent        display the current browser identification
     --dump-user-agent        display the current browser identification
     --user-agent UA          specify a custom user agent
     --user-agent UA          specify a custom user agent
     --list-extractors        List all supported extractors and the URLs they
     --list-extractors        List all supported extractors and the URLs they
@@ -108,6 +113,28 @@ which means you can modify it, redistribute it or use it however you like.
 
 
 You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`.
 You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`.
 
 
+# OUTPUT TEMPLATE
+
+The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
+
+ - `id`: The sequence will be replaced by the video identifier.
+ - `url`: The sequence will be replaced by the video URL.
+ - `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video.
+ - `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format.
+ - `title`: The sequence will be replaced by the video title.
+ - `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
+ - `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
+ - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
+
+The current default template is `%(id)s.%(ext)s`, but that will be switched to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment).
+
+In some cases, you don't want special characters such as 中, spaces, or & in the filename, for example when transferring the downloaded file to a Windows system or passing the filename through an 8-bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
+
+    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
+    youtube-dl test video ''_ä↭𝕐.mp4    # All kinds of weird characters
+    $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+    youtube-dl_test_video_.mp4          # A simple file name
+
 # FAQ
 # FAQ
 
 
 ### Can you please put the -b option back?
 ### Can you please put the -b option back?

+ 1 - 1
test/test_utils.py

@@ -56,7 +56,7 @@ class TestUtil(unittest.TestCase):
 		self.assertEqual(sanitize_filename(u'aäb中国的c', restricted=True), u'a_b_c')
 		self.assertEqual(sanitize_filename(u'aäb中国的c', restricted=True), u'a_b_c')
 		self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename
 		self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename
 
 
-		forbidden = u'"\0\\/&: \'\t\n'
+		forbidden = u'"\0\\/&!: \'\t\n'
 		for fc in forbidden:
 		for fc in forbidden:
 			for fbc in forbidden:
 			for fbc in forbidden:
 				self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
 				self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))

BIN
youtube-dl


+ 59 - 1
youtube-dl.1

@@ -1,4 +1,4 @@
-.TH youtube-dl 1 "" 
+.TH YOUTUBE-DL 1 "" 
 .SH NAME
 .SH NAME
 .PP
 .PP
 youtube-dl
 youtube-dl
@@ -24,6 +24,11 @@ redistribute it or use it however you like.
 -i,\ --ignore-errors\ \ \ \ \ \ continue\ on\ download\ errors
 -i,\ --ignore-errors\ \ \ \ \ \ continue\ on\ download\ errors
 -r,\ --rate-limit\ LIMIT\ \ \ download\ rate\ limit\ (e.g.\ 50k\ or\ 44.6m)
 -r,\ --rate-limit\ LIMIT\ \ \ download\ rate\ limit\ (e.g.\ 50k\ or\ 44.6m)
 -R,\ --retries\ RETRIES\ \ \ \ number\ of\ retries\ (default\ is\ 10)
 -R,\ --retries\ RETRIES\ \ \ \ number\ of\ retries\ (default\ is\ 10)
+--buffer-size\ SIZE\ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16k)\ (default
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ 1024)
+--no-resize-buffer\ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer\ size.\ By
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ the\ buffer\ size\ is\ automatically\ resized
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ an\ initial\ value\ of\ SIZE.
 --dump-user-agent\ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification
 --dump-user-agent\ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification
 --user-agent\ UA\ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent
 --user-agent\ UA\ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent
 --list-extractors\ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they
 --list-extractors\ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they
@@ -139,6 +144,59 @@ You can configure youtube-dl by placing default arguments (such as
 \f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not
 \f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not
 copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or
 copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or
 \f[C]~/.local/config/youtube-dl.conf\f[].
 \f[C]~/.local/config/youtube-dl.conf\f[].
+.SH OUTPUT TEMPLATE
+.PP
+The \f[C]-o\f[] option allows users to indicate a template for the
+output file names.
+The basic usage is not to set any template arguments when downloading a
+single file, like in
+\f[C]youtube-dl\ -o\ funny_video.flv\ "http://some/video"\f[].
+However, it may contain special sequences that will be replaced when
+downloading each video.
+The special sequences have the format \f[C]%(NAME)s\f[].
+To clarify, that is a percent symbol followed by a name in parentheses,
+followed by a lowercase S.
+Allowed names are:
+.IP \[bu] 2
+\f[C]id\f[]: The sequence will be replaced by the video identifier.
+.IP \[bu] 2
+\f[C]url\f[]: The sequence will be replaced by the video URL.
+.IP \[bu] 2
+\f[C]uploader\f[]: The sequence will be replaced by the nickname of the
+person who uploaded the video.
+.IP \[bu] 2
+\f[C]upload_date\f[]: The sequence will be replaced by the upload date
+in YYYYMMDD format.
+.IP \[bu] 2
+\f[C]title\f[]: The sequence will be replaced by the video title.
+.IP \[bu] 2
+\f[C]ext\f[]: The sequence will be replaced by the appropriate extension
+(like flv or mp4).
+.IP \[bu] 2
+\f[C]epoch\f[]: The sequence will be replaced by the Unix epoch when
+creating the file.
+.IP \[bu] 2
+\f[C]autonumber\f[]: The sequence will be replaced by a five-digit
+number that will be increased with each download, starting at zero.
+.PP
+The current default template is \f[C]%(id)s.%(ext)s\f[], but that will
+be switched to \f[C]%(title)s-%(id)s.%(ext)s\f[] (which can be
+requested with \f[C]-t\f[] at the moment).
+.PP
+In some cases, you don\[aq]t want special characters such as 中, spaces,
+or & in the filename, for example when moving the download to a Windows
+system or passing the filename through an 8-bit-unsafe channel.
+In these cases, add the \f[C]--restrict-filenames\f[] flag to get a
+shorter title:
+.IP
+.nf
+\f[C]
+$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc
+youtube-dl\ test\ video\ \[aq]\[aq]_ä↭𝕐.mp4\ \ \ \ #\ All\ kinds\ of\ weird\ characters
+$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc\ --restrict-filenames
+youtube-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name
+\f[]
+.fi
 .SH FAQ
 .SH FAQ
 .SS Can you please put the -b option back?
 .SS Can you please put the -b option back?
 .PP
 .PP

+ 1 - 1
youtube-dl.bash-completion

@@ -3,7 +3,7 @@ __youtube-dl()
     local cur prev opts
     local cur prev opts
     COMPREPLY=()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     cur="${COMP_WORDS[COMP_CWORD]}"
-    opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
+    opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --buffer-size --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --no-resize-buffer --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
 
 
     if [[ ${cur} == * ]] ; then
     if [[ ${cur} == * ]] ; then
         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )

BIN
youtube-dl.exe


+ 18 - 12
youtube_dl/FileDownloader.py

@@ -62,6 +62,8 @@ class FileDownloader(object):
 	ratelimit:         Download speed limit, in bytes/sec.
 	ratelimit:         Download speed limit, in bytes/sec.
 	nooverwrites:      Prevent overwriting files.
 	nooverwrites:      Prevent overwriting files.
 	retries:           Number of times to retry for HTTP error 5xx
 	retries:           Number of times to retry for HTTP error 5xx
+	buffersize:        Size of download buffer in bytes.
+	noresizebuffer:    Do not automatically resize the download buffer.
 	continuedl:        Try to continue downloads if possible.
 	continuedl:        Try to continue downloads if possible.
 	noprogress:        Do not print the progress bar.
 	noprogress:        Do not print the progress bar.
 	playliststart:     Playlist item to start at.
 	playliststart:     Playlist item to start at.
@@ -106,7 +108,7 @@ class FileDownloader(object):
 		if bytes == 0.0:
 		if bytes == 0.0:
 			exponent = 0
 			exponent = 0
 		else:
 		else:
-			exponent = long(math.log(bytes, 1024.0))
+			exponent = int(math.log(bytes, 1024.0))
 		suffix = 'bkMGTPEZY'[exponent]
 		suffix = 'bkMGTPEZY'[exponent]
 		converted = float(bytes) / float(1024 ** exponent)
 		converted = float(bytes) / float(1024 ** exponent)
 		return '%.2f%s' % (converted, suffix)
 		return '%.2f%s' % (converted, suffix)
@@ -125,7 +127,7 @@ class FileDownloader(object):
 		if current == 0 or dif < 0.001: # One millisecond
 		if current == 0 or dif < 0.001: # One millisecond
 			return '--:--'
 			return '--:--'
 		rate = float(current) / dif
 		rate = float(current) / dif
-		eta = long((float(total) - float(current)) / rate)
+		eta = int((float(total) - float(current)) / rate)
 		(eta_mins, eta_secs) = divmod(eta, 60)
 		(eta_mins, eta_secs) = divmod(eta, 60)
 		if eta_mins > 99:
 		if eta_mins > 99:
 			return '--:--'
 			return '--:--'
@@ -177,7 +179,7 @@ class FileDownloader(object):
 		if not self.params.get('quiet', False):
 		if not self.params.get('quiet', False):
 			terminator = [u'\n', u''][skip_eol]
 			terminator = [u'\n', u''][skip_eol]
 			output = message + terminator
 			output = message + terminator
-			if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+			if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 				output = output.encode(preferredencoding(), 'ignore')
 				output = output.encode(preferredencoding(), 'ignore')
 			self._screen_file.write(output)
 			self._screen_file.write(output)
 			self._screen_file.flush()
 			self._screen_file.flush()
@@ -325,9 +327,13 @@ class FileDownloader(object):
 		"""Generate the output filename."""
 		"""Generate the output filename."""
 		try:
 		try:
 			template_dict = dict(info_dict)
 			template_dict = dict(info_dict)
-			template_dict['epoch'] = unicode(int(time.time()))
-			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
+
+			template_dict['epoch'] = int(time.time())
+			template_dict['autonumber'] = u'%05d' % self._num_downloads
+
 			template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
 			template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
+			template_dict = dict((k, sanitize_filename(u(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items())
+
 			filename = self.params['outtmpl'] % template_dict
 			filename = self.params['outtmpl'] % template_dict
 			return filename
 			return filename
 		except (ValueError, KeyError), err:
 		except (ValueError, KeyError), err:
@@ -370,7 +376,6 @@ class FileDownloader(object):
 				raise MaxDownloadsReached()
 				raise MaxDownloadsReached()
 
 
 		filename = self.prepare_filename(info_dict)
 		filename = self.prepare_filename(info_dict)
-		filename = sanitize_filename(filename, self.params.get('restrictfilenames'))
 
 
 		# Forced printings
 		# Forced printings
 		if self.params.get('forcetitle', False):
 		if self.params.get('forcetitle', False):
@@ -398,7 +403,7 @@ class FileDownloader(object):
 			if dn != '' and not os.path.exists(dn): # dn is already encoded
 			if dn != '' and not os.path.exists(dn): # dn is already encoded
 				os.makedirs(dn)
 				os.makedirs(dn)
 		except (OSError, IOError), err:
 		except (OSError, IOError), err:
-			self.trouble(u'ERROR: unable to create directory ' + unicode(err))
+			self.trouble(u'ERROR: unable to create directory ' + u(err))
 			return
 			return
 
 
 		if self.params.get('writedescription', False):
 		if self.params.get('writedescription', False):
@@ -623,7 +628,7 @@ class FileDownloader(object):
 					else:
 					else:
 						# Examine the reported length
 						# Examine the reported length
 						if (content_length is not None and
 						if (content_length is not None and
-								(resume_len - 100 < long(content_length) < resume_len + 100)):
+								(resume_len - 100 < int(content_length) < resume_len + 100)):
 							# The file had already been fully downloaded.
 							# The file had already been fully downloaded.
 							# Explanation to the above condition: in issue #175 it was revealed that
 							# Explanation to the above condition: in issue #175 it was revealed that
 							# YouTube sometimes adds or removes a few bytes from the end of the file,
 							# YouTube sometimes adds or removes a few bytes from the end of the file,
@@ -650,10 +655,10 @@ class FileDownloader(object):
 
 
 		data_len = data.info().get('Content-length', None)
 		data_len = data.info().get('Content-length', None)
 		if data_len is not None:
 		if data_len is not None:
-			data_len = long(data_len) + resume_len
+			data_len = int(data_len) + resume_len
 		data_len_str = self.format_bytes(data_len)
 		data_len_str = self.format_bytes(data_len)
 		byte_counter = 0 + resume_len
 		byte_counter = 0 + resume_len
-		block_size = 1024
+		block_size = self.params.get('buffersize', 1024)
 		start = time.time()
 		start = time.time()
 		while True:
 		while True:
 			# Download and write
 			# Download and write
@@ -679,7 +684,8 @@ class FileDownloader(object):
 			except (IOError, OSError), err:
 			except (IOError, OSError), err:
 				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 				return False
 				return False
-			block_size = self.best_block_size(after - before, len(data_block))
+			if not self.params.get('noresizebuffer', False):
+				block_size = self.best_block_size(after - before, len(data_block))
 
 
 			# Progress message
 			# Progress message
 			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
@@ -699,7 +705,7 @@ class FileDownloader(object):
 		stream.close()
 		stream.close()
 		self.report_finish()
 		self.report_finish()
 		if data_len is not None and byte_counter != data_len:
 		if data_len is not None and byte_counter != data_len:
-			raise ContentTooShortError(byte_counter, long(data_len))
+			raise ContentTooShortError(byte_counter, int(data_len))
 		self.try_rename(tmpfilename, filename)
 		self.try_rename(tmpfilename, filename)
 
 
 		# Update file modification time
 		# Update file modification time

+ 196 - 59
youtube_dl/InfoExtractors.py

@@ -253,7 +253,7 @@ class YoutubeIE(InfoExtractor):
 				else:
 				else:
 					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 			except (IOError, netrc.NetrcParseError), err:
 			except (IOError, netrc.NetrcParseError), err:
-				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
+				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % u(err))
 				return
 				return
 
 
 		# Set language
 		# Set language
@@ -262,7 +262,7 @@ class YoutubeIE(InfoExtractor):
 			self.report_lang()
 			self.report_lang()
 			urllib2.urlopen(request).read()
 			urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
+			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % u(err))
 			return
 			return
 
 
 		# No authentication to be performed
 		# No authentication to be performed
@@ -285,7 +285,7 @@ class YoutubeIE(InfoExtractor):
 				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 				return
 				return
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
+			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % u(err))
 			return
 			return
 
 
 		# Confirm age
 		# Confirm age
@@ -298,7 +298,7 @@ class YoutubeIE(InfoExtractor):
 			self.report_age_confirmation()
 			self.report_age_confirmation()
 			age_results = urllib2.urlopen(request).read()
 			age_results = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % u(err))
 			return
 			return
 
 
 	def _real_extract(self, url):
 	def _real_extract(self, url):
@@ -320,7 +320,7 @@ class YoutubeIE(InfoExtractor):
 		try:
 		try:
 			video_webpage = urllib2.urlopen(request).read()
 			video_webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
 			return
 			return
 
 
 		# Attempt to extract SWF player URL
 		# Attempt to extract SWF player URL
@@ -342,7 +342,7 @@ class YoutubeIE(InfoExtractor):
 				if 'token' in video_info:
 				if 'token' in video_info:
 					break
 					break
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % u(err))
 				return
 				return
 		if 'token' not in video_info:
 		if 'token' not in video_info:
 			if 'reason' in video_info:
 			if 'reason' in video_info:
@@ -405,7 +405,7 @@ class YoutubeIE(InfoExtractor):
 				try:
 				try:
 					srt_list = urllib2.urlopen(request).read()
 					srt_list = urllib2.urlopen(request).read()
 				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-					raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
+					raise Trouble(u'WARNING: unable to download video subtitles: %s' % u(err))
 				srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
 				srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
 				srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
 				srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
 				if not srt_lang_list:
 				if not srt_lang_list:
@@ -422,7 +422,7 @@ class YoutubeIE(InfoExtractor):
 				try:
 				try:
 					srt_xml = urllib2.urlopen(request).read()
 					srt_xml = urllib2.urlopen(request).read()
 				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-					raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
+					raise Trouble(u'WARNING: unable to download video subtitles: %s' % u(err))
 				if not srt_xml:
 				if not srt_xml:
 					raise Trouble(u'WARNING: unable to download video subtitles')
 					raise Trouble(u'WARNING: unable to download video subtitles')
 				video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
 				video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
@@ -544,7 +544,7 @@ class MetacafeIE(InfoExtractor):
 			self.report_disclaimer()
 			self.report_disclaimer()
 			disclaimer = urllib2.urlopen(request).read()
 			disclaimer = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % u(err))
 			return
 			return
 
 
 		# Confirm age
 		# Confirm age
@@ -557,7 +557,7 @@ class MetacafeIE(InfoExtractor):
 			self.report_age_confirmation()
 			self.report_age_confirmation()
 			disclaimer = urllib2.urlopen(request).read()
 			disclaimer = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % u(err))
 			return
 			return
 
 
 	def _real_extract(self, url):
 	def _real_extract(self, url):
@@ -581,7 +581,7 @@ class MetacafeIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % u(err))
 			return
 			return
 
 
 		# Extract URL, uploader and title from webpage
 		# Extract URL, uploader and title from webpage
@@ -672,7 +672,7 @@ class DailymotionIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % u(err))
 			return
 			return
 
 
 		# Extract URL, uploader and title from webpage
 		# Extract URL, uploader and title from webpage
@@ -768,7 +768,7 @@ class GoogleIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 			return
 			return
 
 
 		# Extract URL, uploader, and title from webpage
 		# Extract URL, uploader, and title from webpage
@@ -807,7 +807,7 @@ class GoogleIE(InfoExtractor):
 			try:
 			try:
 				webpage = urllib2.urlopen(request).read()
 				webpage = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 				return
 				return
 			mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
 			mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
 			if mobj is None:
 			if mobj is None:
@@ -861,7 +861,7 @@ class PhotobucketIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 			return
 			return
 
 
 		# Extract URL, uploader, and title from webpage
 		# Extract URL, uploader, and title from webpage
@@ -929,7 +929,7 @@ class YahooIE(InfoExtractor):
 			try:
 			try:
 				webpage = urllib2.urlopen(request).read()
 				webpage = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 				return
 				return
 
 
 			mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
 			mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@@ -953,7 +953,7 @@ class YahooIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 			return
 			return
 
 
 		# Extract uploader and title from webpage
 		# Extract uploader and title from webpage
@@ -1011,7 +1011,7 @@ class YahooIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 			return
 			return
 
 
 		# Extract media URL from playlist XML
 		# Extract media URL from playlist XML
@@ -1067,7 +1067,7 @@ class VimeoIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 			return
 			return
 
 
 		# Now we begin extracting as much information as we can from what we
 		# Now we begin extracting as much information as we can from what we
@@ -1147,6 +1147,143 @@ class VimeoIE(InfoExtractor):
 		}]
 		}]
 
 
 
 
+class ArteTvIE(InfoExtractor):
+	"""arte.tv information extractor."""
+
+	_VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
+	_LIVE_URL = r'index-[0-9]+\.html$'
+
+	IE_NAME = u'arte.tv'
+
+	def __init__(self, downloader=None):
+		InfoExtractor.__init__(self, downloader)
+
+	def report_download_webpage(self, video_id):
+		"""Report webpage download."""
+		self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
+
+	def report_extraction(self, video_id):
+		"""Report information extraction."""
+		self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
+
+	def fetch_webpage(self, url):
+		self._downloader.increment_downloads()
+		request = urllib2.Request(url)
+		try:
+			self.report_download_webpage(url)
+			webpage = urllib2.urlopen(request).read()
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
+			return
+		except ValueError, err:
+			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+			return
+		return webpage
+
+	def grep_webpage(self, url, regex, regexFlags, matchTuples):
+		page = self.fetch_webpage(url)
+		mobj = re.search(regex, page, regexFlags)
+		info = {}
+
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+			return
+
+		for (i, key, err) in matchTuples:
+			if mobj.group(i) is None:
+				self._downloader.trouble(err)
+				return
+			else:
+				info[key] = mobj.group(i)
+
+		return info
+
+	def extractLiveStream(self, url):
+		video_lang = url.split('/')[-4]
+		info = self.grep_webpage(
+			url,
+			r'src="(.*?/videothek_js.*?\.js)',
+			0,
+			[
+				(1, 'url', u'ERROR: Invalid URL: %s' % url)
+			]
+		)
+		http_host = url.split('/')[2]
+		next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
+		info = self.grep_webpage(
+			next_url,
+			r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
+				'(http://.*?\.swf).*?' +
+				'(rtmp://.*?)\'',
+			re.DOTALL,
+			[
+				(1, 'path',   u'ERROR: could not extract video path: %s' % url),
+				(2, 'player', u'ERROR: could not extract video player: %s' % url),
+				(3, 'url',    u'ERROR: could not extract video url: %s' % url)
+			]
+		)
+		video_url = u'%s/%s' % (info.get('url'), info.get('path'))
+
+	def extractPlus7Stream(self, url):
+		video_lang = url.split('/')[-3]
+		info = self.grep_webpage(
+			url,
+			r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
+			0,
+			[
+				(1, 'url', u'ERROR: Invalid URL: %s' % url)
+			]
+		)
+		next_url = urllib.unquote(info.get('url'))
+		info = self.grep_webpage(
+			next_url,
+			r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
+			0,
+			[
+				(1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
+			]
+		)
+		next_url = urllib.unquote(info.get('url'))
+
+		info = self.grep_webpage(
+			next_url,
+			r'<video id="(.*?)".*?>.*?' +
+				'<name>(.*?)</name>.*?' +
+				'<dateVideo>(.*?)</dateVideo>.*?' +
+				'<url quality="hd">(.*?)</url>',
+			re.DOTALL,
+			[
+				(1, 'id',    u'ERROR: could not extract video id: %s' % url),
+				(2, 'title', u'ERROR: could not extract video title: %s' % url),
+				(3, 'date',  u'ERROR: could not extract video date: %s' % url),
+				(4, 'url',   u'ERROR: could not extract video url: %s' % url)
+			]
+		)
+
+		return {
+			'id':           info.get('id'),
+			'url':          urllib.unquote(info.get('url')),
+			'uploader':     u'arte.tv',
+			'upload_date':  info.get('date'),
+			'title':        info.get('title'),
+			'ext':          u'mp4',
+			'format':       u'NA',
+			'player_url':   None,
+		}
+
+	def _real_extract(self, url):
+		video_id = url.split('/')[-1]
+		self.report_extraction(video_id)
+
+		if re.search(self._LIVE_URL, video_id) is not None:
+			self.extractLiveStream(url)
+			return
+		else:
+			info = self.extractPlus7Stream(url)
+
+		return [info]
+
+
 class GenericIE(InfoExtractor):
 class GenericIE(InfoExtractor):
 	"""Generic last-resort information extractor."""
 	"""Generic last-resort information extractor."""
 
 
@@ -1232,7 +1369,7 @@ class GenericIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 			return
 			return
 		except ValueError, err:
 		except ValueError, err:
 			# since this is the last-resort InfoExtractor, if
 			# since this is the last-resort InfoExtractor, if
@@ -1324,7 +1461,7 @@ class YoutubeSearchIE(InfoExtractor):
 			return
 			return
 		else:
 		else:
 			try:
 			try:
-				n = long(prefix)
+				n = int(prefix)
 				if n <= 0:
 				if n <= 0:
 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					return
 					return
@@ -1351,7 +1488,7 @@ class YoutubeSearchIE(InfoExtractor):
 			try:
 			try:
 				data = urllib2.urlopen(request).read()
 				data = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: unable to download API page: %s' % u(err))
 				return
 				return
 			api_response = json.loads(data)['data']
 			api_response = json.loads(data)['data']
 
 
@@ -1402,7 +1539,7 @@ class GoogleSearchIE(InfoExtractor):
 			return
 			return
 		else:
 		else:
 			try:
 			try:
-				n = long(prefix)
+				n = int(prefix)
 				if n <= 0:
 				if n <= 0:
 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					return
 					return
@@ -1428,7 +1565,7 @@ class GoogleSearchIE(InfoExtractor):
 			try:
 			try:
 				page = urllib2.urlopen(request).read()
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
 				return
 				return
 
 
 			# Extract video identifiers
 			# Extract video identifiers
@@ -1484,7 +1621,7 @@ class YahooSearchIE(InfoExtractor):
 			return
 			return
 		else:
 		else:
 			try:
 			try:
-				n = long(prefix)
+				n = int(prefix)
 				if n <= 0:
 				if n <= 0:
 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
 					return
 					return
@@ -1511,7 +1648,7 @@ class YahooSearchIE(InfoExtractor):
 			try:
 			try:
 				page = urllib2.urlopen(request).read()
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
 				return
 				return
 
 
 			# Extract video identifiers
 			# Extract video identifiers
@@ -1581,7 +1718,7 @@ class YoutubePlaylistIE(InfoExtractor):
 			try:
 			try:
 				page = urllib2.urlopen(request).read()
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
 				return
 				return
 
 
 			# Extract video identifiers
 			# Extract video identifiers
@@ -1638,7 +1775,7 @@ class YoutubeChannelIE(InfoExtractor):
 			try:
 			try:
 				page = urllib2.urlopen(request).read()
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
 				return
 				return
 
 
 			# Extract video identifiers
 			# Extract video identifiers
@@ -1701,7 +1838,7 @@ class YoutubeUserIE(InfoExtractor):
 			try:
 			try:
 				page = urllib2.urlopen(request).read()
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
 				return
 				return
 
 
 			# Extract video identifiers
 			# Extract video identifiers
@@ -1773,7 +1910,7 @@ class BlipTVUserIE(InfoExtractor):
 			mobj = re.search(r'data-users-id="([^"]+)"', page)
 			mobj = re.search(r'data-users-id="([^"]+)"', page)
 			page_base = page_base % mobj.group(1)
 			page_base = page_base % mobj.group(1)
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
 			return
 			return
 
 
 
 
@@ -1861,7 +1998,7 @@ class DepositFilesIE(InfoExtractor):
 			self.report_download_webpage(file_id)
 			self.report_download_webpage(file_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % u(err))
 			return
 			return
 
 
 		# Search for the real file URL
 		# Search for the real file URL
@@ -1977,7 +2114,7 @@ class FacebookIE(InfoExtractor):
 				else:
 				else:
 					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 			except (IOError, netrc.NetrcParseError), err:
 			except (IOError, netrc.NetrcParseError), err:
-				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
+				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % u(err))
 				return
 				return
 
 
 		if useremail is None:
 		if useremail is None:
@@ -1997,7 +2134,7 @@ class FacebookIE(InfoExtractor):
 				self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
 				self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
 				return
 				return
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
+			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % u(err))
 			return
 			return
 
 
 	def _real_extract(self, url):
 	def _real_extract(self, url):
@@ -2014,7 +2151,7 @@ class FacebookIE(InfoExtractor):
 			page = urllib2.urlopen(request)
 			page = urllib2.urlopen(request)
 			video_webpage = page.read()
 			video_webpage = page.read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
 			return
 			return
 
 
 		# Start extracting information
 		# Start extracting information
@@ -2149,13 +2286,13 @@ class BlipTVIE(InfoExtractor):
 					'urlhandle': urlh
 					'urlhandle': urlh
 				}
 				}
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % u(err))
 			return
 			return
 		if info is None: # Regular URL
 		if info is None: # Regular URL
 			try:
 			try:
 				json_code = urlh.read()
 				json_code = urlh.read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err))
+				self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % u(err))
 				return
 				return
 
 
 			try:
 			try:
@@ -2223,7 +2360,7 @@ class MyVideoIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 			return
 			return
 
 
 		self.report_extraction(video_id)
 		self.report_extraction(video_id)
@@ -2320,7 +2457,7 @@ class ComedyCentralIE(InfoExtractor):
 			htmlHandle = urllib2.urlopen(req)
 			htmlHandle = urllib2.urlopen(req)
 			html = htmlHandle.read()
 			html = htmlHandle.read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
+			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
 			return
 			return
 		if dlNewest:
 		if dlNewest:
 			url = htmlHandle.geturl()
 			url = htmlHandle.geturl()
@@ -2353,7 +2490,7 @@ class ComedyCentralIE(InfoExtractor):
 			urlHandle = urllib2.urlopen(playerUrl_raw)
 			urlHandle = urllib2.urlopen(playerUrl_raw)
 			playerUrl = urlHandle.geturl()
 			playerUrl = urlHandle.geturl()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
+			self._downloader.trouble(u'ERROR: unable to find out player URL: ' + u(err))
 			return
 			return
 
 
 		uri = mMovieParams[0][1]
 		uri = mMovieParams[0][1]
@@ -2362,7 +2499,7 @@ class ComedyCentralIE(InfoExtractor):
 		try:
 		try:
 			indexXml = urllib2.urlopen(indexUrl).read()
 			indexXml = urllib2.urlopen(indexUrl).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
+			self._downloader.trouble(u'ERROR: unable to download episode index: ' + u(err))
 			return
 			return
 
 
 		results = []
 		results = []
@@ -2383,7 +2520,7 @@ class ComedyCentralIE(InfoExtractor):
 			try:
 			try:
 				configXml = urllib2.urlopen(configReq).read()
 				configXml = urllib2.urlopen(configReq).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
+				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
 				return
 				return
 
 
 			cdoc = xml.etree.ElementTree.fromstring(configXml)
 			cdoc = xml.etree.ElementTree.fromstring(configXml)
@@ -2466,7 +2603,7 @@ class EscapistIE(InfoExtractor):
 			m = re.match(r'text/html; charset="?([^"]+)"?', webPage.headers['Content-Type'])
 			m = re.match(r'text/html; charset="?([^"]+)"?', webPage.headers['Content-Type'])
 			webPage = webPageBytes.decode(m.group(1) if m else 'utf-8')
 			webPage = webPageBytes.decode(m.group(1) if m else 'utf-8')
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
+			self._downloader.trouble(u'ERROR: unable to download webpage: ' + u(err))
 			return
 			return
 
 
 		descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
 		descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
@@ -2482,7 +2619,7 @@ class EscapistIE(InfoExtractor):
 		try:
 		try:
 			configJSON = urllib2.urlopen(configUrl).read()
 			configJSON = urllib2.urlopen(configUrl).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
+			self._downloader.trouble(u'ERROR: unable to download configuration: ' + u(err))
 			return
 			return
 
 
 		# Technically, it's JavaScript, not JSON
 		# Technically, it's JavaScript, not JSON
@@ -2491,7 +2628,7 @@ class EscapistIE(InfoExtractor):
 		try:
 		try:
 			config = json.loads(configJSON)
 			config = json.loads(configJSON)
 		except (ValueError,), err:
 		except (ValueError,), err:
-			self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
+			self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + u(err))
 			return
 			return
 
 
 		playlist = config['playlist']
 		playlist = config['playlist']
@@ -2538,7 +2675,7 @@ class CollegeHumorIE(InfoExtractor):
 		try:
 		try:
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
 			return
 			return
 
 
 		m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
 		m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
@@ -2559,7 +2696,7 @@ class CollegeHumorIE(InfoExtractor):
 		try:
 		try:
 			metaXml = urllib2.urlopen(xmlUrl).read()
 			metaXml = urllib2.urlopen(xmlUrl).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % u(err))
 			return
 			return
 
 
 		mdoc = xml.etree.ElementTree.fromstring(metaXml)
 		mdoc = xml.etree.ElementTree.fromstring(metaXml)
@@ -2604,7 +2741,7 @@ class XVideosIE(InfoExtractor):
 		try:
 		try:
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
 			return
 			return
 
 
 		self.report_extraction(video_id)
 		self.report_extraction(video_id)
@@ -2688,7 +2825,7 @@ class SoundcloudIE(InfoExtractor):
 		try:
 		try:
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
 			return
 			return
 
 
 		self.report_extraction('%s/%s' % (uploader, slug_title))
 		self.report_extraction('%s/%s' % (uploader, slug_title))
@@ -2723,7 +2860,7 @@ class SoundcloudIE(InfoExtractor):
 			try:
 			try:
 				upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
 				upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
 			except Exception, e:
 			except Exception, e:
-				self._downloader.to_stderr(compat_str(e))
+				self._downloader.to_stderr(u(e))
 
 
 		# for soundcloud, a request to a cross domain is required for cookies
 		# for soundcloud, a request to a cross domain is required for cookies
 		request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
 		request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
@@ -2765,7 +2902,7 @@ class InfoQIE(InfoExtractor):
 		try:
 		try:
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
 			return
 			return
 
 
 		self.report_extraction(url)
 		self.report_extraction(url)
@@ -2877,7 +3014,7 @@ class MixcloudIE(InfoExtractor):
 			self.report_download_json(file_url)
 			self.report_download_json(file_url)
 			jsonData = urllib2.urlopen(request).read()
 			jsonData = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % u(err))
 			return
 			return
 
 
 		# parse JSON
 		# parse JSON
@@ -2956,7 +3093,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
 			try:
 			try:
 				metaXml = urllib2.urlopen(xmlUrl).read()
 				metaXml = urllib2.urlopen(xmlUrl).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err))
+				self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % u(err))
 				return
 				return
 			mdoc = xml.etree.ElementTree.fromstring(metaXml)
 			mdoc = xml.etree.ElementTree.fromstring(metaXml)
 			try:
 			try:
@@ -2980,7 +3117,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
 			try:
 			try:
 				coursepage = urllib2.urlopen(url).read()
 				coursepage = urllib2.urlopen(url).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
+				self._downloader.trouble(u'ERROR: unable to download course info page: ' + u(err))
 				return
 				return
 
 
 			m = re.search('<h1>([^<]+)</h1>', coursepage)
 			m = re.search('<h1>([^<]+)</h1>', coursepage)
@@ -3019,7 +3156,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
 			try:
 			try:
 				rootpage = urllib2.urlopen(rootURL).read()
 				rootpage = urllib2.urlopen(rootURL).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-				self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
+				self._downloader.trouble(u'ERROR: unable to download course info page: ' + u(err))
 				return
 				return
 
 
 			info['title'] = info['id']
 			info['title'] = info['id']
@@ -3066,7 +3203,7 @@ class MTVIE(InfoExtractor):
 		try:
 		try:
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
 			return
 			return
 
 
 		mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
 		mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
@@ -3099,7 +3236,7 @@ class MTVIE(InfoExtractor):
 		try:
 		try:
 			metadataXml = urllib2.urlopen(request).read()
 			metadataXml = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % u(err))
 			return
 			return
 
 
 		mdoc = xml.etree.ElementTree.fromstring(metadataXml)
 		mdoc = xml.etree.ElementTree.fromstring(metadataXml)
@@ -3187,7 +3324,7 @@ class YoukuIE(InfoExtractor):
 			self.report_download_webpage(video_id)
 			self.report_download_webpage(video_id)
 			jsondata = urllib2.urlopen(request).read()
 			jsondata = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 			return
 			return
 
 
 		self.report_extraction(video_id)
 		self.report_extraction(video_id)
@@ -3361,7 +3498,7 @@ class GooglePlusIE(InfoExtractor):
 		try:
 		try:
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % u(err))
 			return
 			return
 
 
 		# Extract update date
 		# Extract update date
@@ -3403,7 +3540,7 @@ class GooglePlusIE(InfoExtractor):
 		try:
 		try:
 			webpage = urllib2.urlopen(request).read()
 			webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
 			return
 			return
 		self.report_extract_vid_page(video_page)
 		self.report_extract_vid_page(video_page)
 
 

+ 22 - 6
youtube_dl/__init__.py

@@ -18,10 +18,11 @@ __authors__  = (
 	'Ori Avtalion',
 	'Ori Avtalion',
 	'shizeeg',
 	'shizeeg',
 	'Filippo Valsorda',
 	'Filippo Valsorda',
+	'Christian Albrecht',
 	)
 	)
 
 
 __license__ = 'Public Domain'
 __license__ = 'Public Domain'
-__version__ = '2012.11.28'
+__version__ = '2012.11.29'
 
 
 UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
 UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
 UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
 UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
@@ -126,9 +127,12 @@ def parseOpts():
 
 
 		opts = []
 		opts = []
 
 
-		if option._short_opts: opts.append(option._short_opts[0])
-		if option._long_opts: opts.append(option._long_opts[0])
-		if len(opts) > 1: opts.insert(1, ', ')
+		if option._short_opts:
+			opts.append(option._short_opts[0])
+		if option._long_opts:
+			opts.append(option._long_opts[0])
+		if len(opts) > 1:
+			opts.insert(1, ', ')
 
 
 		if option.takes_value(): opts.append(' %s' % option.metavar)
 		if option.takes_value(): opts.append(' %s' % option.metavar)
 
 
@@ -187,6 +191,11 @@ def parseOpts():
 			dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
 			dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
 	general.add_option('-R', '--retries',
 	general.add_option('-R', '--retries',
 			dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
 			dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
+	general.add_option('--buffer-size',
+			dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
+	general.add_option('--no-resize-buffer',
+			action='store_true', dest='noresizebuffer',
+			help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
 	general.add_option('--dump-user-agent',
 	general.add_option('--dump-user-agent',
 			action='store_true', dest='dump_user_agent',
 			action='store_true', dest='dump_user_agent',
 			help='display the current browser identification', default=False)
 			help='display the current browser identification', default=False)
@@ -362,7 +371,7 @@ def gen_extractors():
 		YoukuIE(),
 		YoukuIE(),
 		XNXXIE(),
 		XNXXIE(),
 		GooglePlusIE(),
 		GooglePlusIE(),
-
+		ArteTvIE(),
 		GenericIE()
 		GenericIE()
 	]
 	]
 
 
@@ -440,9 +449,14 @@ def _real_main():
 		opts.ratelimit = numeric_limit
 		opts.ratelimit = numeric_limit
 	if opts.retries is not None:
 	if opts.retries is not None:
 		try:
 		try:
-			opts.retries = long(opts.retries)
+			opts.retries = int(opts.retries)
 		except (TypeError, ValueError), err:
 		except (TypeError, ValueError), err:
 			parser.error(u'invalid retry count specified')
 			parser.error(u'invalid retry count specified')
+	if opts.buffersize is not None:
+		numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
+		if numeric_buffersize is None:
+			parser.error(u'invalid buffer size specified')
+		opts.buffersize = numeric_buffersize
 	try:
 	try:
 		opts.playliststart = int(opts.playliststart)
 		opts.playliststart = int(opts.playliststart)
 		if opts.playliststart <= 0:
 		if opts.playliststart <= 0:
@@ -493,6 +507,8 @@ def _real_main():
 		'ratelimit': opts.ratelimit,
 		'ratelimit': opts.ratelimit,
 		'nooverwrites': opts.nooverwrites,
 		'nooverwrites': opts.nooverwrites,
 		'retries': opts.retries,
 		'retries': opts.retries,
+		'buffersize': opts.buffersize,
+		'noresizebuffer': opts.noresizebuffer,
 		'continuedl': opts.continue_dl,
 		'continuedl': opts.continue_dl,
 		'noprogress': opts.noprogress,
 		'noprogress': opts.noprogress,
 		'playliststart': opts.playliststart,
 		'playliststart': opts.playliststart,

+ 19 - 20
youtube_dl/utils.py

@@ -27,9 +27,9 @@ std_headers = {
 }
 }
 
 
 try:
 try:
-    compat_str = unicode # Python 2
+	u = unicode # Python 2
 except NameError:
 except NameError:
-    compat_str = str
+	u = str
 
 
 def preferredencoding():
 def preferredencoding():
 	"""Get preferred encoding.
 	"""Get preferred encoding.
@@ -37,19 +37,17 @@ def preferredencoding():
 	Returns the best encoding scheme for the system, based on
 	Returns the best encoding scheme for the system, based on
 	locale.getpreferredencoding() and some further tweaks.
 	locale.getpreferredencoding() and some further tweaks.
 	"""
 	"""
-	def yield_preferredencoding():
-		try:
-			pref = locale.getpreferredencoding()
-			u'TEST'.encode(pref)
-		except:
-			pref = 'UTF-8'
-		while True:
-			yield pref
-	return yield_preferredencoding().next()
+	try:
+		pref = locale.getpreferredencoding()
+		u'TEST'.encode(pref)
+	except:
+		pref = 'UTF-8'
+
+	return pref
 
 
 
 
 def htmlentity_transform(matchobj):
 def htmlentity_transform(matchobj):
-	"""Transforms an HTML entity to a Unicode character.
+	"""Transforms an HTML entity to a character.
 
 
 	This function receives a match object and is intended to be used with
 	This function receives a match object and is intended to be used with
 	the re.sub() function.
 	the re.sub() function.
@@ -60,7 +58,6 @@ def htmlentity_transform(matchobj):
 	if entity in htmlentitydefs.name2codepoint:
 	if entity in htmlentitydefs.name2codepoint:
 		return unichr(htmlentitydefs.name2codepoint[entity])
 		return unichr(htmlentitydefs.name2codepoint[entity])
 
 
-	# Unicode character
 	mobj = re.match(ur'(?u)#(x?\d+)', entity)
 	mobj = re.match(ur'(?u)#(x?\d+)', entity)
 	if mobj is not None:
 	if mobj is not None:
 		numstr = mobj.group(1)
 		numstr = mobj.group(1)
@@ -69,7 +66,7 @@ def htmlentity_transform(matchobj):
 			numstr = u'0%s' % numstr
 			numstr = u'0%s' % numstr
 		else:
 		else:
 			base = 10
 			base = 10
-		return unichr(long(numstr, base))
+		return unichr(int(numstr, base))
 
 
 	# Unknown entity in name, return its literal representation
 	# Unknown entity in name, return its literal representation
 	return (u'&%s;' % entity)
 	return (u'&%s;' % entity)
@@ -128,8 +125,10 @@ class IDParser(HTMLParser.HTMLParser):
 	handle_decl = handle_pi = unknown_decl = find_startpos
 	handle_decl = handle_pi = unknown_decl = find_startpos
 
 
 	def get_result(self):
 	def get_result(self):
-		if self.result == None: return None
-		if len(self.result) != 3: return None
+		if self.result is None:
+			return None
+		if len(self.result) != 3:
+			return None
 		lines = self.html.split('\n')
 		lines = self.html.split('\n')
 		lines = lines[self.result[1][0]-1:self.result[2][0]]
 		lines = lines[self.result[1][0]-1:self.result[2][0]]
 		lines[0] = lines[0][self.result[1][1]:]
 		lines[0] = lines[0][self.result[1][1]:]
@@ -208,7 +207,7 @@ def sanitize_filename(s, restricted=False):
 			return '_-' if restricted else ' -'
 			return '_-' if restricted else ' -'
 		elif char in '\\/|*<>':
 		elif char in '\\/|*<>':
 			return '_'
 			return '_'
-		if restricted and (char in '&\'' or char.isspace()):
+		if restricted and (char in '!&\'' or char.isspace()):
 			return '_'
 			return '_'
 		if restricted and ord(char) > 127:
 		if restricted and ord(char) > 127:
 			return '_'
 			return '_'
@@ -235,7 +234,7 @@ def orderedSet(iterable):
 
 
 def unescapeHTML(s):
 def unescapeHTML(s):
 	"""
 	"""
-	@param s a string (of type unicode)
+	@param s a string
 	"""
 	"""
 	assert type(s) == type(u'')
 	assert type(s) == type(u'')
 
 
@@ -244,7 +243,7 @@ def unescapeHTML(s):
 
 
 def encodeFilename(s):
 def encodeFilename(s):
 	"""
 	"""
-	@param s The name of the file (of type unicode)
+	@param s The name of the file
 	"""
 	"""
 
 
 	assert type(s) == type(u'')
 	assert type(s) == type(u'')
@@ -316,7 +315,7 @@ class ContentTooShortError(Exception):
 
 
 class Trouble(Exception):
 class Trouble(Exception):
 	"""Trouble helper exception
 	"""Trouble helper exception
-	
+
 	This is an exception to be handled with
 	This is an exception to be handled with
 	FileDownloader.trouble
 	FileDownloader.trouble
 	"""
 	"""