Bläddra i källkod

Merge branch 'master' into totalwebcasting

Filippo Valsorda 7 år sedan
förälder
incheckning
97bc05116e
100 ändrade filer med 6971 tillägg och 1413 borttagningar
  1. 8 8
      .github/ISSUE_TEMPLATE.md
  2. 6 6
      .github/ISSUE_TEMPLATE_tmpl.md
  3. 1 0
      .github/PULL_REQUEST_TEMPLATE.md
  4. 2 1
      .gitignore
  5. 18 9
      .travis.yml
  6. 40 0
      AUTHORS
  7. 9 7
      CONTRIBUTING.md
  8. 1766 0
      ChangeLog
  9. 4 2
      MANIFEST.in
  10. 24 9
      Makefile
  11. 166 142
      README.md
  12. 2 2
      devscripts/check-porn.py
  13. 5 0
      devscripts/install_jython.sh
  14. 2 1
      devscripts/make_lazy_extractors.py
  15. 1 1
      devscripts/prepare_manpage.py
  16. 22 0
      devscripts/run_tests.sh
  17. 88 28
      docs/supportedsites.md
  18. 4 2
      setup.py
  19. 611 1
      test/test_InfoExtractor.py
  20. 90 3
      test/test_YoutubeDL.py
  21. 8 1
      test/test_aes.py
  22. 3 3
      test/test_compat.py
  23. 36 7
      test/test_download.py
  24. 26 0
      test/test_options.py
  25. 4 4
      test/test_subtitles.py
  26. 192 4
      test/test_utils.py
  27. 20 0
      test/test_youtube_chapters.py
  28. 10 0
      test/testdata/f4m/custom_base_url.f4m
  29. 14 0
      test/testdata/m3u8/pluzz_francetv_11507.m3u8
  30. 16 0
      test/testdata/m3u8/teamcoco_11995.m3u8
  31. 13 0
      test/testdata/m3u8/toggle_mobile_12211.m3u8
  32. 20 0
      test/testdata/m3u8/twitch_vod.m3u8
  33. 10 0
      test/testdata/m3u8/vidio.m3u8
  34. 18 0
      test/testdata/mpd/float_duration.mpd
  35. 218 0
      test/testdata/mpd/urls_only.mpd
  36. 291 85
      youtube_dl/YoutubeDL.py
  37. 30 9
      youtube_dl/__init__.py
  38. 28 0
      youtube_dl/aes.py
  39. 6 3
      youtube_dl/cache.py
  40. 86 12
      youtube_dl/compat.py
  41. 3 0
      youtube_dl/downloader/__init__.py
  42. 28 25
      youtube_dl/downloader/common.py
  43. 19 31
      youtube_dl/downloader/dash.py
  44. 37 3
      youtube_dl/downloader/external.py
  45. 27 31
      youtube_dl/downloader/f4m.py
  46. 124 15
      youtube_dl/downloader/fragment.py
  47. 57 30
      youtube_dl/downloader/hls.py
  48. 195 151
      youtube_dl/downloader/http.py
  49. 10 24
      youtube_dl/downloader/ism.py
  50. 1 1
      youtube_dl/downloader/rtmp.py
  51. 41 9
      youtube_dl/extractor/abc.py
  52. 15 5
      youtube_dl/extractor/abcnews.py
  53. 1 1
      youtube_dl/extractor/abcotvs.py
  54. 12 11
      youtube_dl/extractor/acast.py
  55. 2 1
      youtube_dl/extractor/addanime.py
  56. 150 0
      youtube_dl/extractor/adn.py
  57. 113 18
      youtube_dl/extractor/adobepass.py
  58. 104 177
      youtube_dl/extractor/adultswim.py
  59. 37 11
      youtube_dl/extractor/aenetworks.py
  60. 175 38
      youtube_dl/extractor/afreecatv.py
  61. 10 18
      youtube_dl/extractor/airmozilla.py
  62. 53 0
      youtube_dl/extractor/aliexpress.py
  63. 6 3
      youtube_dl/extractor/aljazeera.py
  64. 33 11
      youtube_dl/extractor/allocine.py
  65. 33 13
      youtube_dl/extractor/amcnetworks.py
  66. 85 0
      youtube_dl/extractor/americastestkitchen.py
  67. 19 7
      youtube_dl/extractor/amp.py
  68. 44 22
      youtube_dl/extractor/animeondemand.py
  69. 70 14
      youtube_dl/extractor/anvato.py
  70. 30 19
      youtube_dl/extractor/aparat.py
  71. 2 2
      youtube_dl/extractor/appleconnect.py
  72. 5 4
      youtube_dl/extractor/appletrailers.py
  73. 4 4
      youtube_dl/extractor/archiveorg.py
  74. 16 7
      youtube_dl/extractor/ard.py
  75. 1 2
      youtube_dl/extractor/arkena.py
  76. 19 3
      youtube_dl/extractor/arte.py
  77. 93 0
      youtube_dl/extractor/asiancrush.py
  78. 13 8
      youtube_dl/extractor/atresplayer.py
  79. 73 0
      youtube_dl/extractor/atvat.py
  80. 2 2
      youtube_dl/extractor/audioboom.py
  81. 78 0
      youtube_dl/extractor/aws.py
  82. 213 0
      youtube_dl/extractor/azmedien.py
  83. 0 140
      youtube_dl/extractor/azubu.py
  84. 1 1
      youtube_dl/extractor/bambuser.py
  85. 128 11
      youtube_dl/extractor/bandcamp.py
  86. 74 9
      youtube_dl/extractor/bbc.py
  87. 188 0
      youtube_dl/extractor/beampro.py
  88. 13 6
      youtube_dl/extractor/beeg.py
  89. 10 2
      youtube_dl/extractor/bellmedia.py
  90. 149 13
      youtube_dl/extractor/bilibili.py
  91. 3 7
      youtube_dl/extractor/bleacherreport.py
  92. 8 3
      youtube_dl/extractor/bloomberg.py
  93. 72 0
      youtube_dl/extractor/bostonglobe.py
  94. 9 4
      youtube_dl/extractor/bpb.py
  95. 144 4
      youtube_dl/extractor/br.py
  96. 138 62
      youtube_dl/extractor/brightcove.py
  97. 4 3
      youtube_dl/extractor/buzzfeed.py
  98. 18 53
      youtube_dl/extractor/byutv.py
  99. 1 4
      youtube_dl/extractor/canalc2.py
  100. 40 20
      youtube_dl/extractor/canalplus.py

+ 8 - 8
.github/ISSUE_TEMPLATE.md

@@ -1,16 +1,16 @@
 ## Please follow the guide below
 
 - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
-- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x])
-- Use *Preview* tab to see how your issue will actually look like
+- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`)
+- Use the *Preview* tab to see what your issue will actually look like
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.10**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.31*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.31**
 
 ### Before submitting an *issue* make sure you have:
-- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
+- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
 - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
 
 ### What is the purpose of your *issue*?
@@ -28,14 +28,14 @@
 
 ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
 
-Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
+Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
+
 ```
-$ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.01.10
+[debug] youtube-dl version 2017.12.31
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

+ 6 - 6
.github/ISSUE_TEMPLATE_tmpl.md

@@ -1,16 +1,16 @@
 ## Please follow the guide below
 
 - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
-- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x])
-- Use *Preview* tab to see how your issue will actually look like
+- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`)
+- Use the *Preview* tab to see what your issue will actually look like
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
 - [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s**
 
 ### Before submitting an *issue* make sure you have:
-- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
+- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
 - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
 
 ### What is the purpose of your *issue*?
@@ -28,9 +28,9 @@
 
 ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
 
-Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
+Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
+
 ```
-$ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']

+ 1 - 0
.github/PULL_REQUEST_TEMPLATE.md

@@ -9,6 +9,7 @@
 ### Before submitting a *pull request* make sure you have:
 - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
 - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
+- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
 
 ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
 - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)

+ 2 - 1
.gitignore

@@ -22,6 +22,7 @@ cover/
 updates_key.pem
 *.egg-info
 *.srt
+*.ttml
 *.sbv
 *.vtt
 *.flv
@@ -35,8 +36,8 @@ updates_key.pem
 *.mkv
 *.swf
 *.part
+*.ytdl
 *.swp
-test/testdata
 test/local_parameters.json
 .tox
 youtube-dl.zsh

+ 18 - 9
.travis.yml

@@ -6,13 +6,22 @@ python:
   - "3.3"
   - "3.4"
   - "3.5"
+  - "3.6"
+  - "pypy"
+  - "pypy3"
 sudo: false
-script: nosetests test --verbose
-notifications:
-  email:
-    - filippo.valsorda@gmail.com
-    - yasoob.khld@gmail.com
-#  irc:
-#    channels:
-#      - "irc.freenode.org#youtube-dl"
-#    skip_join: true
+env:
+  - YTDL_TEST_SET=core
+  - YTDL_TEST_SET=download
+matrix:
+  include:
+    - env: JYTHON=true; YTDL_TEST_SET=core
+    - env: JYTHON=true; YTDL_TEST_SET=download
+  fast_finish: true
+  allow_failures:
+    - env: YTDL_TEST_SET=download
+    - env: JYTHON=true; YTDL_TEST_SET=core
+    - env: JYTHON=true; YTDL_TEST_SET=download
+before_install:
+  - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
+script: ./devscripts/run_tests.sh

+ 40 - 0
AUTHORS

@@ -191,3 +191,43 @@ Rich Leeper
 Zhong Jianxin
 Thor77
 Mattias Wadman
+Arjan Verwer
+Costy Petrisor
+Logan B
+Alex Seiler
+Vijay Singh
+Paul Hartmann
+Stephen Chen
+Fabian Stahl
+Bagira
+Odd Stråbø
+Philip Herzog
+Thomas Christlieb
+Marek Rusinowski
+Tobias Gruetzmacher
+Olivier Bilodeau
+Lars Vierbergen
+Juanjo Benages
+Xiao Di Guan
+Thomas Winant
+Daniel Twardowski
+Jeremie Jarosh
+Gerard Rovira
+Marvin Ewald
+Frédéric Bournival
+Timendum
+gritstub
+Adam Voss
+Mike Fährmann
+Jan Kundrát
+Giuseppe Fabiano
+Örn Guðjónsson
+Parmjit Virk
+Genki Sky
+Ľuboš Katrinec
+Corey Nicholson
+Ashutosh Chaudhary
+John Dong
+Tatsuyuki Ishi
+Daniel Weber
+Kay Bouché

+ 9 - 7
CONTRIBUTING.md

@@ -3,7 +3,7 @@
 $ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
-[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
 [debug] youtube-dl version 2015.12.06
 [debug] Git HEAD: 135392e
@@ -34,7 +34,7 @@ For bug reports, this means that your report should contain the *complete* outpu
 
 If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
 
-**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
 
 ###  Are you using the latest version?
 
@@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t
 
 # DEVELOPER INSTRUCTIONS
 
-Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
 
 To run youtube-dl as a developer, you don't need to build anything either. Simply execute
 
@@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
     python test/test_download.py
     nosetests
 
+See item 6 of the [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
+
 If you want to create a build of youtube-dl yourself, you'll need
 
 * python
@@ -118,7 +120,7 @@ After you have ensured this site is distributing its content legally, you can fo
     class YourExtractorIE(InfoExtractor):
         _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
         _TEST = {
-            'url': 'http://yourextractor.com/watch/42',
+            'url': 'https://yourextractor.com/watch/42',
             'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
             'info_dict': {
                 'id': '42',
@@ -149,10 +151,10 @@ After you have ensured this site is distributing its content legally, you can fo
             }
     ```
 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with the `only_matching` key in their dict are not counted.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
 
         $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py

+ 1766 - 0
ChangeLog

@@ -1,3 +1,1769 @@
+version <unreleased>
+
+Extractors
+* [youku] Fix list extraction (#15135)
+* [openload] Fix extraction (#15166)
+* [rtve.es:alacarta] Fix extraction of some new URLs
+
+
+version 2017.12.31
+
+Core
++ [extractor/common] Add container meta field for formats extracted
+  in _parse_mpd_formats (#13616)
++ [downloader/hls] Use HTTP headers for key request
+* [common] Use AACL as the default fourcc when AudioTag is 255
+* [extractor/common] Fix extraction of DASH formats with the same
+  representation id (#15111)
+
+Extractors
++ [slutload] Add support for mobile URLs (#14806)
+* [abc:iview] Bypass geo restriction
+* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985,
+  #15035, #15057, #15061, #15071, #15095, #15106)
+* [openload] Fix extraction (#15118)
+- [sandia] Remove extractor
+- [collegerama] Remove extractor
++ [mediasite] Add support for sites based on Mediasite Video Platform (#5428,
+  #11185, #14343)
++ [ufctv] Add support for ufc.tv (#14520)
+* [pluralsight] Fix missing first line of subtitles (#11118)
+* [openload] Fallback on f-page extraction (#14665, #14879)
+* [vimeo] Improve password protected videos extraction (#15114)
+* [aws] Fix canonical/signed headers generation on python 2 (#15102)
+
+
+version 2017.12.28
+
+Extractors
++ [internazionale] Add support for internazionale.it (#14973)
+* [playtvak] Relax video regular expression and make description optional
+  (#15037)
++ [filmweb] Add support for filmweb.no (#8773, #10368)
++ [23video] Add support for 23video.com
++ [espn] Add support for fivethirtyeight.com (#6864)
++ [umg:de] Add support for universal-music.de (#11582, #11584)
++ [espn] Add support for espnfc and extract more formats (#8053)
+* [youku] Update ccode (#14880)
++ [openload] Add support for oload.stream (#15070)
+* [youku] Fix list extraction (#15065)
+
+
+version 2017.12.23
+
+Core
+* [extractor/common] Move X-Forwarded-For setup code into _request_webpage
++ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in
+  output template (#11427, #15018)
++ [extractor/common] Introduce uploader, uploader_id and uploader_url
+  meta fields for playlists (#11427, #15018)
+* [downloader/fragment] Encode filename of fragment being removed (#15020)
++ [utils] Add another date format pattern (#14999)
+
+Extractors
++ [kaltura] Add another embed pattern for entry_id
++ [7plus] Add support for 7plus.com.au (#15043)
+* [animeondemand] Relax login error regular expression
++ [shahid] Add support for show pages (#7401)
++ [youtube] Extract uploader, uploader_id and uploader_url for playlists
+  (#11427, #15018)
+* [afreecatv] Improve format extraction (#15019)
++ [cspan] Add support for audio only pages and catch page errors (#14995)
++ [mailru] Add support for embed URLs (#14904)
+* [crunchyroll] Future-proof XML element checks (#15013)
+* [cbslocal] Fix timestamp extraction (#14999, #15000)
+* [discoverygo] Correct TTML subtitle extension
+* [vk] Make view count optional (#14979)
+* [disney] Skip Apple FairPlay formats (#14982)
+* [voot] Fix format extraction (#14758)
+
+
+version 2017.12.14
+
+Core
+* [postprocessor/xattr] Clarify NO_SPACE message (#14970)
+* [downloader/http] Return actual download result from real_download (#14971)
+
+Extractors
++ [itv] Extract more subtitles and duration
+* [itv] Improve extraction (#14944)
++ [byutv] Add support for geo restricted videos
+* [byutv] Fix extraction (#14966, #14967)
++ [bbccouk] Fix extraction for 320k HLS streams
++ [toutv] Add support for special video URLs (#14179)
+* [discovery] Fix free videos extraction (#14157, #14954)
+* [tvnow] Fix extraction (#7831)
++ [nickelodeon:br] Add support for nickelodeon brazil websites (#14893)
+* [nick] Improve extraction (#14876)
+* [tbs] Fix extraction (#13658)
+
+
+version 2017.12.10
+
+Core
++ [utils] Add sami mimetype to mimetype2ext
+
+Extractors
+* [culturebox] Improve video id extraction (#14947)
+* [twitter] Improve extraction (#14197)
++ [udemy] Extract more HLS formats
+* [udemy] Improve course id extraction (#14938)
++ [stretchinternet] Add support for portal.stretchinternet.com (#14576)
+* [ellentube] Fix extraction (#14407, #14570)
++ [raiplay:playlist] Add support for playlists (#14563)
+* [sonyliv] Bypass geo restriction
+* [sonyliv] Extract higher quality formats (#14922)
+* [fox] Extract subtitles
++ [fox] Add support for Adobe Pass authentication (#14205, #14489)
+- [dailymotion:cloud] Remove extractor (#6794)
+* [xhamster] Fix thumbnail extraction (#14780)
++ [xhamster] Add support for mobile URLs (#14780)
+* [generic] Don't pass video id as mpd id while extracting DASH (#14902)
+* [ard] Skip invalid stream URLs (#14906)
+* [porncom] Fix metadata extraction (#14911)
+* [pluralsight] Detect agreement request (#14913)
+* [toutv] Fix login (#14614)
+
+
+version 2017.12.02
+
+Core
++ [downloader/fragment] Commit part file after each fragment
++ [extractor/common] Add durations for DASH fragments with bare SegmentURLs
++ [extractor/common] Add support for DASH manifests with SegmentLists with
+  bare SegmentURLs (#14844)
++ [utils] Add hvc1 codec code to parse_codecs
+
+Extractors
+* [xhamster] Fix extraction (#14884)
+* [youku] Update ccode (#14872)
+* [mnet] Fix format extraction (#14883)
++ [xiami] Add Referer header to API request
+* [mtv] Correct scc extension in extracted subtitles (#13730)
+* [vvvvid] Fix extraction for kenc videos (#13406)
++ [br] Add support for BR Mediathek videos (#14560, #14788)
++ [daisuki] Add support for motto.daisuki.com (#14681)
+* [odnoklassniki] Fix API metadata request (#14862)
+* [itv] Fix HLS formats extraction
++ [pbs] Add another media id regular expression
+
+
+version 2017.11.26
+
+Core
+* [extractor/common] Use final URL when dumping request (#14769)
+
+Extractors
+* [fczenit] Fix extraction
+- [firstpost] Remove extractor
+* [freespeech] Fix extraction
+* [nexx] Extract more formats
++ [openload] Add support for openload.link (#14763)
+* [empflix] Relax URL regular expression
+* [empflix] Fix extraction
+* [tnaflix] Don't modify download URLs (#14811)
+- [gamersyde] Remove extractor
+* [francetv:generationwhat] Fix extraction
++ [massengeschmacktv] Add support for Massengeschmack TV
+* [fox9] Fix extraction
+* [faz] Fix extraction and add support for Perform Group embeds (#14714)
++ [performgroup] Add support for performgroup.com
++ [jwplatform] Add support for iframes (#14828)
+* [culturebox] Fix extraction (#14827)
+* [youku] Fix extraction; update ccode (#14815)
+* [livestream] Make SMIL extraction non fatal (#14792)
++ [drtuber] Add support for mobile URLs (#14772)
++ [spankbang] Add support for mobile URLs (#14771)
+* [instagram] Fix description, timestamp and counters extraction (#14755)
+
+
+version 2017.11.15
+
+Core
+* [common] Skip Apple FairPlay m3u8 manifests (#14741)
+* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740)
+
+Extractors
+* [vshare] Capture and output error message
+* [vshare] Fix extraction (#14473)
+* [crunchyroll] Extract old RTMP formats
+* [tva] Fix extraction (#14736)
+* [gamespot] Lower preference of HTTP formats (#14652)
+* [instagram:user] Fix extraction (#14699)
+* [ccma] Fix typo (#14730)
+- Remove sensitive data from logging in messages
+* [instagram:user] Fix extraction (#14699)
++ [gamespot] Add support for article URLs (#14652)
+* [gamespot] Skip Brightcove Once HTTP formats (#14652)
+* [cartoonnetwork] Update tokenizer_src (#14666)
++ [wsj] Recognize another URL pattern (#14704)
+* [pandatv] Update API URL and sign format URLs (#14693)
+* [crunchyroll] Use old login method (#11572)
+
+
+version 2017.11.06
+
+Core
++ [extractor/common] Add protocol for f4m formats
+* [f4m] Prefer baseURL for relative URLs (#14660)
+* [extractor/common] Respect URL query in _extract_wowza_formats (#14645)
+
+Extractors
++ [hotstar:playlist] Add support for playlists (#12465)
+* [hotstar] Bypass geo restriction (#14672)
+- [22tracks] Remove extractor (#11024, #14628)
++ [skysport] Add support for ooyala videos protected with embed_token (#14641)
+* [gamespot] Extract formats referenced with new data fields (#14652)
+* [spankbang] Detect unavailable videos (#14644)
+
+
+version 2017.10.29
+
+Core
+* [extractor/common] Prefix format id for audio only HLS formats
++ [utils] Add support for zero years and months in parse_duration
+
+Extractors
+* [egghead] Fix extraction (#14388)
++ [fxnetworks] Extract series metadata (#14603)
++ [younow] Add support for younow.com (#9255, #9432, #12436)
+* [dctptv] Fix extraction (#14599)
+* [youtube] Restrict embed regular expression (#14600)
+* [vimeo] Restrict iframe embed regular expression (#14600)
+* [soundgasm] Improve extraction (#14588)
+- [myvideo] Remove extractor (#8557)
++ [nbc] Add support for classic-tv videos (#14575)
++ [vrtnu] Add support for cookies authentication and simplify (#11873)
++ [canvas] Add support for vrt.be/vrtnu (#11873)
+* [twitch:clips] Fix title extraction (#14566)
++ [ndtv] Add support for sub-sites (#14534)
+* [dramafever] Fix login error message extraction
++ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
+  ro, hu) (#14553)
+
+
+version 2017.10.20
+
+Core
+* [downloader/fragment] Report warning instead of error on inconsistent
+  download state
+* [downloader/hls] Fix total fragments count when ad fragments exist
+
+Extractors
+* [parliamentliveuk] Fix extraction (#14524)
+* [soundcloud] Update client id (#14546)
++ [servus] Add support for servus.com (#14362)
++ [unity] Add support for unity3d.com (#14528)
+* [youtube] Replace youtube redirect URLs in description (#14517)
+* [pbs] Restrict direct video URL regular expression (#14519)
+* [drtv] Respect preference for direct HTTP formats (#14509)
++ [eporner] Add support for embed URLs (#14507)
+* [arte] Capture and output error message
+* [niconico] Improve uploader metadata extraction robustness (#14135)
+
+
+version 2017.10.15.1
+
+Core
+* [downloader/hls] Ignore anvato ad fragments (#14496)
+* [downloader/fragment] Output ad fragment count
+
+Extractors
+* [scrippsnetworks:watch] Bypass geo restriction
++ [anvato] Add ability to bypass geo restriction
+* [redditr] Fix extraction for URLs with query (#14495)
+
+
+version 2017.10.15
+
+Core
++ [common] Add support for jwplayer youtube embeds
+
+Extractors
+* [scrippsnetworks:watch] Fix extraction (#14389)
+* [anvato] Process master m3u8 manifests
+* [youtube] Fix relative URLs in description
+* [spike] Bypass geo restriction
++ [howstuffworks] Add support for more domains
+* [infoq] Fix http format downloading
++ [rtlnl] Add support for another type of embeds
++ [onionstudios] Add support for bulbs-video embeds
+* [udn] Fix extraction
+* [shahid] Fix extraction (#14448)
+* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471)
+* [vh1] Fix extraction (#9613)
+
+
+version 2017.10.12
+
+Core
+* [YoutubeDL] Improve _default_format_spec (#14461)
+
+Extractors
+* [steam] Fix extraction (#14067)
++ [funk] Add support for funk.net (#14464)
++ [nexx] Add support for shortcuts and relax domain id extraction
++ [voxmedia] Add support for recode.net (#14173)
++ [once] Add support for vmap URLs
++ [generic] Add support for channel9 embeds (#14469)
+* [tva] Fix extraction (#14328)
++ [tubitv] Add support for new URL format (#14460)
+- [afreecatv:global] Remove extractor
+- [youtube:shared] Remove extractor (#14420)
++ [slideslive] Add support for slideslive.com (#2680)
++ [facebook] Support thumbnails (#14416)
+* [vvvvid] Fix episode number extraction (#14456)
+* [hrti:playlist] Relax URL regular expression
+* [wdr] Relax media link regular expression (#14447)
+* [hrti] Relax URL regular expression (#14443)
+* [fox] Delegate extraction to uplynk:preplay (#14147)
++ [youtube] Add support for hooktube.com (#14437)
+
+
+version 2017.10.07
+
+Core
+* [YoutubeDL] Ignore duplicates in --playlist-items
+* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
+  reduce code duplication (#14425)
++ [utils] Use cache in OnDemandPagedList by default
+* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
+
+Extractors
+* [reddit] Sort formats (#14430)
+* [lnkgo] Relax URL regular expression (#14423)
+* [pornflip] Extend URL regular expression (#14405, #14406)
++ [xtube] Add support for embed URLs (#14417)
++ [xvideos] Add support for embed URLs and improve extraction (#14409)
+* [beeg] Fix extraction (#14403)
+* [tvn24] Relax URL regular expression (#14395)
+* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
+  #14392, #14414, #14419, #14431)
++ [ketnet] Add support for videos without direct sources (#14377)
+* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
++ [afreecatv] Add support for adult videos (#14376)
+
+
+version 2017.10.01
+
+Core
+* [YoutubeDL] Document youtube_include_dash_manifest
+
+Extractors
++ [tvp] Add support for new URL schema (#14368)
++ [generic] Add support for single format Video.js embeds (#14371)
+* [yahoo] Bypass geo restriction for brightcove (#14210)
+* [yahoo] Use extracted brightcove account id (#14210)
+* [rtve:alacarta] Fix extraction (#14290)
++ [yahoo] Add support for custom brightcove embeds (#14210)
++ [generic] Add support for Video.js embeds
++ [gfycat] Add support for /gifs/detail URLs (#14322)
+* [generic] Fix infinite recursion for twitter:player URLs (#14339)
+* [xhamsterembed] Fix extraction (#14308)
+
+
+version 2017.09.24
+
+Core
++ [options] Accept lrc as a subtitle conversion target format (#14292)
+* [utils] Fix handling raw TTML subtitles (#14191)
+
+Extractors
+* [24video] Fix timestamp extraction and make non fatal (#14295)
++ [24video] Add support for 24video.adult (#14295)
++ [kakao] Add support for tv.kakao.com (#12298, #14007)
++ [twitter] Add support for URLs without user id (#14270)
++ [americastestkitchen] Add support for americastestkitchen.com (#10764,
+  #13996)
+* [generic] Fix support for multiple HTML5 videos on one page (#14080)
+* [mixcloud] Fix extraction (#14088, #14132)
++ [lynda] Add support for educourse.ga (#14286)
+* [beeg] Fix extraction (#14275)
+* [nbcsports:vplayer] Correct theplatform URL (#13873)
+* [twitter] Fix duration extraction (#14141)
+* [tvplay] Bypass geo restriction
++ [heise] Add support for YouTube embeds (#14109)
++ [popcorntv] Add support for popcorntv.it (#5914, #14211)
+* [viki] Update app data (#14181)
+* [morningstar] Relax URL regular expression (#14222)
+* [openload] Fix extraction (#14225, #14257)
+* [noovo] Fix extraction (#14214)
+* [dailymotion:playlist] Relax URL regular expression (#14219)
++ [twitch] Add support for go.twitch.tv URLs (#14215)
+* [vgtv] Relax URL regular expression (#14223)
+
+
+version 2017.09.15
+
+Core
+* [downloader/fragment] Restart inconsistent incomplete fragment downloads
+  (#13731)
+* [YoutubeDL] Download raw subtitles files (#12909, #14191)
+
+Extractors
+* [condenast] Fix extraction (#14196, #14207)
++ [orf] Add support for f4m stories
+* [tv4] Relax URL regular expression (#14206)
+* [animeondemand] Bypass geo restriction
++ [animeondemand] Add support for flash videos (#9944)
+
+
+version 2017.09.11
+
+Extractors
+* [rutube:playlist] Fix suitable (#14166)
+
+
+version 2017.09.10
+
+Core
++ [utils] Introduce bool_or_none
+* [YoutubeDL] Ensure dir existence for each requested format (#14116)
+
+Extractors
+* [fox] Fix extraction (#14147)
+* [rutube] Use bool_or_none
+* [rutube] Rework and generalize playlist extractors (#13565)
++ [rutube:playlist] Add support for playlists (#13534, #13565)
++ [radiocanada] Add fallback for title extraction (#14145)
+* [vk] Use dedicated YouTube embeds extraction routine
+* [vice] Use dedicated YouTube embeds extraction routine
+* [cracked] Use dedicated YouTube embeds extraction routine
+* [chilloutzone] Use dedicated YouTube embeds extraction routine
+* [abcnews] Use dedicated YouTube embeds extraction routine
+* [youtube] Separate methods for embeds extraction
+* [redtube] Fix formats extraction (#14122)
+* [arte] Relax unavailability check (#14112)
++ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
+* [vidme:user] Relax URL regular expression (#14054)
+* [bpb] Fix extraction (#14043, #14086)
+* [soundcloud] Fix download URL with private tracks (#14093)
+* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
+* [viidea] Capture and output lecture error message (#14099)
+* [radiocanada] Skip unsupported platforms (#14100)
+
+
+version 2017.09.02
+
+Extractors
+* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
+  #14077, #14079, #14082, #14083, #14094, #14095, #14096)
+* [youtube] Fix upload date extraction (#14065)
++ [charlierose] Add support for episodes (#14062)
++ [bbccouk] Add support for w-prefixed ids (#14056)
+* [googledrive] Extend URL regular expression (#9785)
++ [googledrive] Add support for source format (#14046)
+* [pornhd] Fix extraction (#14005)
+
+
+version 2017.08.27.1
+
+Extractors
+
+* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
+
+
+version 2017.08.27
+
+Core
++ [extractor/common] Extract height and format id for HTML5 videos (#14034)
+* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
+  #8625, #9483)
+    * Simplify code and split into separate routines to facilitate maintaining
+    * Make retry mechanism work on errors during actual download not only
+      during connection establishment phase
+    * Retry on ECONNRESET and ETIMEDOUT during reading data from network
+    * Retry on content too short
+    * Show error description on retry
+
+Extractors
+* [generic] Lower preference for extraction from LD-JSON
+* [rai] Fix audio formats extraction (#14024)
+* [youtube] Fix controversy videos extraction (#14027, #14029)
+* [mixcloud] Fix extraction (#14015, #14020)
+
+
+version 2017.08.23
+
+Core
++ [extractor/common] Introduce _parse_xml
+* [extractor/common] Make HLS and DASH extraction in _parse_html5_media_entries
+  non fatal (#13970)
+* [utils] Fix unescapeHTML for malformed strings like "&a&quot;" (#13935)
+
+Extractors
+* [cbc:watch] Bypass geo restriction (#13993)
+* [toutv] Relax DRM check (#13994)
++ [googledrive] Add support for subtitles (#13619, #13638)
+* [pornhub] Relax uploader regular expression (#13906, #13975)
+* [bandcamp:album] Extract track titles (#13962)
++ [bbccouk] Add support for events URLs (#13893)
++ [liveleak] Support multi-video pages (#6542)
++ [liveleak] Support another liveleak embedding pattern (#13336)
+* [cda] Fix extraction (#13935)
++ [laola1tv] Add support for tv.ittf.com (#13965)
+* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003)
+
+
+version 2017.08.18
+
+Core
+* [YoutubeDL] Sanitize byte string format URLs (#13951)
++ [extractor/common] Add support for float durations in _parse_mpd_formats
+  (#13919)
+
+Extractors
+* [arte] Detect unavailable videos (#13945)
+* [generic] Convert redirect URLs to unicode strings (#13951)
+* [udemy] Fix paid course detection (#13943)
+* [pluralsight] Use RPC API for course extraction (#13937)
++ [clippit] Add support for clippituser.tv
++ [qqmusic] Support new URL schemes (#13805)
+* [periscope] Renew HLS extraction (#13917)
+* [mixcloud] Extract decrypt key
+
+
+version 2017.08.13
+
+Core
+* [YoutubeDL] Make sure format id is not empty
+* [extractor/common] Make _family_friendly_search optional
+* [extractor/common] Respect source's type attribute for HTML5 media (#13892)
+
+Extractors
+* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902)
++ [fourtube] Add support for pornerbros.com (#6022)
++ [fourtube] Add support for porntube.com (#7859, #13901)
++ [fourtube] Add support for fux.com
+* [limelight] Improve embeds detection (#13895)
++ [reddit] Add support for v.redd.it and reddit.com (#13847)
+* [aparat] Extract all formats (#13887)
+* [mixcloud] Fix play info decryption (#13885)
++ [generic] Add support for vzaar embeds (#13876)
+
+
+version 2017.08.09
+
+Core
+* [utils] Skip missing params in cli_bool_option (#13865)
+
+Extractors
+* [xxxymovies] Fix title extraction (#13868)
++ [nick] Add support for nick.com.pl (#13860)
+* [mixcloud] Fix play info decryption (#13867)
+* [20min] Fix embeds extraction (#13852)
+* [dplayit] Fix extraction (#13851)
++ [niconico] Support videos with multiple formats (#13522)
++ [niconico] Support HTML5-only videos (#13806)
+
+
+version 2017.08.06
+
+Core
+* Use relative paths for DASH fragments (#12990)
+
+Extractors
+* [pluralsight] Fix format selection
+- [mpora] Remove extractor (#13826)
++ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218)
+* [vlive:channel] Limit number of videos per page to 100 (#13830)
+* [podomatic] Extend URL regular expression (#13827)
+* [cinchcast] Extend URL regular expression
+* [yandexdisk] Relax URL regular expression (#13824)
+* [vidme] Extract DASH and HLS formats
+- [teamfour] Remove extractor (#13782)
+* [pornhd] Fix extraction (#13783)
+* [udemy] Fix subtitles extraction (#13812)
+* [mlb] Extend URL regular expression (#13740, #13773)
++ [pbs] Add support for new URL schema (#13801)
+* [nrktv] Update API host (#13796)
+
+
+version 2017.07.30.1
+
+Core
+* [downloader/hls] Use redirect URL as manifest base (#13755)
+* [options] Correctly hide login info from debug outputs (#13696)
+
+Extractors
++ [watchbox] Add support for watchbox.de (#13739)
+- [clipfish] Remove extractor
++ [youjizz] Fix extraction (#13744)
++ [generic] Add support for another ooyala embed pattern (#13727)
++ [ard] Add support for lives (#13771)
+* [soundcloud] Update client id
++ [soundcloud:trackstation] Add support for track stations (#13733)
+* [svtplay] Use geo verification proxy for API request
+* [svtplay] Update API URL (#13767)
++ [yandexdisk] Add support for yadi.sk (#13755)
++ [megaphone] Add support for megaphone.fm
+* [amcnetworks] Make rating optional (#12453)
+* [cloudy] Fix extraction (#13737)
++ [nickru] Add support for nickelodeon.ru
+* [mtv] Improve thumbnail extraction
+* [nick] Automate geo-restriction bypass (#13711)
+* [niconico] Improve error reporting (#13696)
+
+
+version 2017.07.23
+
+Core
+* [YoutubeDL] Improve default format specification (#13704)
+* [YoutubeDL] Do not override id, extractor and extractor_key for
+  url_transparent entities
+* [extractor/common] Fix playlist_from_matches
+
+Extractors
+* [itv] Fix production id extraction (#13671, #13703)
+* [vidio] Make duration non fatal and fix typo
+* [mtv] Skip missing video parts (#13690)
+* [sportbox:embed] Fix extraction
++ [npo] Add support for npo3.nl URLs (#13695)
+* [dramafever] Remove video id from title (#13699)
++ [egghead:lesson] Add support for lessons (#6635)
+* [funnyordie] Extract more metadata (#13677)
+* [youku:show] Fix playlist extraction (#13248)
++ [dispeak] Recognize sevt subdomain (#13276)
+* [adn] Improve error reporting (#13663)
+* [crunchyroll] Relax series and season regular expression (#13659)
++ [spiegel:article] Add support for nexx iframe embeds (#13029)
++ [nexx:embed] Add support for iframe embeds
+* [nexx] Improve JS embed extraction
++ [pearvideo] Add support for pearvideo.com (#13031)
+
+
+version 2017.07.15
+
+Core
+* [YoutubeDL] Don't expand environment variables in meta fields (#13637)
+
+Extractors
+* [spiegeltv] Delegate extraction to nexx extractor (#13159)
++ [nexx] Add support for nexx.cloud (#10807, #13465)
+* [generic] Fix rutube embeds extraction (#13641)
+* [karrierevideos] Fix title extraction (#13641)
+* [youtube] Don't capture YouTube Red ad for creator meta field (#13621)
+* [slideshare] Fix extraction (#13617)
++ [5tv] Add another video URL pattern (#13354, #13606)
+* [drtv] Make HLS and HDS extraction non fatal
+* [ted] Fix subtitles extraction (#13628, #13629)
+* [vine] Make sure the title won't be empty
++ [twitter] Support HLS streams in vmap URLs
++ [periscope] Support pscp.tv URLs in embedded frames
+* [twitter] Extract mp4 urls via mobile API (#12726)
+* [niconico] Fix authentication error handling (#12486)
+* [giantbomb] Extract m3u8 formats (#13626)
++ [vlive:playlist] Add support for playlists (#13613)
+
+
+version 2017.07.09
+
+Core
++ [extractor/common] Add support for AMP tags in _parse_html5_media_entries
++ [utils] Support attributes with no values in get_elements_by_attribute
+
+Extractors
++ [dailymail] Add support for embeds
++ [joj] Add support for joj.sk (#13268)
+* [abc.net.au:iview] Extract more formats (#13492, #13489)
+* [egghead:course] Fix extraction (#6635, #13370)
++ [cjsw] Add support for cjsw.com (#13525)
++ [eagleplatform] Add support for referrer protected videos (#13557)
++ [eagleplatform] Add support for another embed pattern (#13557)
+* [veoh] Extend URL regular expression (#13601)
+* [npo:live] Fix live stream id extraction (#13568, #13605)
+* [googledrive] Fix height extraction (#13603)
++ [dailymotion] Add support for new layout (#13580)
+- [yam] Remove extractor
+* [xhamster] Extract all formats and fix duration extraction (#13593)
++ [xhamster] Add support for new URL schema (#13593)
+* [espn] Extend URL regular expression (#13244, #13549)
+* [kaltura] Fix typo in subtitles extraction (#13569)
+* [vier] Adapt extraction to redesign (#13575)
+
+
+version 2017.07.02
+
+Core
+* [extractor/common] Improve _json_ld
+
+Extractors
++ [thisoldhouse] Add more fallbacks for video id
+* [thisoldhouse] Fix video id extraction (#13540, #13541)
+* [xfileshare] Extend format regular expression (#13536)
+* [ted] Fix extraction (#13535)
++ [tastytrade] Add support for tastytrade.com (#13521)
+* [dplayit] Relax video id regular expression (#13524)
++ [generic] Extract more generic metadata (#13527)
++ [bbccouk] Capture and output error message (#13501, #13518)
+* [cbsnews] Relax video info regular expression (#13284, #13503)
++ [facebook] Add support for plugin video embeds and multiple embeds (#13493)
+* [soundcloud] Switch to https for API requests (#13502)
+* [pandatv] Switch to https for API and download URLs
++ [pandatv] Add support for https URLs (#13491)
++ [niconico] Support sp subdomain (#13494)
+
+
+version 2017.06.25
+
+Core
++ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472)
+* [YoutubeDL] Skip malformed formats for better extraction robustness
+
+Extractors
++ [wsj] Add support for barrons.com (#13470)
++ [ign] Add another video id pattern (#13328)
++ [raiplay:live] Add support for live streams (#13414)
++ [redbulltv] Add support for live videos and segments (#13486)
++ [onetpl] Add support for videos embedded via pulsembed (#13482)
+* [ooyala] Make more robust
+* [ooyala] Skip empty format URLs (#13471, #13476)
+* [hgtv.com:show] Fix typo
+
+
+version 2017.06.23
+
+Core
+* [adobepass] Fix extraction on older python 2.6
+
+Extractors
+* [youtube] Adapt to new automatic captions rendition (#13467)
+* [hgtv.com:show] Relax video config regular expression (#13279, #13461)
+* [drtuber] Fix formats extraction (#12058)
+* [youporn] Fix upload date extraction
+* [youporn] Improve formats extraction
+* [youporn] Fix title extraction (#13456)
+* [googledrive] Fix formats sorting (#13443)
+* [watchindianporn] Fix extraction (#13411, #13415)
++ [vimeo] Add fallback mp4 extension for original format
++ [ruv] Add support for ruv.is (#13396)
+* [viu] Fix extraction on older python 2.6
+* [pandora.tv] Fix upload_date extraction (#12846)
++ [asiancrush] Add support for asiancrush.com (#13420)
+
+
+version 2017.06.18
+
+Core
+* [downloader/common] Use utils.shell_quote for debug command line
+* [utils] Use compat_shlex_quote in shell_quote
+* [postprocessor/execafterdownload] Encode command line (#13407)
+* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
+* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
+   in --metadata-from-title (#13408)
+* [extractor/common] Fix json dumping with --geo-bypass
++ [extractor/common] Improve jwplayer subtitles extraction
++ [extractor/common] Improve jwplayer formats extraction (#13379)
+
+Extractors
+* [polskieradio] Fix extraction (#13392)
++ [xfileshare] Add support for fastvideo.me (#13385)
+* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
+* [4tube] Fix extraction (#13381, #13382)
++ [disney] Add support for disneychannel.de (#13383)
+* [npo] Improve URL regular expression (#13376)
++ [corus] Add support for showcase.ca
++ [corus] Add support for history.ca (#13359)
+
+
+version 2017.06.12
+
+Core
+* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
++ [compat] Introduce compat_HTMLParseError
+* [utils] Improve unified_timestamp
+* [extractor/generic] Ensure format id is unicode string
+* [extractor/common] Return unicode string from _match_id
++ [YoutubeDL] Sanitize more fields (#13313)
+
+Extractors
++ [xfileshare] Add support for rapidvideo.tv (#13348)
+* [xfileshare] Modernize and pass Referer
++ [rutv] Add support for testplayer.vgtrk.com (#13347)
++ [newgrounds] Extract more metadata (#13232)
++ [newgrounds:playlist] Add support for playlists (#10611)
+* [newgrounds] Improve formats and uploader extraction (#13346)
+* [msn] Fix formats extraction
+* [turbo] Ensure format id is string
+* [sexu] Ensure height is int
+* [jove] Ensure comment count is int
+* [golem] Ensure format id is string
+* [gfycat] Ensure filesize is int
+* [foxgay] Ensure height is int
+* [flickr] Ensure format id is string
+* [sohu] Fix numeric fields
+* [safari] Improve authentication detection (#13319)
+* [liveleak] Ensure height is int (#13313)
+* [streamango] Make title optional (#13292)
+* [rtlnl] Improve URL regular expression (#13295)
+* [tvplayer] Fix extraction (#13291)
+
+
+version 2017.06.05
+
+Core
+* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
+
+Extractors
++ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
+* [pornhub:playlist] Fix extraction (#13281)
+- [godtv] Remove extractor (#13175)
+* [safari] Fix typo (#13252)
+* [youtube] Improve chapters extraction (#13247)
+* [1tv] Lower preference for HTTP formats (#13246)
+* [francetv] Relax URL regular expression
+* [drbonanza] Fix extraction (#13231)
+* [packtpub] Fix authentication (#13240)
+
+
+version 2017.05.29
+
+Extractors
+* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs
+  (#13211)
+* [xhamster] Fix uploader and like/dislike count extraction (#13216)
++ [xhamster] Extract categories (#11728)
++ [abcnews] Add support for embed URLs (#12851)
+* [gaskrank] Fix extraction (#12493)
+* [medialaan] Fix videos with missing videoUrl (#12774)
+* [dvtv] Fix playlist support
++ [dvtv] Add support for DASH and HLS formats (#3063)
++ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032)
+* [cbsinteractive] Relax URL regular expression (#13213)
+* [adn] Fix formats extraction
++ [youku] Extract more metadata (#10433)
+* [cbsnews] Fix extraction (#13205)
+
+
+version 2017.05.26
+
+Core
++ [utils] strip_jsonp() can recognize more patterns
+* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
+
+Extractors
++ [youtube] DASH MPDs with cipher signatures are recognized now (#11381)
++ [bbc] Add support for authentication
+* [tudou] Merge into youku extractor (#12214)
+* [youku:show] Fix extraction
+* [youku] Fix extraction (#13191)
+* [udemy] Fix extraction for outputs' format entries without URL (#13192)
+* [vimeo] Fix formats' sorting (#13189)
+* [cbsnews] Fix extraction for 60 Minutes videos (#12861)
+
+
+version 2017.05.23
+
+Core
++ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183)
++ [adobepass] Add support for Bright House Networks (#13149)
+
+Extractors
++ [streamcz] Add support for subtitles (#13174)
+* [youtube] Fix DASH manifest signature decryption (#8944, #13156)
+* [toggle] Relax URL regular expression (#13172)
+* [toypics] Fix extraction (#13077)
+* [njpwworld] Fix extraction (#13162, #13169)
++ [hitbox] Add support for smashcast.tv (#13154)
+* [mitele] Update app key regular expression (#13158)
+
+
+version 2017.05.18.1
+
+Core
+* [jsinterp] Fix typo and cleanup regular expressions (#13134)
+
+
+version 2017.05.18
+
+Core
++ [jsinterp] Add support for quoted names and indexers (#13123, #13124, #13125,
+  #13126, #13128, #13129, #13130, #13131, #13132)
++ [extractor/common] Add support for schemeless URLs in _extract_wowza_formats
+  (#13088, #13092)
++ [utils] Recognize more audio codecs (#13081)
+
+Extractors
++ [vier] Extract more metadata (#12539)
+* [vier] Improve extraction (#12801)
+    + Add support for authentication
+    * Bypass authentication when no credentials provided
+    * Improve extraction robustness
+* [dailymail] Fix sources extraction (#13057)
+* [dailymotion] Extend URL regular expression (#13079)
+
+
+version 2017.05.14
+
+Core
++ [extractor/common] Respect Width and Height attributes in ISM manifests
++ [postprocessor/metadatafromtitle] Add regular expression syntax support for
+  --metadata-from-title (#13065)
+
+Extractors
++ [mediaset] Add support for video.mediaset.it (#12708, #12964)
+* [orf:radio] Fix extraction (#11643, #12926)
+* [aljazeera] Extend URL regular expression (#13053)
+* [imdb] Relax URL regular expression (#13056)
++ [francetv] Add support for mobile.france.tv (#13068)
++ [upskill] Add support for upskillcourses.com (#13043)
+* [thescene] Fix extraction (#13061)
+* [condenast] Improve embed support
+* [liveleak] Fix extraction (#12053)
++ [douyu] Support Douyu shows (#12228)
+* [myspace] Improve URL regular expression (#13040)
+* [adultswim] Use desktop platform in assets URL (#13041)
+
+
+version 2017.05.09
+
+Core
+* [YoutubeDL] Force --restrict-filenames when no locale is set on all python
+  versions (#13027)
+
+Extractors
+* [francetv] Adapt to site redesign (#13034)
++ [packtpub] Add support for authentication (#12622)
+* [drtv] Lower preference for SignLanguage formats (#13013, #13016)
++ [cspan] Add support for brightcove live embeds (#13028)
+* [vrv] Extract DASH formats and subtitles
+* [funimation] Fix authentication (#13021)
+* [adultswim] Fix extraction (#8640, #10950, #11042, #12121)
+    + Add support for Adobe Pass authentication
+    + Add support for live streams
+    + Add support for show pages
+* [turner] Extract thumbnail, is_live and strip description
++ [nonktube] Add support for nonktube.com (#8647, #13024)
++ [nuevo] Pass headers to _extract_nuevo
+* [nbc] Improve extraction (#12364)
+
+
+version 2017.05.07
+
+Core
+* [extractor/common] Fix typo in _extract_akamai_formats
++ [postprocessor/ffmpeg] Embed chapters into media file with --add-metadata
++ [extractor/common] Introduce chapters meta field
+
+Extractors
+* [youtube] Fix authentication (#12820, #12927, #12973, #12992, #12993, #12995,
+  #13003)
+* [bilibili] Fix video downloading (#13001)
+* [rmcdecouverte] Fix extraction (#12937)
+* [theplatform] Extract chapters
+* [bandcamp] Fix thumbnail extraction (#12980)
+* [pornhub] Extend URL regular expression (#12996)
++ [youtube] Extract chapters
++ [nrk] Extract chapters
++ [vice] Add support for ooyala embeds in article pages
++ [vice] Support vice articles (#12968)
+* [vice] Fix extraction for non en_us videos (#12967)
+* [gdcvault] Fix extraction for some videos (#12733)
+* [pbs] Improve multipart video support (#12981)
+* [laola1tv] Fix extraction (#12880)
++ [cda] Support birthday verification (#12789)
+* [leeco] Fix extraction (#12974)
++ [pbs] Extract chapters
+* [amp] Improve thumbnail and subtitles extraction
+* [foxsports] Fix extraction (#12945)
+- [coub] Remove comment count extraction (#12941)
+
+
+version 2017.05.01
+
+Core
++ [extractor/common] Extract view count from JSON-LD
+* [utils] Improve unified_timestamp
++ [utils] Add video/mp2t to mimetype2ext
+* [downloader/external] Properly handle live stream downloading cancellation
+  (#8932)
++ [utils] Add support for unicode whitespace in clean_html on python 2 (#12906)
+
+Extractors
+* [infoq] Make audio format extraction non fatal (#12938)
+* [brightcove] Allow whitespace around attribute names in embedded code
++ [zaq1] Add support for zaq1.pl (#12693)
++ [xvideos] Extract duration (#12828)
+* [vevo] Fix extraction (#12879)
++ [noovo] Add support for noovo.ca (#12792)
++ [washingtonpost] Add support for embeds (#12699)
+* [yandexmusic:playlist] Fix extraction for python 3 (#12888)
+* [anvato] Improve extraction (#12913)
+    * Promote to regular shortcut based extractor
+    * Add mcp to access key mapping table
+    * Add support for embeds extraction
+    * Add support for anvato embeds in generic extractor
+* [xtube] Fix extraction for older FLV videos (#12734)
+* [tvplayer] Fix extraction (#12908)
+
+
+version 2017.04.28
+
+Core
++ [adobepass] Use geo verification headers for all requests
+- [downloader/fragment] Remove assert for resume_len when no fragments
+  downloaded
++ [extractor/common] Add manifest_url for explicit group rendition formats
+* [extractor/common] Fix manifest_url for m3u8 formats
+- [extractor/common] Don't list master m3u8 playlists in format list (#12832)
+
+Extractors
+* [aenetworks] Fix extraction for shows with single season
++ [go] Add support for Disney, DisneyJunior and DisneyXD show pages
+* [youtube] Recognize new locale-based player URLs (#12885)
++ [streamable] Add support for new embedded URL schema (#12844)
+* [arte:+7] Relax URL regular expression (#12837)
+
+
+version 2017.04.26
+
+Core
+* Introduce --keep-fragments for keeping fragments of fragmented download
+  on disk after download is finished
+* [YoutubeDL] Fix output template for missing timestamp (#12796)
+* [socks] Handle cases where credentials are required but missing
+* [extractor/common] Improve HLS extraction (#12211)
+    * Extract m3u8 parsing to separate method
+    * Improve rendition groups extraction
+    * Build stream name according to stream GROUP-ID
+    * Ignore reference to AUDIO group without URI when stream has no CODECS
+    * Use float for scaled tbr in _parse_m3u8_formats
+* [utils] Add support for TTML styles in dfxp2srt
+* [downloader/hls] No need to download keys for fragments that have been
+  already downloaded
+* [downloader/fragment] Improve fragment downloading
+    * Resume immediately
+    * Don't concatenate fragments and decrypt them on every resume
+    * Optimize disk storage usage, don't store intermediate fragments on disk
+    * Store bookkeeping download state file
++ [extractor/common] Add support for multiple getters in try_get
++ [extractor/common] Add support for video of WebPage context in _json_ld
+  (#12778)
++ [extractor/common] Relax JWPlayer regular expression and remove
+  duplicate URLs (#12768)
+
+Extractors
+* [iqiyi] Fix extraction of Yule videos
+* [vidio] Improve extraction and sort formats
++ [brightcove] Match only video elements with data-video-id attribute
+* [iqiyi] Fix playlist detection (#12504)
+- [azubu] Remove extractor (#12813)
+* [porn91] Fix extraction (#12814)
+* [vidzi] Fix extraction (#12793)
++ [amp] Extract error message (#12795)
++ [xfileshare] Add support for gorillavid.com and daclips.com (#12776)
+* [instagram] Fix extraction (#12777)
++ [generic] Support Brightcove videos in <iframe> (#12482)
++ [brightcove] Support URLs with bcpid instead of playerID (#12482)
+* [brightcove] Fix _extract_url (#12782)
++ [odnoklassniki] Extract HLS formats
+
+
+version 2017.04.17
+
+Extractors
+* [limelight] Improve extraction of LimelightEmbeddedPlayerFlash media embeds
+  and add support for channel and channelList embeds
+* [generic] Extract multiple Limelight embeds (#12761)
++ [itv] Extract series metadata
+* [itv] Fix RTMP formats downloading (#12759)
+* [itv] Use native HLS downloader by default
++ [go90] Extract subtitles (#12752)
++ [go90] Extract series metadata (#12752)
+
+
+version 2017.04.16
+
+Core
+* [YoutubeDL] Apply expand_path after output template substitution
++ [YoutubeDL] Propagate overridden meta fields to extraction results of type
+  url (#11163)
+
+Extractors
++ [generic] Extract RSS entries as url_transparent (#11163)
++ [streamango] Add support for streamango.com (#12643)
++ [wsj:article] Add support for articles (#12558)
+* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds'
+  URLs (#9163, #12005, #12178, #12480)
++ [udemy] Add support for react rendition (#12744)
+
+
+version 2017.04.15
+
+Extractors
+* [youku] Fix fileid extraction (#12741, #12743)
+
+
+version 2017.04.14
+
+Core
++ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
++ [adobepass] Improve Comcast and Verizon login code (#10803)
++ [adobepass] Add support for Verizon (#10803)
+
+Extractors
++ [aenetworks] Add support for specials (#12723)
++ [hbo] Extract HLS formats
++ [go90] Add support for go90.com (#10127)
++ [tv2hu] Add support for tv2.hu (#10509)
++ [generic] Exclude URLs with xml ext from valid video URLs (#10768, #11654)
+* [youtube] Improve HLS formats extraction
+* [afreecatv] Fix extraction for videos with different key layout (#12718)
+- [youtube] Remove explicit preference for audio-only and video-only formats in
+  order not to break sorting when new formats appear
+* [canalplus] Bypass geo restriction
+
+
+version 2017.04.11
+
+Extractors
+* [afreecatv] Fix extraction (#12706)
++ [generic] Add support for <object> YouTube embeds (#12637)
+* [bbccouk] Treat bitrate as audio+video bitrate in media selector
++ [bbccouk] Skip unrecognized formats in media selector (#12701)
++ [bbccouk] Add support for https protocol in media selector (#12701)
+* [curiositystream] Fix extraction (#12638)
+* [adn] Update subtitle decryption key
+* [chaturbate] Fix extraction (#12665, #12688, #12690)
+
+
+version 2017.04.09
+
+Extractors
++ [medici] Add support for medici.tv (#3406)
++ [rbmaradio] Add support for redbullradio.com URLs (#12687)
++ [npo:live] Add support for default URL (#12555)
+* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
++ [thesun] Add support for thesun.co.uk (#11298, #12674)
++ [ceskatelevize:porady] Add support for porady (#7411, #12645)
+* [ceskatelevize] Improve extraction and remove URL replacement hacks
++ [kaltura] Add support for iframe embeds (#12679)
+* [airmozilla] Fix extraction (#12670)
+* [wshh] Extract html5 entries and delegate to generic extractor (#12676)
++ [raiplay] Extract subtitles
++ [xfileshare] Add support for vidlo.us (#12660)
++ [xfileshare] Add support for vidbom.com (#12661)
++ [aenetworks] Add more video URL regular expressions (#12657)
++ [odnoklassniki] Fix format sorting for 1080p quality
++ [rtl2] Add support for you.rtl2.de (#10257)
++ [vshare] Add support for vshare.io (#12278)
+
+
+version 2017.04.03
+
+Core
++ [extractor/common] Add censorship check for TransTelekom ISP
+* [extractor/common] Move censorship checks to a separate method
+
+Extractors
++ [discoveryvr] Add support for discoveryvr.com (#12578)
++ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
++ [periscope] Add support for pscp.tv URLs (#12618, #12625)
+
+
+version 2017.04.02
+
+Core
+* [YoutubeDL] Return early when extraction of url_transparent fails
+
+Extractors
+* [rai] Fix and improve extraction (#11790)
++ [vrv] Add support for series pages
+* [limelight] Improve extraction for audio only formats
+* [funimation] Fix extraction (#10696, #11773)
++ [xfileshare] Add support for vidabc.com (#12589)
++ [xfileshare] Improve extraction and extract hls formats
++ [crunchyroll] Pass geo verification proxy
++ [cwtv] Extract ISM formats
++ [tvplay] Bypass geo restriction
++ [vrv] Add support for vrv.co
++ [packtpub] Add support for packtpub.com (#12610)
++ [generic] Pass base_url to _parse_jwplayer_data
++ [adn] Add support for animedigitalnetwork.fr (#4866)
++ [allocine] Extract more metadata
+* [allocine] Fix extraction (#12592)
+* [openload] Fix extraction
+
+
+version 2017.03.26
+
+Core
+* Don't raise an error if JWPlayer config data is not a Javascript object
+  literal. _find_jwplayer_data now returns a dict rather than an str. (#12307)
+* Expand environment variables for options representing paths (#12556)
++ [utils] Introduce expand_path
+* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams
+
+Extractors
+* [afreecatv] Fix extraction (#12179)
++ [atvat] Add support for atv.at (#5325)
++ [fox] Add metadata extraction (#12391)
++ [atresplayer] Extract DASH formats
++ [atresplayer] Extract HD manifest (#12548)
+* [atresplayer] Fix login error detection (#12548)
+* [franceculture] Fix extraction (#12547)
+* [youtube] Improve URL regular expression (#12538)
+* [generic] Do not follow redirects to the same URL
+
+
+version 2017.03.24
+
+Extractors
+- [9c9media] Remove mp4 URL extraction request
++ [bellmedia] Add support for etalk.ca and space.ca (#12447)
+* [channel9] Fix extraction (#11323)
+* [cloudy] Fix extraction (#12525)
++ [hbo] Add support for free episode URLs and new formats extraction (#12519)
+* [condenast] Fix extraction and style (#12526)
+* [viu] Relax URL regular expression (#12529)
+
+
+version 2017.03.22
+
+Extractors
+- [pluralsight] Omit module title from video title (#12506)
+* [pornhub] Decode obfuscated video URL (#12470, #12515)
+* [senateisvp] Allow https URL scheme for embeds (#12512)
+
+
+version 2017.03.20
+
+Core
++ [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as
+  output template
++ [adobepass] Detect and output error on authz token extraction (#12472)
+
+Extractors
++ [bostonglobe] Add extractor for bostonglobe.com (#12099)
++ [toongoggles] Add support for toongoggles.com (#12171)
++ [medialaan] Add support for Medialaan sites (#9974, #11912)
++ [discoverynetworks] Add support for more domains and bypass geo restriction
+* [openload] Fix extraction (#10408)
+
+
+version 2017.03.16
+
+Core
++ [postprocessor/ffmpeg] Add support for flac
++ [extractor/common] Extract SMIL formats from jwplayer
+
+Extractors
++ [generic] Add forgotten return for jwplayer formats
+* [redbulltv] Improve extraction
+
+
+version 2017.03.15
+
+Core
+* Fix missing subtitles if --add-metadata is used (#12423)
+
+Extractors
+* [facebook] Make title optional (#12443)
++ [mitele] Add support for ooyala videos (#12430)
+* [openload] Fix extraction (#12435, #12446)
+* [streamable] Update API URL (#12433)
++ [crunchyroll] Extract season name (#12428)
+* [discoverygo] Bypass geo restriction
++ [discoverygo:playlist] Add support for playlists (#12424)
+
+
+version 2017.03.10
+
+Extractors
+* [generic] Make title optional for jwplayer embeds (#12410)
+* [wdr:maus] Fix extraction (#12373)
+* [prosiebensat1] Improve title extraction (#12318, #12327)
+* [dplayit] Separate and rewrite extractor and bypass geo restriction (#12393)
+* [miomio] Fix extraction (#12291, #12388, #12402)
+* [telequebec] Fix description extraction (#12399)
+* [openload] Fix extraction (#12357)
+* [brightcove:legacy] Relax videoPlayer validation check (#12381)
+
+
+version 2017.03.07
+
+Core
+* Metadata are now added after conversion (#5594)
+
+Extractors
+* [soundcloud] Update client id (#12376)
+* [openload] Fix extraction (#10408, #12357)
+
+
+version 2017.03.06
+
+Core
++ [utils] Process bytestrings in urljoin (#12369)
+* [extractor/common] Improve height extraction and extract bitrate
+* [extractor/common] Move jwplayer formats extraction in separate method
++ [external:ffmpeg] Limit test download size to 10KiB (#12362)
+
+Extractors
++ [drtv] Add geo countries to GeoRestrictedError
++ [drtv:live] Bypass geo restriction
++ [tunepk] Add extractor (#12197, #12243)
+
+
+version 2017.03.05
+
+Extractors
++ [twitch] Add basic support for two-factor authentication (#11974)
++ [vier] Add support for vijf.be (#12304)
++ [redbulltv] Add support for redbull.tv (#3919, #11948)
+* [douyutv] Switch to the PC API to escape the 5-min limitation (#12316)
++ [generic] Add support for rutube embeds
++ [rutube] Relax URL regular expression
++ [vrak] Add support for vrak.tv (#11452)
++ [brightcove:new] Add ability to smuggle geo_countries into URL
++ [brightcove:new] Raise GeoRestrictedError
+* [go] Relax URL regular expression (#12341)
+* [24video] Use original host for requests (#12339)
+* [ruutu] Disable DASH formats (#12322)
+
+
+version 2017.03.02
+
+Core
++ [adobepass] Add support for Charter Spectrum (#11465)
+* [YoutubeDL] Don't sanitize identifiers in output template (#12317)
+
+Extractors
+* [facebook] Fix extraction (#12323, #12330)
+* [youtube] Mark errors about rental videos as expected (#12324)
++ [npo] Add support for audio
+* [npo] Adapt to app.php API (#12311, #12320)
+
+
+version 2017.02.28
+
+Core
++ [utils] Add bytes_to_long and long_to_bytes
++ [utils] Add pkcs1pad
++ [aes] Add aes_cbc_encrypt
+
+Extractors
++ [azmedien:showplaylist] Add support for show playlists (#12160)
++ [youtube:playlist] Recognize another playlist pattern (#11928, #12286)
++ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776,
+  #10060)
+* [douyu] Fix extraction (#12301)
+
+
+version 2017.02.27
+
+Core
+* [downloader/common] Limit displaying 2 digits after decimal point in sleep
+  interval message (#12183)
++ [extractor/common] Add preference to _parse_html5_media_entries
+
+Extractors
++ [npo] Add support for zapp.nl
++ [npo] Add support for hetklokhuis.nl (#12293)
+- [scivee] Remove extractor (#9315)
++ [cda] Decode download URL (#12255)
++ [crunchyroll] Improve uploader extraction (#12267)
++ [youtube] Raise GeoRestrictedError
++ [dailymotion] Raise GeoRestrictedError
++ [mdr] Recognize more URL patterns (#12169)
++ [tvigle] Raise GeoRestrictedError
+* [vevo] Fix extraction for videos with the new streams/streamsV3 format
+  (#11719)
++ [freshlive] Add support for freshlive.tv (#12175)
++ [xhamster] Capture and output videoClosed error (#12263)
++ [etonline] Add support for etonline.com (#12236)
++ [njpwworld] Add support for njpwworld.com (#11561)
+* [amcnetworks] Relax URL regular expression (#12127)
+
+
+version 2017.02.24.1
+
+Extractors
+* [noco] Modernize
+* [noco] Switch login URL to https (#12246)
++ [thescene] Extract more metadata
+* [thescene] Fix extraction (#12235)
++ [tubitv] Use geo bypass mechanism
+* [openload] Fix extraction (#10408)
++ [ivi] Raise GeoRestrictedError
+
+
+version 2017.02.24
+
+Core
+* [options] Hide deprecated options from --help
+* [options] Deprecate --autonumber-size
++ [YoutubeDL] Add support for string formatting operations in output template
+  (#5185, #5748, #6841, #9929, #9966, #9978, #12189)
+
+Extractors
++ [lynda:course] Add webpage extraction fallback (#12238)
+* [go] Sign all uplynk URLs and use geo bypass only for free videos
+  (#12087, #12210)
++ [skylinewebcams] Add support for skylinewebcams.com (#12221)
++ [instagram] Add support for multi video posts (#12226)
++ [crunchyroll] Extract playlist entries ids
+* [mgtv] Fix extraction
++ [sohu] Raise GeoRestrictedError
++ [leeco] Raise GeoRestrictedError and use geo bypass mechanism
+
+
+version 2017.02.22
+
+Extractors
+* [crunchyroll] Fix descriptions with double quotes (#12124)
+* [dailymotion] Make comment count optional (#12209)
++ [vidzi] Add support for vidzi.cc (#12213)
++ [24video] Add support for 24video.tube (#12217)
++ [crackle] Use geo bypass mechanism
++ [viewster] Use geo verification headers
++ [tfo] Improve geo restriction detection and use geo bypass mechanism
++ [telequebec] Use geo bypass mechanism
++ [limelight] Extract PlaylistService errors and improve geo restriction
+  detection
+
+
+version 2017.02.21
+
+Core
+* [extractor/common] Allow calling _initialize_geo_bypass from extractors
+  (#11970)
++ [adobepass] Add support for Time Warner Cable (#12191)
++ [travis] Run tests in parallel
++ [downloader/ism] Honor HTTP headers when downloading fragments
++ [downloader/dash] Honor HTTP headers when downloading fragments
++ [utils] Add GeoUtils class for working with geo tools and GeoUtils.random_ipv4
++ Add option --geo-bypass-country for explicit geo bypass on behalf of
+  specified country
++ Add options to control geo bypass mechanism --geo-bypass and --no-geo-bypass
++ Add experimental geo restriction bypass mechanism based on faking
+  X-Forwarded-For HTTP header
++ [utils] Introduce GeoRestrictedError for geo restricted videos
++ [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions
+
+Extractors
++ [ninecninemedia] Use geo bypass mechanism
+* [spankbang] Make uploader optional (#12193)
++ [iprima] Improve geo restriction detection and disable geo bypass
+* [iprima] Modernize
+* [commonmistakes] Disable UnicodeBOM extractor test for python 3.2
++ [prosiebensat1] Throw ExtractionError on unsupported page type (#12180)
+* [nrk] Update _API_HOST and relax _VALID_URL
++ [tv4] Bypass geo restriction and improve detection
+* [tv4] Switch to hls3 protocol (#12177)
++ [viki] Improve geo restriction detection
++ [vgtv] Improve geo restriction detection
++ [srgssr] Improve geo restriction detection
++ [vbox7] Improve geo restriction detection and use geo bypass mechanism
++ [svt] Improve geo restriction detection and use geo bypass mechanism
++ [pbs] Improve geo restriction detection and use geo bypass mechanism
++ [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism
++ [nrk] Improve geo restriction detection and use geo bypass mechanism
++ [itv] Improve geo restriction detection and use geo bypass mechanism
++ [go] Improve geo restriction detection and use geo bypass mechanism
++ [dramafever] Improve geo restriction detection and use geo bypass mechanism
+* [brightcove:legacy] Restrict videoPlayer value (#12040)
++ [tvn24] Add support for tvn24.pl and tvn24bis.pl (#11679)
++ [thisav] Add support for HTML5 media (#11771)
+* [metacafe] Bypass family filter (#10371)
+* [viceland] Improve info extraction
+
+
+version 2017.02.17
+
+Extractors
+* [heise] Improve extraction (#9725)
+* [ellentv] Improve (#11653)
+* [openload] Fix extraction (#10408, #12002)
++ [theplatform] Recognize URLs with whitespaces (#12044)
+* [einthusan] Relax URL regular expression (#12141, #12159)
++ [generic] Support complex JWPlayer embedded videos (#12030)
+* [elpais] Improve extraction (#12139)
+
+
+version 2017.02.16
+
+Core
++ [utils] Add support for quoted string literals in --match-filter (#8050,
+  #12142, #12144)
+
+Extractors
+* [ceskatelevize] Lower priority for audio description sources (#12119)
+* [amcnetworks] Fix extraction (#12127)
+* [pinkbike] Fix uploader extraction (#12054)
++ [onetpl] Add support for businessinsider.com.pl and plejada.pl
++ [onetpl] Add support for onet.pl (#10507)
++ [onetmvp] Add shortcut extractor
++ [vodpl] Add support for vod.pl (#12122)
++ [pornhub] Extract video URL from tv platform site (#12007, #12129)
++ [ceskatelevize] Extract DASH formats (#12119, #12133)
+
+
+version 2017.02.14
+
+Core
+* TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085)
+
+Extractors
+* [zdf] Fix extraction (#12117)
+* [xtube] Fix extraction for both kinds of video id (#12088)
+* [xtube] Improve title extraction (#12088)
++ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116)
+* [bellmedia] Allow video id longer than 6 characters (#12114)
++ [limelight] Add support for referer protected videos
+* [disney] Improve extraction (#4975, #11000, #11882, #11936)
+* [hotstar] Improve extraction (#12096)
+* [einthusan] Fix extraction (#11416)
++ [aenetworks] Add support for lifetimemovieclub.com (#12097)
+* [youtube] Fix parsing codecs (#12091)
+
+
+version 2017.02.11
+
+Core
++ [utils] Introduce get_elements_by_class and get_elements_by_attribute
+  utility functions
++ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access
+
+Extractors
+* [pluralsight:course] Fix extraction (#12075)
++ [bbc] Extract m3u8 formats with 320k audio
+* [facebook] Relax video id matching (#11017, #12055, #12056)
++ [corus] Add support for Corus Entertainment sites (#12060, #9164)
++ [pluralsight] Detect blocked account error message (#12070)
++ [bloomberg] Add another video id pattern (#12062)
+* [extractor/commonmistakes] Restrict URL regular expression (#12050)
++ [tvplayer] Add support for tvplayer.com
+
+
+version 2017.02.10
+
+Extractors
+* [xtube] Fix extraction (#12023)
+* [pornhub] Fix extraction (#12007, #12018)
+* [facebook] Improve JS data regular expression (#12042)
+* [kaltura] Improve embed partner id extraction (#12041)
++ [sprout] Add support for sproutonline.com
+* [6play] Improve extraction
++ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765)
++ [go] Add support for Adobe Pass authentication (#11468, #10831)
+* [6play] Fix extraction (#12011)
++ [nbc] Add support for Adobe Pass authentication (#12006)
+
+
+version 2017.02.07
+
+Core
+* [extractor/common] Fix audio only with audio group in m3u8 (#11995)
++ [downloader/fragment] Respect --no-part
+* [extractor/common] Speed-up HTML5 media entries extraction (#11979)
+
+Extractors
+* [pornhub] Fix extraction (#11997)
++ [canalplus] Add support for cstar.fr (#11990)
++ [extractor/generic] Improve RTMP support (#11993)
++ [gaskrank] Add support for gaskrank.tv (#11685)
+* [bandcamp] Fix extraction for incomplete albums (#11727)
+* [iwara] Fix extraction (#11781)
+* [googledrive] Fix extraction on Python 3.6
++ [videopress] Add support for videopress.com
++ [afreecatv] Extract RTMP formats
+
+
+version 2017.02.04.1
+
+Extractors
++ [twitch:stream] Add support for player.twitch.tv (#11971)
+* [radiocanada] Fix extraction for toutv rtmp formats
+
+
+version 2017.02.04
+
+Core
++ Add --playlist-random to shuffle playlists (#11889, #11901)
+* [utils] Improve comments processing in js_to_json (#11947)
+* [utils] Handle single-line comments in js_to_json
+* [downloader/external:ffmpeg] Minimize the use of aac_adtstoasc filter
+
+Extractors
++ [piksel] Add another app token pattern (#11969)
++ [vk] Capture and output author blocked error message (#11965)
++ [turner] Fix secure HLS formats downloading with ffmpeg (#11358, #11373,
+  #11800)
++ [drtv] Add support for live and radio sections (#1827, #3427)
+* [myspace] Fix extraction and extract HLS and HTTP formats
++ [youtube] Add format info for itag 325 and 328
+* [vine] Fix extraction (#11955)
+- [sportbox] Remove extractor (#11954)
++ [filmon] Add support for filmon.com (#11187)
++ [infoq] Add audio only formats (#11565)
+* [douyutv] Improve room id regular expression (#11931)
+* [iprima] Fix extraction (#11920, #11896)
+* [youtube] Fix ytsearch when cookies are provided (#11924)
+* [go] Relax video id regular expression (#11937)
+* [facebook] Fix title extraction (#11941)
++ [youtube:playlist] Recognize TL playlists (#11945)
++ [bilibili] Support new Bangumi URLs (#11845)
++ [cbc:watch] Extract audio codec for audio only formats (#11893)
++ [elpais] Fix extraction for some URLs (#11765)
+
+
+version 2017.02.01
+
+Extractors
++ [facebook] Add another fallback extraction scenario (#11926)
+* [prosiebensat1] Fix extraction of descriptions (#11810, #11929)
+- [crunchyroll] Remove ScaledBorderAndShadow settings (#9028)
++ [vimeo] Extract upload timestamp
++ [vimeo] Extract license (#8726, #11880)
++ [nrk:series] Add support for series (#11571, #11711)
+
+
+version 2017.01.31
+
+Core
++ [compat] Add compat_etree_register_namespace
+
+Extractors
+* [youtube] Fix extraction for domainless player URLs (#11890, #11891, #11892,
+  #11894, #11895, #11897, #11900, #11903, #11904, #11906, #11907, #11909,
+  #11913, #11914, #11915, #11916, #11917, #11918, #11919)
++ [vimeo] Extract both mixed and separated DASH formats
++ [ruutu] Extract DASH formats
+* [itv] Fix extraction for python 2.6
+
+
+version 2017.01.29
+
+Core
+* [extractor/common] Fix initialization template (#11605, #11825)
++ [extractor/common] Document fragment_base_url and fragment's path fields
+* [extractor/common] Fix duration per DASH segment (#11868)
++ Introduce --autonumber-start option for initial value of %(autonumber)s
+  template (#727, #2702, #9362, #10457, #10529, #11862)
+
+Extractors
++ [azmedien:playlist] Add support for topic and themen playlists (#11817)
+* [npo] Fix subtitles extraction
++ [itv] Extract subtitles
++ [itv] Add support for itv.com (#9240)
++ [mtv81] Add support for mtv81.com (#7619)
++ [vlive] Add support for channels (#11826)
++ [kaltura] Add fallback for fileExt
++ [kaltura] Improve uploader_id extraction
++ [konserthusetplay] Add support for rspoplay.se (#11828)
+
+
+version 2017.01.28
+
+Core
+* [utils] Improve parse_duration
+
+Extractors
+* [crunchyroll] Improve series and season metadata extraction (#11832)
+* [soundcloud] Improve formats extraction and extract audio bitrate
++ [soundcloud] Extract HLS formats
+* [soundcloud] Fix track URL extraction (#11852)
++ [twitch:vod] Expand URL regular expressions (#11846)
+* [aenetworks] Fix season episodes extraction (#11669)
++ [tva] Add support for videos.tva.ca (#11842)
+* [jamendo] Improve and extract more metadata (#11836)
++ [disney] Add support for Disney sites (#7409, #11801, #4975, #11000)
+* [vevo] Remove request to old API and catch API v2 errors
++ [cmt,mtv,southpark] Add support for episode URLs (#11837)
++ [youtube] Add fallback for duration extraction (#11841)
+
+
+version 2017.01.25
+
+Extractors
++ [openload] Fallback video extension to mp4
++ [extractor/generic] Add support for Openload embeds (#11536, #11812)
+* [srgssr] Fix rts video extraction (#11831)
++ [afreecatv:global] Add support for afreeca.tv (#11807)
++ [crackle] Extract vtt subtitles
++ [crackle] Extract multiple resolutions for thumbnails
++ [crackle] Add support for mobile URLs
++ [konserthusetplay] Extract subtitles (#11823)
++ [konserthusetplay] Add support for HLS videos (#11823)
+* [vimeo:review] Fix config URL extraction (#11821)
+
+
+version 2017.01.24
+
+Extractors
+* [pluralsight] Fix extraction (#11820)
++ [nextmedia] Add support for NextTV (壹電視)
+* [24video] Fix extraction (#11811)
+* [youtube:playlist] Fix nonexistent and private playlist detection (#11604)
++ [chirbit] Extract uploader (#11809)
+
+
+version 2017.01.22
+
+Extractors
++ [pornflip] Add support for pornflip.com (#11556, #11795)
+* [chaturbate] Fix extraction (#11797, #11802)
++ [azmedien] Add support for AZ Medien sites (#11784, #11785)
++ [nextmedia] Support redirected URLs
++ [vimeo:channel] Extract videos' titles for playlist entries (#11796)
++ [youtube] Extract episode metadata (#9695, #11774)
++ [cspan] Support Ustream embedded videos (#11547)
++ [1tv] Add support for HLS videos (#11786)
+* [uol] Fix extraction (#11770)
+* [mtv] Relax triforce feed regular expression (#11766)
+
+
+version 2017.01.18
+
+Extractors
+* [bilibili] Fix extraction (#11077)
++ [canalplus] Add fallback for video id (#11764)
+* [20min] Fix extraction (#11683, #11751)
+* [imdb] Extend URL regular expression (#11744)
++ [naver] Add support for tv.naver.com links (#11743)
+
+
+version 2017.01.16
+
+Core
+* [options] Apply custom config to final composite configuration (#11741)
+* [YoutubeDL] Improve protocol auto determining (#11720)
+
+Extractors
+* [xiami] Relax URL regular expressions
+* [xiami] Improve track metadata extraction (#11699)
++ [limelight] Check hand-made direct HTTP links
++ [limelight] Add support for direct HTTP links at video.llnw.net (#11737)
++ [brightcove] Recognize another player ID pattern (#11688)
++ [niconico] Support login via cookies (#7968)
+* [yourupload] Fix extraction (#11601)
++ [beam:live] Add support for beam.pro live streams (#10702, #11596)
+* [vevo] Improve geo restriction detection
++ [dramafever] Add support for URLs with language code (#11714)
+* [cbc] Improve playlist support (#11704)
+
+
+version 2017.01.14
+
+Core
++ [common] Add ability to customize akamai manifest host
++ [utils] Add more date formats
+
+Extractors
+- [mtv] Eliminate _transform_rtmp_url
+* [mtv] Generalize triforce mgid extraction
++ [cmt] Add support for full episodes and video clips (#11623)
++ [mitele] Extract DASH formats
++ [ooyala] Add support for videos with embedToken (#11684)
+* [mixcloud] Fix extraction (#11674)
+* [openload] Fix extraction (#10408)
+* [tv4] Improve extraction (#11698)
+* [freesound] Fix and improve extraction (#11602)
++ [nick] Add support for beta.nick.com (#11655)
+* [mtv,cc] Use HLS by default with native HLS downloader (#11641)
+* [mtv] Fix non-HLS extraction
+
+
 version 2017.01.10
 
 Extractors

+ 4 - 2
MANIFEST.in

@@ -1,7 +1,9 @@
 include README.md
-include test/*.py
-include test/*.json
+include LICENSE
+include AUTHORS
+include ChangeLog
 include youtube-dl.bash-completion
 include youtube-dl.fish
 include youtube-dl.1
 recursive-include docs Makefile conf.py *.rst
+recursive-include test *

+ 24 - 9
Makefile

@@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
 	find . -name "*.pyc" -delete
 	find . -name "*.class" -delete
 
@@ -36,8 +36,17 @@ test:
 
 ot: offlinetest
 
+# Keep this list in sync with devscripts/run_tests.sh
 offlinetest: codetest
-	$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py
+	$(PYTHON) -m nose --verbose test \
+		--exclude test_age_restriction.py \
+		--exclude test_download.py \
+		--exclude test_iqiyi_sdk_interpreter.py \
+		--exclude test_socks.py \
+		--exclude test_subtitles.py \
+		--exclude test_write_annotations.py \
+		--exclude test_youtube_lists.py \
+		--exclude test_youtube_signature.py
 
 tar: youtube-dl.tar.gz
 
@@ -46,8 +55,15 @@ tar: youtube-dl.tar.gz
 pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
 
 youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
-	zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
-	zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
+	mkdir -p zip
+	for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
+	  mkdir -p zip/$$d ;\
+	  cp -pPR $$d/*.py zip/$$d/ ;\
+	done
+	touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
+	mv zip/youtube_dl/__main__.py zip/
+	cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
+	rm -rf zip
 	echo '#!$(PYTHON)' > youtube-dl
 	cat youtube-dl.zip >> youtube-dl
 	rm youtube-dl.zip
@@ -94,20 +110,19 @@ _EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -in
 youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
 	$(PYTHON) devscripts/make_lazy_extractors.py $@
 
-youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog
+youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS
 	@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
 		--exclude '*.DS_Store' \
 		--exclude '*.kate-swp' \
 		--exclude '*.pyc' \
 		--exclude '*.pyo' \
 		--exclude '*~' \
-		--exclude '__pycache' \
+		--exclude '__pycache__' \
 		--exclude '.git' \
-		--exclude 'testdata' \
 		--exclude 'docs/_build' \
 		-- \
 		bin devscripts test youtube_dl docs \
-		ChangeLog LICENSE README.md README.txt \
+		ChangeLog AUTHORS LICENSE README.md README.txt \
 		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
-		youtube-dl.zsh youtube-dl.fish setup.py \
+		youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \
 		youtube-dl

+ 166 - 142
README.md

@@ -1,3 +1,5 @@
+[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
+
 youtube-dl - download videos from youtube.com or other video platforms
 
 - [INSTALLATION](#installation)
@@ -25,7 +27,7 @@ If you do not have curl, you can alternatively use a recent wget:
     sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
     sudo chmod a+rx /usr/local/bin/youtube-dl
 
-Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
+Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
 
 You can also use pip:
 
@@ -33,7 +35,7 @@ You can also use pip:
     
 This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
 
-OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
+OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
 
     brew install youtube-dl
 
@@ -88,8 +90,6 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --mark-watched                   Mark videos watched (YouTube only)
     --no-mark-watched                Do not mark videos watched (YouTube only)
     --no-color                       Do not emit color codes in output
-    --abort-on-unavailable-fragment  Abort downloading when some fragment is not
-                                     available
 
 ## Network Options:
     --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy.
@@ -99,16 +99,23 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      string (--proxy "") for direct connection
     --socket-timeout SECONDS         Time to wait before giving up, in seconds
     --source-address IP              Client-side IP address to bind to
-                                     (experimental)
     -4, --force-ipv4                 Make all connections via IPv4
-                                     (experimental)
     -6, --force-ipv6                 Make all connections via IPv6
-                                     (experimental)
+
+## Geo Restriction:
     --geo-verification-proxy URL     Use this proxy to verify the IP address for
                                      some geo-restricted sites. The default
                                      proxy specified by --proxy (or none, if the
                                      option is not present) is used for the
-                                     actual downloading. (experimental)
+                                     actual downloading.
+    --geo-bypass                     Bypass geographic restriction via faking
+                                     X-Forwarded-For HTTP header (experimental)
+    --no-geo-bypass                  Do not bypass geographic restriction via
+                                     faking X-Forwarded-For HTTP header
+                                     (experimental)
+    --geo-bypass-country CODE        Force bypass geographic restriction with
+                                     explicitly provided two-letter ISO 3166-1
+                                     alpha-2 country code (experimental)
 
 ## Video Selection:
     --playlist-start NUMBER          Playlist video to start at (default is 1)
@@ -139,17 +146,19 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      COUNT views
     --max-views COUNT                Do not download any videos with more than
                                      COUNT views
-    --match-filter FILTER            Generic video filter (experimental).
-                                     Specify any key (see help for -o for a list
-                                     of available keys) to match if the key is
+    --match-filter FILTER            Generic video filter. Specify any key (see
+                                     the "OUTPUT TEMPLATE" for a list of
+                                     available keys) to match if the key is
                                      present, !key to check if the key is not
-                                     present,key > NUMBER (like "comment_count >
-                                     12", also works with >=, <, <=, !=, =) to
-                                     compare against a number, and & to require
-                                     multiple matches. Values which are not
-                                     known are excluded unless you put a
-                                     question mark (?) after the operator.For
-                                     example, to only match videos that have
+                                     present, key > NUMBER (like "comment_count
+                                     > 12", also works with >=, <, <=, !=, =) to
+                                     compare against a number, key = 'LITERAL'
+                                     (like "uploader = 'Mike Smith'", also works
+                                     with !=) to match against a string literal
+                                     and & to require multiple matches. Values
+                                     which are not known are excluded unless you
+                                     put a question mark (?) after the operator.
+                                     For example, to only match videos that have
                                      been liked more than 100 times and disliked
                                      less than 50 times (or the dislike
                                      functionality is not available at the given
@@ -174,10 +183,15 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     -R, --retries RETRIES            Number of retries (default is 10), or
                                      "infinite".
     --fragment-retries RETRIES       Number of retries for a fragment (default
-                                     is 10), or "infinite" (DASH and hlsnative
-                                     only)
-    --skip-unavailable-fragments     Skip unavailable fragments (DASH and
-                                     hlsnative only)
+                                     is 10), or "infinite" (DASH, hlsnative and
+                                     ISM)
+    --skip-unavailable-fragments     Skip unavailable fragments (DASH, hlsnative
+                                     and ISM)
+    --abort-on-unavailable-fragment  Abort downloading when some fragment is not
+                                     available
+    --keep-fragments                 Keep downloaded fragments on disk after
+                                     downloading is finished; fragments are
+                                     erased by default
     --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K)
                                      (default is 1024)
     --no-resize-buffer               Do not automatically adjust the buffer
@@ -185,6 +199,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      automatically resized from an initial value
                                      of SIZE.
     --playlist-reverse               Download playlist videos in reverse order
+    --playlist-random                Download playlist videos in random order
     --xattr-set-filesize             Set file xattribute ytdl.filesize with
                                      expected file size (experimental)
     --hls-prefer-native              Use the native HLS downloader instead of
@@ -207,19 +222,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --id                             Use only video ID in file name
     -o, --output TEMPLATE            Output filename template, see the "OUTPUT
                                      TEMPLATE" for all the info
-    --autonumber-size NUMBER         Specify the number of digits in
-                                     %(autonumber)s when it is present in output
-                                     filename template or --auto-number option
-                                     is given
+    --autonumber-start NUMBER        Specify the start value for %(autonumber)s
+                                     (default is 1)
     --restrict-filenames             Restrict filenames to only ASCII
                                      characters, and avoid "&" and spaces in
                                      filenames
-    -A, --auto-number                [deprecated; use -o
-                                     "%(autonumber)s-%(title)s.%(ext)s" ] Number
-                                     downloaded files starting from 00000
-    -t, --title                      [deprecated] Use title in file name
-                                     (default)
-    -l, --literal                    [deprecated] Alias of --title
     -w, --no-overwrites              Do not overwrite files
     -c, --continue                   Force resume of partially downloaded files.
                                      By default, youtube-dl will resume
@@ -272,8 +279,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --get-filename                   Simulate, quiet but print output filename
     --get-format                     Simulate, quiet but print output format
     -j, --dump-json                  Simulate, quiet but print JSON information.
-                                     See --output for a description of available
-                                     keys.
+                                     See the "OUTPUT TEMPLATE" for a description
+                                     of available keys.
     -J, --dump-single-json           Simulate, quiet but print JSON information
                                      for each command-line argument. If the URL
                                      refers to a playlist, dump the whole
@@ -373,8 +380,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      (requires ffmpeg or avconv and ffprobe or
                                      avprobe)
     --audio-format FORMAT            Specify audio format: "best", "aac",
-                                     "vorbis", "mp3", "m4a", "opus", or "wav";
-                                     "best" by default
+                                     "flac", "mp3", "m4a", "opus", "vorbis", or
+                                     "wav"; "best" by default; No effect without
+                                     -x
     --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert
                                      a value between 0 (better) and 9 (worse)
                                      for VBR or a specific bitrate like 128K
@@ -394,12 +402,14 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --add-metadata                   Write metadata to the video file
     --metadata-from-title FORMAT     Parse additional metadata like song title /
                                      artist from the video title. The format
-                                     syntax is the same as --output, the parsed
-                                     parameters replace existing values.
-                                     Additional templates: %(album)s,
-                                     %(artist)s. Example: --metadata-from-title
-                                     "%(artist)s - %(title)s" matches a title
-                                     like "Coldplay - Paradise"
+                                     syntax is the same as --output. Regular
+                                     expression with named capture groups may
+                                     also be used. The parsed parameters replace
+                                     existing values. Example: --metadata-from-
+                                     title "%(artist)s - %(title)s" matches a
+                                     title like "Coldplay - Paradise". Example
+                                     (regex): --metadata-from-title
+                                     "(?P<artist>.+?) - (?P<title>.+)"
     --xattrs                         Write metadata to the video file's xattrs
                                      (using dublin core and xdg standards)
     --fixup POLICY                   Automatically correct known faults of the
@@ -419,7 +429,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      syntax. Example: --exec 'adb push {}
                                      /sdcard/Music/ && rm {}'
     --convert-subs FORMAT            Convert the subtitles to other format
-                                     (currently supported: srt|ass|vtt)
+                                     (currently supported: srt|ass|vtt|lrc)
 
 # CONFIGURATION
 
@@ -450,7 +460,7 @@ You can also use `--config-location` if you want to use custom configuration fil
 
 ### Authentication with `.netrc` file
 
-You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
 ```
 touch $HOME/.netrc
 chmod a-rwx,u+rw $HOME/.netrc
@@ -466,7 +476,10 @@ machine twitch login my_twitch_account_name password my_twitch_password
 ```
 To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
 
-On Windows you may also need to setup the `%HOME%` environment variable manually.
+On Windows you may also need to set up the `%HOME%` environment variable manually. For example:
+```
+set HOME=%USERPROFILE%
+```
 
 # OUTPUT TEMPLATE
 
@@ -474,87 +487,96 @@ The `-o` option allows users to indicate a template for the output file names.
 
 **tl;dr:** [navigate me to examples](#output-template-examples).
 
-The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
-
- - `id`: Video identifier
- - `title`: Video title
- - `url`: Video URL
- - `ext`: Video filename extension
- - `alt_title`: A secondary title of the video
- - `display_id`: An alternative identifier for the video
- - `uploader`: Full name of the video uploader
- - `license`: License name the video is licensed under
- - `creator`: The creator of the video
- - `release_date`: The date (YYYYMMDD) when the video was released
- - `timestamp`: UNIX timestamp of the moment the video became available
- - `upload_date`: Video upload date (YYYYMMDD)
- - `uploader_id`: Nickname or id of the video uploader
- - `location`: Physical location where the video was filmed
- - `duration`: Length of the video in seconds
- - `view_count`: How many users have watched the video on the platform
- - `like_count`: Number of positive ratings of the video
- - `dislike_count`: Number of negative ratings of the video
- - `repost_count`: Number of reposts of the video
- - `average_rating`: Average rating give by users, the scale used depends on the webpage
- - `comment_count`: Number of comments on the video
- - `age_limit`: Age restriction for the video (years)
- - `format`: A human-readable description of the format 
- - `format_id`: Format code specified by `--format`
- - `format_note`: Additional info about the format
- - `width`: Width of the video
- - `height`: Height of the video
- - `resolution`: Textual description of width and height
- - `tbr`: Average bitrate of audio and video in KBit/s
- - `abr`: Average audio bitrate in KBit/s
- - `acodec`: Name of the audio codec in use
- - `asr`: Audio sampling rate in Hertz
- - `vbr`: Average video bitrate in KBit/s
- - `fps`: Frame rate
- - `vcodec`: Name of the video codec in use
- - `container`: Name of the container format
- - `filesize`: The number of bytes, if known in advance
- - `filesize_approx`: An estimate for the number of bytes
- - `protocol`: The protocol that will be used for the actual download
- - `extractor`: Name of the extractor
- - `extractor_key`: Key name of the extractor
- - `epoch`: Unix epoch when creating the file
- - `autonumber`: Five-digit number that will be increased with each download, starting at zero
- - `playlist`: Name or id of the playlist that contains the video
- - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
- - `playlist_id`: Playlist identifier
- - `playlist_title`: Playlist title
-
+The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operation. Allowed names, along with their sequence types, are:
+
+ - `id` (string): Video identifier
+ - `title` (string): Video title
+ - `url` (string): Video URL
+ - `ext` (string): Video filename extension
+ - `alt_title` (string): A secondary title of the video
+ - `display_id` (string): An alternative identifier for the video
+ - `uploader` (string): Full name of the video uploader
+ - `license` (string): License name the video is licensed under
+ - `creator` (string): The creator of the video
+ - `release_date` (string): The date (YYYYMMDD) when the video was released
+ - `timestamp` (numeric): UNIX timestamp of the moment the video became available
+ - `upload_date` (string): Video upload date (YYYYMMDD)
+ - `uploader_id` (string): Nickname or id of the video uploader
+ - `location` (string): Physical location where the video was filmed
+ - `duration` (numeric): Length of the video in seconds
+ - `view_count` (numeric): How many users have watched the video on the platform
+ - `like_count` (numeric): Number of positive ratings of the video
+ - `dislike_count` (numeric): Number of negative ratings of the video
+ - `repost_count` (numeric): Number of reposts of the video
+ - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
+ - `comment_count` (numeric): Number of comments on the video
+ - `age_limit` (numeric): Age restriction for the video (years)
+ - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
+ - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
+ - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
+ - `format` (string): A human-readable description of the format 
+ - `format_id` (string): Format code specified by `--format`
+ - `format_note` (string): Additional info about the format
+ - `width` (numeric): Width of the video
+ - `height` (numeric): Height of the video
+ - `resolution` (string): Textual description of width and height
+ - `tbr` (numeric): Average bitrate of audio and video in KBit/s
+ - `abr` (numeric): Average audio bitrate in KBit/s
+ - `acodec` (string): Name of the audio codec in use
+ - `asr` (numeric): Audio sampling rate in Hertz
+ - `vbr` (numeric): Average video bitrate in KBit/s
+ - `fps` (numeric): Frame rate
+ - `vcodec` (string): Name of the video codec in use
+ - `container` (string): Name of the container format
+ - `filesize` (numeric): The number of bytes, if known in advance
+ - `filesize_approx` (numeric): An estimate for the number of bytes
+ - `protocol` (string): The protocol that will be used for the actual download
+ - `extractor` (string): Name of the extractor
+ - `extractor_key` (string): Key name of the extractor
+ - `epoch` (numeric): Unix epoch when creating the file
+ - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at the value set by `--autonumber-start` (1 by default)
+ - `playlist` (string): Name or id of the playlist that contains the video
+ - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
+ - `playlist_id` (string): Playlist identifier
+ - `playlist_title` (string): Playlist title
+ - `playlist_uploader` (string): Full name of the playlist uploader
+ - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
 
 Available for the video that belongs to some logical chapter or section:
- - `chapter`: Name or title of the chapter the video belongs to
- - `chapter_number`: Number of the chapter the video belongs to
- - `chapter_id`: Id of the chapter the video belongs to
+
+ - `chapter` (string): Name or title of the chapter the video belongs to
+ - `chapter_number` (numeric): Number of the chapter the video belongs to
+ - `chapter_id` (string): Id of the chapter the video belongs to
 
 Available for the video that is an episode of some series or programme:
- - `series`: Title of the series or programme the video episode belongs to
- - `season`: Title of the season the video episode belongs to
- - `season_number`: Number of the season the video episode belongs to
- - `season_id`: Id of the season the video episode belongs to
- - `episode`: Title of the video episode
- - `episode_number`: Number of the video episode within a season
- - `episode_id`: Id of the video episode
+
+ - `series` (string): Title of the series or programme the video episode belongs to
+ - `season` (string): Title of the season the video episode belongs to
+ - `season_number` (numeric): Number of the season the video episode belongs to
+ - `season_id` (string): Id of the season the video episode belongs to
+ - `episode` (string): Title of the video episode
+ - `episode_number` (numeric): Number of the video episode within a season
+ - `episode_id` (string): Id of the video episode
 
 Available for the media that is a track or a part of a music album:
- - `track`: Title of the track
- - `track_number`: Number of the track within an album or a disc
- - `track_id`: Id of the track
- - `artist`: Artist(s) of the track
- - `genre`: Genre(s) of the track
- - `album`: Title of the album the track belongs to
- - `album_type`: Type of the album
- - `album_artist`: List of all artists appeared on the album
- - `disc_number`: Number of the disc or other physical medium the track belongs to
- - `release_year`: Year (YYYY) when the album was released
+
+ - `track` (string): Title of the track
+ - `track_number` (numeric): Number of the track within an album or a disc
+ - `track_id` (string): Id of the track
+ - `artist` (string): Artist(s) of the track
+ - `genre` (string): Genre(s) of the track
+ - `album` (string): Title of the album the track belongs to
+ - `album_type` (string): Type of the album
+ - `album_artist` (string): List of all artists that appeared on the album
+ - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
+ - `release_year` (numeric): Year (YYYY) when the album was released
 
 Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
 
 For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
 
+For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
+
 Output templates can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
 
 To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
@@ -569,7 +591,7 @@ If you are using an output template inside a Windows batch file then you must es
 
 #### Output template examples
 
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of single.
 
 ```bash
 $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
@@ -588,7 +610,7 @@ $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)
 $ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
 
 # Download entire series season keeping each series and each season in separate directory under C:/MyVideos
-$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
+$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
 
 # Stream the video being downloaded to stdout
 $ youtube-dl -o - BaW_jenozKc
@@ -639,7 +661,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
  - `acodec`: Name of the audio codec in use
  - `vcodec`: Name of the video codec in use
  - `container`: Name of the container format
- - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
+ - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
  - `format_id`: A short description of the format
 
 Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
@@ -656,7 +678,7 @@ If you want to preserve the old format selection behavior (prior to youtube-dl 2
 
 #### Format selection examples
 
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of single.
 
 ```bash
 # Download best mp4 format available or any other best if no mp4 available
@@ -701,17 +723,17 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
 
 ### How do I update youtube-dl?
 
-If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
+If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
 
 If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
 
-If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
+If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
 
 As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
 
     sudo apt-get remove -y youtube-dl
 
-Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
+Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
 
 ```
 sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
@@ -751,11 +773,11 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much.
 
 youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
 
-Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
+Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
 
 ### I have downloaded a video but how can I play it?
 
-Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
+Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
 
 ### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
 
@@ -830,10 +852,10 @@ Use the `-o` to specify an [output template](#output-template), for example `-o
 
 ### How do I download a video starting with a `-`?
 
-Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
+Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
 
     youtube-dl -- -wNyEUrxzFU
-    youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
+    youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"
 
 ### How do I pass cookies to youtube-dl?
 
@@ -841,15 +863,15 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
 
 In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
 
-Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
+Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
 
 Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
 
 ### How do I stream directly to media player?
 
+You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe the former to the latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
+You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
 
-    youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
+    youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
 
 ### How do I download only new videos from a playlist?
 
@@ -869,7 +891,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i
 
 When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
 
-In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
+In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
 
 If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
 
@@ -895,7 +917,7 @@ Feel free to bump the issue from time to time by writing a small comment ("Issue
 
 ### How can I detect whether a given URL is supported by youtube-dl?
 
-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
 
 It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
 
@@ -909,7 +931,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe
 
 # DEVELOPER INSTRUCTIONS
 
-Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
 
 To run youtube-dl as a developer, you don't need to build anything either. Simply execute
 
@@ -921,6 +943,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
     python test/test_download.py
     nosetests
 
+See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
+
 If you want to create a build of youtube-dl yourself, you'll need
 
 * python
@@ -957,7 +981,7 @@ After you have ensured this site is distributing its content legally, you can fo
     class YourExtractorIE(InfoExtractor):
         _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
         _TEST = {
-            'url': 'http://yourextractor.com/watch/42',
+            'url': 'https://yourextractor.com/watch/42',
             'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
             'info_dict': {
                 'id': '42',
@@ -988,10 +1012,10 @@ After you have ensured this site is distributing its content legally, you can fo
             }
     ```
 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted in this numbering.
 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
 
         $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py
@@ -1147,10 +1171,10 @@ import youtube_dl
 
 ydl_opts = {}
 with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
 ```
 
-Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
+Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
 
 Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
 
@@ -1186,19 +1210,19 @@ ydl_opts = {
     'progress_hooks': [my_hook],
 }
 with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
 ```
 
 # BUGS
 
-Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
 
 **Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
 ```
 $ youtube-dl -v <your command line>
 [debug] System config: []
 [debug] User config: []
-[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
 [debug] youtube-dl version 2015.12.06
 [debug] Git HEAD: 135392e
@@ -1229,7 +1253,7 @@ For bug reports, this means that your report should contain the *complete* outpu
 
 If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
 
-**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
 
 ###  Are you using the latest version?
 

+ 2 - 2
devscripts/check-porn.py

@@ -14,7 +14,7 @@ import os
 import sys
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import get_testcases
+from test.helper import gettestcases
 from youtube_dl.utils import compat_urllib_parse_urlparse
 from youtube_dl.utils import compat_urllib_request
 
@@ -24,7 +24,7 @@ if len(sys.argv) > 1:
 else:
     METHOD = 'EURISTIC'
 
-for test in get_testcases():
+for test in gettestcases():
     if METHOD == 'EURISTIC':
         try:
             webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()

+ 5 - 0
devscripts/install_jython.sh

@@ -0,0 +1,5 @@
+#!/bin/bash
+
+wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar
+java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython"
+"$HOME/jython/bin/jython" -m pip install nose  # path quoted, like the install dir above, so a HOME with spaces works

+ 2 - 1
devscripts/make_lazy_extractors.py

@@ -1,6 +1,7 @@
 from __future__ import unicode_literals, print_function
 
 from inspect import getsource
+import io
 import os
 from os.path import dirname as dirn
 import sys
@@ -95,5 +96,5 @@ module_contents.append(
 
 module_src = '\n'.join(module_contents) + '\n'
 
-with open(lazy_extractors_filename, 'wt') as f:
+with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
     f.write(module_src)

+ 1 - 1
devscripts/prepare_manpage.py

@@ -8,7 +8,7 @@ import re
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 README_FILE = os.path.join(ROOT_DIR, 'README.md')
 
-PREFIX = '''%YOUTUBE-DL(1)
+PREFIX = r'''%YOUTUBE-DL(1)
 
 # NAME
 

+ 22 - 0
devscripts/run_tests.sh

@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Keep this list in sync with the `offlinetest` target in Makefile
+DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|socks|subtitles|write_annotations|youtube_lists|youtube_signature"
+
+test_set=""
+multiprocess_args=""
+
+case "$YTDL_TEST_SET" in
+    core)
+        test_set="-I test_($DOWNLOAD_TESTS)\.py"  # ignore the test files named in DOWNLOAD_TESTS
+    ;;
+    download)
+        test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"  # ignore every test file NOT named in DOWNLOAD_TESTS
+        multiprocess_args="--processes=4 --process-timeout=540"
+    ;;
+    *)
+        :  # unset/unknown YTDL_TEST_SET: no filter, run everything (`break` is only valid inside a loop)
+    ;;
+esac
+
+nosetests test --verbose $test_set $multiprocess_args  # left unquoted on purpose: each variable holds several words

Filskillnaden har hållts tillbaka eftersom den är för stor
+ 88 - 28
docs/supportedsites.md


+ 4 - 2
setup.py

@@ -107,8 +107,9 @@ setup(
     url='https://github.com/rg3/youtube-dl',
     author='Ricardo Garcia',
     author_email='ytdl@yt-dl.org',
-    maintainer='Philipp Hagemeister',
-    maintainer_email='phihag@phihag.de',
+    maintainer='Sergey M.',
+    maintainer_email='dstftw@gmail.com',
+    license='Unlicense',
     packages=[
         'youtube_dl',
         'youtube_dl.extractor', 'youtube_dl.downloader',
@@ -130,6 +131,7 @@ setup(
         'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
     ],
 
     cmdclass={'build_lazy_extractors': build_lazy_extractors},

+ 611 - 1
test/test_InfoExtractor.py

@@ -3,12 +3,14 @@
 from __future__ import unicode_literals
 
 # Allow direct execution
+import io
 import os
 import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL
+from test.helper import FakeYDL, expect_dict, expect_value
+from youtube_dl.compat import compat_etree_fromstring
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -84,6 +86,614 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
         self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
 
+    def test_extract_jwplayer_data_realworld(self):
+        # from http://www.suffolk.edu/sjc/
+        expect_dict(
+            self,
+            self.ie._extract_jwplayer_data(r'''
+                <script type='text/javascript'>
+                    jwplayer('my-video').setup({
+                        file: 'rtmp://192.138.214.154/live/sjclive',
+                        fallback: 'true',
+                        width: '95%',
+                      aspectratio: '16:9',
+                      primary: 'flash',
+                      mediaid:'XEgvuql4'
+                    });
+                </script>
+                ''', None, require_title=False),
+            {
+                'id': 'XEgvuql4',
+                'formats': [{
+                    'url': 'rtmp://192.138.214.154/live/sjclive',
+                    'ext': 'flv'
+                }]
+            })
+
+        # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/
+        expect_dict(
+            self,
+            self.ie._extract_jwplayer_data(r'''
+<script type="text/javascript">
+    jwplayer("mediaplayer").setup({
+        'videoid': "7564",
+        'width': "100%",
+        'aspectratio': "16:9",
+        'stretching': "exactfit",
+        'autostart': 'false',
+        'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf",
+        'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv",
+        'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg",
+        'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4",
+        'logo.hide': true,
+        'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip",
+        'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf",
+        'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370",
+        'controlbar': 'bottom',
+        'modes': [
+            {type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'}
+        ],
+        'provider': 'http'
+    });
+    //noinspection JSAnnotator
+    invideo.setup({
+        adsUrl: "/banner-iframe/?zoneId=32",
+        adsUrl2: "",
+        autostart: false
+    });
+</script>
+            ''', 'dummy', require_title=False),
+            {
+                'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg',
+                'formats': [{
+                    'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv',
+                    'ext': 'flv'
+                }]
+            })
+
+        # from http://www.indiedb.com/games/king-machine/videos
+        expect_dict(
+            self,
+            self.ie._extract_jwplayer_data(r'''
+<script>
+jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) {
+            videoAnalytics("play");
+}).once("complete", function(event) {
+    videoAnalytics("completed");
+});
+</script>
+                ''', 'dummy'),
+            {
+                'title': 'king machine trailer 1',
+                'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg',
+                'formats': [{
+                    'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4',
+                    'height': 360,
+                    'ext': 'mp4'
+                }, {
+                    'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4',
+                    'height': 720,
+                    'ext': 'mp4'
+                }]
+            })
+
+    def test_parse_m3u8_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/11507
+                # http://pluzz.francetv.fr/videos/le_ministere.html
+                'pluzz_francetv_11507',
+                'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                [{
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'format_id': '180',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.66.30',
+                    'tbr': 180,
+                    'width': 256,
+                    'height': 144,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'format_id': '303',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.66.30',
+                    'tbr': 303,
+                    'width': 320,
+                    'height': 180,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'format_id': '575',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.66.30',
+                    'tbr': 575,
+                    'width': 512,
+                    'height': 288,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'format_id': '831',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.77.30',
+                    'tbr': 831,
+                    'width': 704,
+                    'height': 396,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'protocol': 'm3u8',
+                    'format_id': '1467',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.77.30',
+                    'tbr': 1467,
+                    'width': 1024,
+                    'height': 576,
+                }]
+            ),
+            (
+                # https://github.com/rg3/youtube-dl/issues/11995
+                # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
+                'teamcoco_11995',
+                'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                [{
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-0-Default',
+                    'protocol': 'm3u8',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-1-Default',
+                    'protocol': 'm3u8',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '71',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.5',
+                    'vcodec': 'none',
+                    'tbr': 71,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '413',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001e',
+                    'tbr': 413,
+                    'width': 400,
+                    'height': 224,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '522',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001e',
+                    'tbr': 522,
+                    'width': 400,
+                    'height': 224,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '1205',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001e',
+                    'tbr': 1205,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '2374',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 2374,
+                    'width': 1024,
+                    'height': 576,
+                }]
+            ),
+            (
+                # https://github.com/rg3/youtube-dl/issues/12211
+                # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
+                'toggle_mobile_12211',
+                'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                [{
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-English',
+                    'protocol': 'm3u8',
+                    'language': 'eng',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-Undefined',
+                    'protocol': 'm3u8',
+                    'language': 'und',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '155',
+                    'protocol': 'm3u8',
+                    'tbr': 155.648,
+                    'width': 320,
+                    'height': 180,
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '502',
+                    'protocol': 'm3u8',
+                    'tbr': 502.784,
+                    'width': 480,
+                    'height': 270,
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '827',
+                    'protocol': 'm3u8',
+                    'tbr': 827.392,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '1396',
+                    'protocol': 'm3u8',
+                    'tbr': 1396.736,
+                    'width': 854,
+                    'height': 480,
+                }]
+            ),
+            (
+                # http://www.twitch.tv/riotgames/v/6528877
+                'twitch_vod',
+                'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                [{
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Audio Only',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 182.725,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Mobile',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C00D',
+                    'tbr': 280.474,
+                    'width': 400,
+                    'height': 226,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Low',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C01E',
+                    'tbr': 628.347,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Medium',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C01E',
+                    'tbr': 893.387,
+                    'width': 852,
+                    'height': 480,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'High',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C01F',
+                    'tbr': 1603.789,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Source',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.100.31',
+                    'tbr': 3214.134,
+                    'width': 1280,
+                    'height': 720,
+                }]
+            ),
+            (
+                # http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
+                # EXT-X-STREAM-INF tag with NAME attribute that is not defined
+                # in HLS specification
+                'vidio',
+                'https://www.vidio.com/videos/165683/playlist.m3u8',
+                [{
+                    'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
+                    'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '270p 3G',
+                    'protocol': 'm3u8',
+                    'tbr': 300,
+                    'width': 480,
+                    'height': 270,
+                }, {
+                    'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8',
+                    'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '360p SD',
+                    'protocol': 'm3u8',
+                    'tbr': 600,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8',
+                    'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '720p HD',
+                    'protocol': 'm3u8',
+                    'tbr': 1200,
+                    'width': 1280,
+                    'height': 720,
+                }]
+            )
+        ]
+
+        for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_m3u8_formats(
+                    f.read(), m3u8_url, ext='mp4')
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
+
+    def test_parse_mpd_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/13919
+                # Also tests duplicate representation ids, see
+                # https://github.com/rg3/youtube-dl/issues/15111
+                'float_duration',
+                'http://unknown/manifest.mpd',
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'm4a',
+                    'format_id': '318597',
+                    'format_note': 'DASH audio',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 61.587,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '318597',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001f',
+                    'tbr': 318.597,
+                    'width': 340,
+                    'height': 192,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '638590',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001f',
+                    'tbr': 638.59,
+                    'width': 512,
+                    'height': 288,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '1022565',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 1022.565,
+                    'width': 688,
+                    'height': 384,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '2046506',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 2046.506,
+                    'width': 1024,
+                    'height': 576,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '3998017',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.640029',
+                    'tbr': 3998.017,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '5997485',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.640032',
+                    'tbr': 5997.485,
+                    'width': 1920,
+                    'height': 1080,
+                }]
+            ), (
+                # https://github.com/rg3/youtube-dl/pull/14844
+                'urls_only',
+                'http://unknown/manifest.mpd',
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_144p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 200,
+                    'width': 256,
+                    'height': 144,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_240p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 400,
+                    'width': 424,
+                    'height': 240,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_360p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 800,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_480p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 1200,
+                    'width': 856,
+                    'height': 480,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_576p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 1600,
+                    'width': 1024,
+                    'height': 576,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_720p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 2400,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_1080p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 4400,
+                    'width': 1920,
+                    'height': 1080,
+                }]
+            )
+        ]
+
+        for mpd_file, mpd_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_mpd_formats(
+                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    mpd_url=mpd_url)
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
+
+    def test_parse_f4m_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/14660
+                'custom_base_url',
+                'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+                [{
+                    'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+                    'ext': 'flv',
+                    'format_id': '2148',
+                    'protocol': 'f4m',
+                    'tbr': 2148,
+                    'width': 1280,
+                    'height': 720,
+                }]
+            ),
+        ]
+
+        for f4m_file, f4m_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_f4m_formats(
+                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    f4m_url, None)
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
+
 
 if __name__ == '__main__':
     unittest.main()

+ 90 - 3
test/test_YoutubeDL.py

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -40,6 +41,7 @@ def _make_result(formats, **kwargs):
         'id': 'testid',
         'title': 'testttitle',
         'extractor': 'testex',
+        'extractor_key': 'TestEx',
     }
     res.update(**kwargs)
     return res
@@ -369,6 +371,19 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL({'format': 'best[height>360]'})
         self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
 
+    def test_format_selection_issue_10083(self):
+        # See https://github.com/rg3/youtube-dl/issues/10083
+        formats = [
+            {'format_id': 'regular', 'height': 360, 'url': TEST_URL},
+            {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
+        ]
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
+        ydl.process_ie_result(info_dict.copy())
+        self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
+
     def test_invalid_format_specs(self):
         def assert_syntax_error(format_spec):
             ydl = YDL({'format': format_spec})
@@ -447,6 +462,23 @@ class TestFormatSelection(unittest.TestCase):
             pass
         self.assertEqual(ydl.downloaded_info_dicts, [])
 
+    def test_default_format_spec(self):
+        ydl = YDL({'simulate': True})
+        self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+
+        ydl = YDL({})
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+        ydl = YDL({'simulate': True})
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best')
+
+        ydl = YDL({'outtmpl': '-'})
+        self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+
+        ydl = YDL({})
+        self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
 
 class TestYoutubeDL(unittest.TestCase):
     def test_subtitles(self):
@@ -525,6 +557,9 @@ class TestYoutubeDL(unittest.TestCase):
             'id': '1234',
             'ext': 'mp4',
             'width': None,
+            'height': 1080,
+            'title1': '$PATH',
+            'title2': '%PATH%',
         }
 
         def fname(templ):
@@ -534,16 +569,33 @@ class TestYoutubeDL(unittest.TestCase):
         self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
         # Replace missing fields with 'NA'
         self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
+        self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
+        self.assertEqual(fname('%(height)6d.%(ext)s'), '  1080.mp4')
+        self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080  .mp4')
+        self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
+        self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)   06d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)0   6d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)   0   6d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%%'), '%')
+        self.assertEqual(fname('%%%%'), '%%')
+        self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
+        self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
+        self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
+        self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
+        self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
+        self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
 
     def test_format_note(self):
         ydl = YoutubeDL()
         self.assertEqual(ydl._format_note({}), '')
         assertRegexpMatches(self, ydl._format_note({
             'vbr': 10,
-        }), '^\s*10k$')
+        }), r'^\s*10k$')
         assertRegexpMatches(self, ydl._format_note({
             'fps': 30,
-        }), '^30fps$')
+        }), r'^30fps$')
 
     def test_postprocessors(self):
         filename = 'post-processor-testfile.mp4'
@@ -606,6 +658,8 @@ class TestYoutubeDL(unittest.TestCase):
             'duration': 30,
             'filesize': 10 * 1024,
             'playlist_id': '42',
+            'uploader': "變態妍字幕版 太妍 тест",
+            'creator': "тест ' 123 ' тест--",
         }
         second = {
             'id': '2',
@@ -616,6 +670,7 @@ class TestYoutubeDL(unittest.TestCase):
             'description': 'foo',
             'filesize': 5 * 1024,
             'playlist_id': '43',
+            'uploader': "тест 123",
         }
         videos = [first, second]
 
@@ -656,6 +711,26 @@ class TestYoutubeDL(unittest.TestCase):
         res = get_videos(f)
         self.assertEqual(res, ['1'])
 
+        f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
+        res = get_videos(f)
+        self.assertEqual(res, ['2'])
+
+        f = match_filter_func('creator = "тест \' 123 \' тест--"')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
+        res = get_videos(f)
+        self.assertEqual(res, [])
+
     def test_playlist_items_selection(self):
         entries = [{
             'id': compat_str(i),
@@ -701,6 +776,12 @@ class TestYoutubeDL(unittest.TestCase):
         result = get_ids({'playlist_items': '10'})
         self.assertEqual(result, [])
 
+        result = get_ids({'playlist_items': '3-10'})
+        self.assertEqual(result, [3, 4])
+
+        result = get_ids({'playlist_items': '2-4,3-4,3'})
+        self.assertEqual(result, [2, 3, 4])
+
     def test_urlopen_no_file_protocol(self):
         # see https://github.com/rg3/youtube-dl/issues/8227
         ydl = YDL()
@@ -717,6 +798,8 @@ class TestYoutubeDL(unittest.TestCase):
                     '_type': 'url_transparent',
                     'url': 'foo2:',
                     'ie_key': 'Foo2',
+                    'title': 'foo1 title',
+                    'id': 'foo1_id',
                 }
 
         class Foo2IE(InfoExtractor):
@@ -733,7 +816,7 @@ class TestYoutubeDL(unittest.TestCase):
             _VALID_URL = r'foo3:'
 
             def _real_extract(self, url):
-                return _make_result([{'url': TEST_URL}])
+                return _make_result([{'url': TEST_URL}], title='foo3 title')
 
         ydl.add_info_extractor(Foo1IE(ydl))
         ydl.add_info_extractor(Foo2IE(ydl))
@@ -741,6 +824,10 @@ class TestYoutubeDL(unittest.TestCase):
         ydl.extract_info('foo1:')
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['url'], TEST_URL)
+        self.assertEqual(downloaded['title'], 'foo1 title')
+        self.assertEqual(downloaded['id'], 'testid')
+        self.assertEqual(downloaded['extractor'], 'testex')
+        self.assertEqual(downloaded['extractor_key'], 'TestEx')
 
 
 if __name__ == '__main__':

+ 8 - 1
test/test_aes.py

@@ -8,7 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
 from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
 import base64
 
@@ -34,6 +34,13 @@ class TestAES(unittest.TestCase):
         decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
         self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
 
+    def test_cbc_encrypt(self):
+        data = bytes_to_intlist(self.secret_msg)
+        encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
+        self.assertEqual(
+            encrypted,
+            b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd")
+
     def test_decrypt_text(self):
         password = intlist_to_bytes(self.key).decode('utf-8')
         encrypted = base64.b64encode(

+ 3 - 3
test/test_compat.py

@@ -27,11 +27,11 @@ from youtube_dl.compat import (
 class TestCompat(unittest.TestCase):
     def test_compat_getenv(self):
         test_str = 'тест'
-        compat_setenv('YOUTUBE-DL-TEST', test_str)
-        self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
+        compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
+        self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
 
     def test_compat_setenv(self):
-        test_var = 'YOUTUBE-DL-TEST'
+        test_var = 'YOUTUBE_DL_COMPAT_SETENV'
         test_str = 'тест'
         compat_setenv(test_var, test_str)
         compat_getenv(test_var)

+ 36 - 7
test/test_download.py

@@ -65,15 +65,31 @@ defs = gettestcases()
 
 
 class TestDownload(unittest.TestCase):
+    # Parallel testing in nosetests. See
+    # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
+    _multiprocess_shared_ = True
+
     maxDiff = None
 
+    def __str__(self):
+        """Identify each test with the `add_ie` attribute, if available."""
+
+        def strclass(cls):
+            """From 2.7's unittest; 2.6 had _strclass so we can't import it."""
+            return '%s.%s' % (cls.__module__, cls.__name__)
+
+        add_ie = getattr(self, self._testMethodName).add_ie
+        return '%s (%s)%s:' % (self._testMethodName,
+                               strclass(self.__class__),
+                               ' [%s]' % add_ie if add_ie else '')
+
     def setUp(self):
         self.defs = defs
 
 # Dynamically generate tests
 
 
-def generator(test_case):
+def generator(test_case, tname):
 
     def test_template(self):
         ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
@@ -102,6 +118,7 @@ def generator(test_case):
                 return
 
         params = get_params(test_case.get('params', {}))
+        params['outtmpl'] = tname + '_' + params['outtmpl']
         if is_playlist and 'playlist' not in test_case:
             params.setdefault('extract_flat', 'in_playlist')
             params.setdefault('skip_download', True)
@@ -134,7 +151,7 @@ def generator(test_case):
             try_num = 1
             while True:
                 try:
-                    # We're not using .download here sine that is just a shim
+                    # We're not using .download here since that is just a shim
                     # for outside error handling, and returns the exit code
                     # instead of the result dict.
                     res_dict = ydl.extract_info(
@@ -146,7 +163,7 @@ def generator(test_case):
                         raise
 
                     if try_num == RETRIES:
-                        report_warning('Failed due to network errors, skipping...')
+                        report_warning('%s failed due to network errors, skipping...' % tname)
                         return
 
                     print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
@@ -182,7 +199,16 @@ def generator(test_case):
                 self.assertEqual(
                     test_case['playlist_duration_sum'], got_duration)
 
-            for tc in test_cases:
+            # Generalize both playlists and single videos to unified format for
+            # simplicity
+            if 'entries' not in res_dict:
+                res_dict['entries'] = [res_dict]
+
+            for tc_num, tc in enumerate(test_cases):
+                tc_res_dict = res_dict['entries'][tc_num]
+                # First, check test cases' data against extracted data alone
+                expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
+                # Now, check downloaded file consistency
                 tc_filename = get_tc_filename(tc)
                 if not test_case.get('params', {}).get('skip_download', False):
                     self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
@@ -199,14 +225,15 @@ def generator(test_case):
                                 format_bytes(got_fsize)))
                     if 'md5' in tc:
                         md5_for_file = _file_md5(tc_filename)
-                        self.assertEqual(md5_for_file, tc['md5'])
+                        self.assertEqual(tc['md5'], md5_for_file)
+                # Finally, check test cases' data again but this time against
+                # extracted data from info JSON file written during processing
                 info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
                 self.assertTrue(
                     os.path.exists(info_json_fn),
                     'Missing info file %s' % info_json_fn)
                 with io.open(info_json_fn, encoding='utf-8') as infof:
                     info_dict = json.load(infof)
-
                 expect_info_dict(self, info_dict, tc.get('info_dict', {}))
         finally:
             try_rm_tcs_files()
@@ -221,13 +248,15 @@ def generator(test_case):
 
 # And add them to TestDownload
 for n, test_case in enumerate(defs):
-    test_method = generator(test_case)
     tname = 'test_' + str(test_case['name'])
     i = 1
     while hasattr(TestDownload, tname):
         tname = 'test_%s_%d' % (test_case['name'], i)
         i += 1
+    test_method = generator(test_case, tname)
     test_method.__name__ = str(tname)
+    ie_list = test_case.get('add_ie')
+    test_method.add_ie = ie_list and ','.join(ie_list)
     setattr(TestDownload, test_method.__name__, test_method)
     del test_method
 

+ 26 - 0
test/test_options.py

@@ -0,0 +1,26 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.options import _hide_login_info
+
+
+class TestOptions(unittest.TestCase):
+    def test_hide_login_info(self):
+        self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']),
+                         ['-u', 'PRIVATE', '-p', 'PRIVATE'])
+        self.assertEqual(_hide_login_info(['-u']), ['-u'])
+        self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']),
+                         ['-u', 'PRIVATE', '-u', 'PRIVATE'])
+        self.assertEqual(_hide_login_info(['--username=foo']),
+                         ['--username=PRIVATE'])
+
+
+if __name__ == '__main__':
+    unittest.main()

+ 4 - 4
test/test_subtitles.py

@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
     NPOIE,
     ComedyCentralIE,
     NRKTVIE,
-    RaiTVIE,
+    RaiPlayIE,
     VikiIE,
     ThePlatformIE,
     ThePlatformFeedIE,
@@ -258,9 +258,9 @@ class TestNRKSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
 
 
-class TestRaiSubtitles(BaseTestSubtitles):
-    url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
-    IE = RaiTVIE
+class TestRaiPlaySubtitles(BaseTestSubtitles):
+    url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
+    IE = RaiPlayIE
 
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True

+ 192 - 4
test/test_utils.py

@@ -34,6 +34,9 @@ from youtube_dl.utils import (
     find_xpath_attr,
     fix_xml_ampersands,
     get_element_by_class,
+    get_element_by_attribute,
+    get_elements_by_class,
+    get_elements_by_attribute,
     InAdvancePagedList,
     intlist_to_bytes,
     is_html,
@@ -41,6 +44,7 @@ from youtube_dl.utils import (
     limit_length,
     mimetype2ext,
     month_by_name,
+    multipart_encode,
     ohdave_rsa_encrypt,
     OnDemandPagedList,
     orderedSet,
@@ -49,9 +53,11 @@ from youtube_dl.utils import (
     parse_filesize,
     parse_count,
     parse_iso8601,
+    pkcs1pad,
     read_batch_urls,
     sanitize_filename,
     sanitize_path,
+    expand_path,
     prepend_extension,
     replace_extension,
     remove_start,
@@ -91,6 +97,9 @@ from youtube_dl.utils import (
 from youtube_dl.compat import (
     compat_chr,
     compat_etree_fromstring,
+    compat_getenv,
+    compat_os_name,
+    compat_setenv,
     compat_urlparse,
     compat_parse_qs,
 )
@@ -210,6 +219,18 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+    def test_expand_path(self):
+        def env(var):
+            return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
+
+        compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
+        self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
+        self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
+        self.assertEqual(expand_path('~'), compat_getenv('HOME'))
+        self.assertEqual(
+            expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
+            '%s/expanded' % compat_getenv('HOME'))
+
     def test_prepend_extension(self):
         self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
         self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
@@ -258,6 +279,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(unescapeHTML('&eacute;'), 'é')
         self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
         # HTML5 entities
         self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
 
@@ -295,6 +317,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
         self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
         self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
+        self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
+        self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
+        self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
 
     def test_unified_timestamps(self):
         self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -316,6 +341,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
         self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
         self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
+        self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
+        self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
+        self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -423,7 +451,9 @@ class TestUtil(unittest.TestCase):
 
     def test_shell_quote(self):
         args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
-        self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
+        self.assertEqual(
+            shell_quote(args),
+            """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
 
     def test_str_to_int(self):
         self.assertEqual(str_to_int('123,456'), 123456)
@@ -448,6 +478,9 @@ class TestUtil(unittest.TestCase):
 
     def test_urljoin(self):
         self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
         self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
         self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
         self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
@@ -507,6 +540,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
         self.assertEqual(parse_duration('87 Min.'), 5220)
         self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
+        self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
+        self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
@@ -593,6 +628,16 @@ class TestUtil(unittest.TestCase):
             'http://example.com/path', {'test': '第二行тест'})),
             query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
 
+    def test_multipart_encode(self):
+        self.assertEqual(
+            multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
+            b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
+        self.assertEqual(
+            multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
+            b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
+        self.assertRaises(
+            ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
+
     def test_dict_get(self):
         FALSE_VALUES = {
             'none': None,
@@ -640,6 +685,14 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, {'status': 'success'})
 
+        stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
+        stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@@ -781,12 +834,27 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('["abc", "def",]')
         self.assertEqual(json.loads(on), ['abc', 'def'])
 
+        on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]')
+        self.assertEqual(json.loads(on), ['abc', 'def'])
+
+        on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]')
+        self.assertEqual(json.loads(on), ['abc', 'def'])
+
         on = js_to_json('{"abc": "def",}')
         self.assertEqual(json.loads(on), {'abc': 'def'})
 
+        on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}')
+        self.assertEqual(json.loads(on), {'abc': 'def'})
+
         on = js_to_json('{ 0: /* " \n */ ",]" , }')
         self.assertEqual(json.loads(on), {'0': ',]'})
 
+        on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }')
+        self.assertEqual(json.loads(on), {'0': ',]'})
+
+        on = js_to_json('{ 0: // comment\n1 }')
+        self.assertEqual(json.loads(on), {'0': 1})
+
         on = js_to_json(r'["<p>x<\/p>"]')
         self.assertEqual(json.loads(on), ['<p>x</p>'])
 
@@ -796,15 +864,27 @@ class TestUtil(unittest.TestCase):
         on = js_to_json("['a\\\nb']")
         self.assertEqual(json.loads(on), ['ab'])
 
+        on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/")
+        self.assertEqual(json.loads(on), ['ab'])
+
         on = js_to_json('{0xff:0xff}')
         self.assertEqual(json.loads(on), {'255': 255})
 
+        on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}')
+        self.assertEqual(json.loads(on), {'255': 255})
+
         on = js_to_json('{077:077}')
         self.assertEqual(json.loads(on), {'63': 63})
 
+        on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}')
+        self.assertEqual(json.loads(on), {'63': 63})
+
         on = js_to_json('{42:42}')
         self.assertEqual(json.loads(on), {'42': 42})
 
+        on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
+        self.assertEqual(json.loads(on), {'42': 42})
+
     def test_extract_attributes(self):
         self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
         self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
@@ -842,10 +922,13 @@ class TestUtil(unittest.TestCase):
             supports_outside_bmp = False
         if supports_outside_bmp:
             self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+        # Malformed HTML should not break attributes extraction on older Python
+        self.assertEqual(extract_attributes('<mal"formed/>'), {})
 
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
+        self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
 
     def test_intlist_to_bytes(self):
         self.assertEqual(
@@ -855,7 +938,7 @@ class TestUtil(unittest.TestCase):
     def test_args_to_str(self):
         self.assertEqual(
             args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
-            'foo ba/r -baz \'2 be\' \'\''
+            'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""'
         )
 
     def test_parse_filesize(self):
@@ -983,7 +1066,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
                     <p begin="3" dur="-1">Ignored, three</p>
                 </div>
             </body>
-            </tt>'''
+            </tt>'''.encode('utf-8')
         srt_data = '''1
 00:00:00,000 --> 00:00:01,000
 The following line contains Chinese characters and special symbols
@@ -1008,7 +1091,7 @@ Line
                     <p begin="0" end="1">The first line</p>
                 </div>
             </body>
-            </tt>'''
+            </tt>'''.encode('utf-8')
         srt_data = '''1
 00:00:00,000 --> 00:00:01,000
 The first line
@@ -1016,6 +1099,67 @@ The first line
 '''
         self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
 
+        dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
+<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
+  <head>
+    <styling>
+      <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" />
+      <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" />
+      <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" />
+      <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" />
+    </styling>
+  </head>
+  <body tts:textAlign="center" style="s0">
+    <div>
+      <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p>
+      <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p>
+      <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p>
+      <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
+    </div>
+  </body>
+</tt>'''.encode('utf-8')
+        srt_data = '''1
+00:00:02,080 --> 00:00:05,839
+<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
+
+2
+00:00:02,080 --> 00:00:05,839
+<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
+</font>part 2</font></b>
+
+3
+00:00:05,839 --> 00:00:09,560
+<u><font color="lime">line 3
+part 3</font></u>
+
+4
+00:00:09,560 --> 00:00:12,359
+<i><u><font color="yellow"><font color="lime">inner
+ </font>style</font></u></i>
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
+
+        dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
+            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+            <body>
+                <div xml:lang="en">
+                    <p begin="0" end="1">Line 1</p>
+                    <p begin="1" end="2">第二行</p>
+                </div>
+            </body>
+            </tt>'''.encode('utf-16')
+        srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+Line 1
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
+
     def test_cli_option(self):
         self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
         self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
@@ -1061,6 +1205,10 @@ The first line
             cli_bool_option(
                 {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
             ['--check-certificate=true'])
+        self.assertEqual(
+            cli_bool_option(
+                {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+            [])
 
     def test_ohdave_rsa_encrypt(self):
         N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
@@ -1070,6 +1218,14 @@ The first line
             ohdave_rsa_encrypt(b'aa111222', e, N),
             '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
 
+    def test_pkcs1pad(self):
+        data = [1, 2, 3]
+        padded_data = pkcs1pad(data, 32)
+        self.assertEqual(padded_data[:2], [0, 2])
+        self.assertEqual(padded_data[28:], [0, 1, 2, 3])
+
+        self.assertRaises(ValueError, pkcs1pad, data, 8)
+
     def test_encode_base_n(self):
         self.assertEqual(encode_base_n(0, 30), '0')
         self.assertEqual(encode_base_n(80, 30), '2k')
@@ -1093,6 +1249,38 @@ The first line
         self.assertEqual(get_element_by_class('foo', html), 'nice')
         self.assertEqual(get_element_by_class('no-such-class', html), None)
 
+    def test_get_element_by_attribute(self):
+        html = '''
+            <span class="foo bar">nice</span>
+        '''
+
+        self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
+        self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
+        self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
+
+        html = '''
+            <div itemprop="author" itemscope>foo</div>
+        '''
+
+        self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
+
+    def test_get_elements_by_class(self):
+        html = '''
+            <span class="foo bar">nice</span><span class="foo bar">also nice</span>
+        '''
+
+        self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
+        self.assertEqual(get_elements_by_class('no-such-class', html), [])
+
+    def test_get_elements_by_attribute(self):
+        html = '''
+            <span class="foo bar">nice</span><span class="foo bar">also nice</span>
+        '''
+
+        self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
+        self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
+        self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
+
 
 if __name__ == '__main__':
     unittest.main()

Filskillnaden har hållts tillbaka eftersom den är för stor
+ 20 - 0
test/test_youtube_chapters.py


+ 10 - 0
test/testdata/f4m/custom_base_url.f4m

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<manifest xmlns="http://ns.adobe.com/f4m/1.0">
+    <streamType>recorded</streamType>
+    <baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
+    <duration>269.293</duration>
+    <bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
+    <media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
+        <metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
+    </media>
+</manifest>

+ 14 - 0
test/testdata/m3u8/pluzz_francetv_11507.m3u8

@@ -0,0 +1,14 @@
+#EXTM3U
+    
#EXT-X-VERSION:5
+    
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra"
+    
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0

+ 16 - 0
test/testdata/m3u8/teamcoco_11995.m3u8

@@ -0,0 +1,16 @@
+#EXTM3U
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8"
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-2m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-1m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8

+ 13 - 0
test/testdata/m3u8/toggle_mobile_12211.m3u8

@@ -0,0 +1,13 @@
+#EXTM3U
+#EXT-X-VERSION:4
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8"
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8

+ 20 - 0
test/testdata/m3u8/twitch_vod.m3u8

@@ -0,0 +1,20 @@
+#EXTM3U
+#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81"
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8

+ 10 - 0
test/testdata/m3u8/vidio.m3u8

@@ -0,0 +1,10 @@
+#EXTM3U
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,RESOLUTION=480x270,NAME="270p 3G"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,RESOLUTION=640x360,NAME="360p SD"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1200000,RESOLUTION=1280x720,NAME="720p HD"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8

+ 18 - 0
test/testdata/mpd/float_duration.mpd

@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
+	<Period bitstreamSwitching="true">
+		<AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
+			<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+			<Representation id="318597" bandwidth="61587"></Representation>
+		</AdaptationSet>
+		<AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
+			<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+			<Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
+			<Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
+			<Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
+			<Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
+			<Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
+			<Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
+		</AdaptationSet>
+	</Period>
+</MPD>

+ 218 - 0
test/testdata/mpd/urls_only.mpd

@@ -0,0 +1,218 @@
+<?xml version="1.0" ?>
+<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
+  <Period duration="PT0H4M1.728S">
+    <AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
+      <ContentComponent contentType="video" id="1"/>
+      <Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+    </AdaptationSet>
+  </Period>
+</MPD>

+ 291 - 85
youtube_dl/YoutubeDL.py

@@ -24,14 +24,17 @@ import sys
 import time
 import tokenize
 import traceback
+import random
+
+from string import ascii_letters
 
 from .compat import (
     compat_basestring,
     compat_cookiejar,
-    compat_expanduser,
     compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
+    compat_numeric_types,
     compat_os_name,
     compat_str,
     compat_tokenize_tokenize,
@@ -52,12 +55,17 @@ from .utils import (
     encode_compat_str,
     encodeFilename,
     error_to_compat_str,
+    expand_path,
     ExtractorError,
     format_bytes,
     formatSeconds,
+    GeoRestrictedError,
+    int_or_none,
+    ISO3166Utils,
     locked_file,
     make_HTTPS_handler,
     MaxDownloadsReached,
+    orderedSet,
     PagedList,
     parse_filesize,
     PerRequestProxyHandler,
@@ -85,6 +93,7 @@ from .utils import (
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor.openload import PhantomJSwrapper
 from .downloader import get_suitable_downloader
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
@@ -159,6 +168,7 @@ class YoutubeDL(object):
     playlistend:       Playlist item to end at.
     playlist_items:    Specific indices of playlist to download.
     playlistreverse:   Download playlist items in reverse order.
+    playlistrandom:    Download playlist items in random order.
     matchtitle:        Download only matching titles.
     rejecttitle:       Reject downloads for matching titles.
     logger:            Log messages to a logging.Logger instance.
@@ -270,6 +280,12 @@ class YoutubeDL(object):
                        If it returns None, the video is downloaded.
                        match_filter_func in utils.py is one example for this.
     no_color:          Do not emit color codes in output.
+    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
+                       HTTP header (experimental)
+    geo_bypass_country:
+                       Two-letter ISO 3166-2 country code that will be used for
+                       explicit geographic restriction bypassing via faking
+                       X-Forwarded-For HTTP header (experimental)
 
     The following options determine which downloader is picked:
     external_downloader: Executable of the external downloader to call.
@@ -289,8 +305,25 @@ class YoutubeDL(object):
                        otherwise prefer avconv.
     postprocessor_args: A list of additional command-line arguments for the
                         postprocessor.
+
+    The following options are used by the Youtube extractor:
+    youtube_include_dash_manifest: If True (default), DASH manifests and related
+                        data will be downloaded and processed by extractor.
+                        You can reduce network I/O by disabling it if you don't
+                        care about DASH.
     """
 
+    _NUMERIC_FIELDS = set((
+        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+        'timestamp', 'upload_year', 'upload_month', 'upload_day',
+        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+        'average_rating', 'comment_count', 'age_limit',
+        'start_time', 'end_time',
+        'chapter_number', 'season_number', 'episode_number',
+        'track_number', 'disc_number', 'release_year',
+        'playlist_index',
+    ))
+
     params = None
     _ies = []
     _pps = []
@@ -317,11 +350,21 @@ class YoutubeDL(object):
         self.params.update(params)
         self.cache = Cache(self)
 
-        if self.params.get('cn_verification_proxy') is not None:
-            self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
+        def check_deprecated(param, option, suggestion):
+            if self.params.get(param) is not None:
+                self.report_warning(
+                    '%s is deprecated. Use %s instead.' % (option, suggestion))
+                return True
+            return False
+
+        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
             if self.params.get('geo_verification_proxy') is None:
                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 
+        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
+        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
+        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
+
         if params.get('bidi_workaround', False):
             try:
                 import pty
@@ -349,10 +392,10 @@ class YoutubeDL(object):
                 else:
                     raise
 
-        if (sys.version_info >= (3,) and sys.platform != 'win32' and
+        if (sys.platform != 'win32' and
                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                 not params.get('restrictfilenames', False)):
-            # On Python 3, the Unicode filesystem API will throw errors (#1474)
+            # Unicode filesystem API will throw errors (#1474, #13027)
             self.report_warning(
                 'Assuming --restrict-filenames since file system encoding '
                 'cannot encode all characters. '
@@ -477,24 +520,25 @@ class YoutubeDL(object):
     def to_console_title(self, message):
         if not self.params.get('consoletitle', False):
             return
-        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
-            # c_wchar_p() might not be necessary if `message` is
-            # already of type unicode()
-            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+        if compat_os_name == 'nt':
+            if ctypes.windll.kernel32.GetConsoleWindow():
+                # c_wchar_p() might not be necessary if `message` is
+                # already of type unicode()
+                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
         elif 'TERM' in os.environ:
             self._write_string('\033]0;%s\007' % message, self._screen_file)
 
     def save_console_title(self):
         if not self.params.get('consoletitle', False):
             return
-        if 'TERM' in os.environ:
+        if compat_os_name != 'nt' and 'TERM' in os.environ:
             # Save the title on stack
             self._write_string('\033[22;0t', self._screen_file)
 
     def restore_console_title(self):
         if not self.params.get('consoletitle', False):
             return
-        if 'TERM' in os.environ:
+        if compat_os_name != 'nt' and 'TERM' in os.environ:
             # Restore the title from stack
             self._write_string('\033[23;0t', self._screen_file)
 
@@ -583,10 +627,7 @@ class YoutubeDL(object):
             autonumber_size = self.params.get('autonumber_size')
             if autonumber_size is None:
                 autonumber_size = 5
-            autonumber_templ = '%0' + str(autonumber_size) + 'd'
-            template_dict['autonumber'] = autonumber_templ % self._num_downloads
-            if template_dict.get('playlist_index') is not None:
-                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
+            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
             if template_dict.get('resolution') is None:
                 if template_dict.get('width') and template_dict.get('height'):
                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
@@ -598,15 +639,64 @@ class YoutubeDL(object):
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),
                 restricted=self.params.get('restrictfilenames'),
-                is_id=(k == 'id'))
-            template_dict = dict((k, sanitize(k, v))
+                is_id=(k == 'id' or k.endswith('_id')))
+            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                  for k, v in template_dict.items()
                                  if v is not None and not isinstance(v, (list, tuple, dict)))
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-            tmpl = compat_expanduser(outtmpl)
-            filename = tmpl % template_dict
+
+            # For fields playlist_index and autonumber convert all occurrences
+            # of %(field)s to %(field)0Nd for backward compatibility
+            field_size_compat_map = {
+                'playlist_index': len(str(template_dict['n_entries'])),
+                'autonumber': autonumber_size,
+            }
+            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
+            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
+            if mobj:
+                outtmpl = re.sub(
+                    FIELD_SIZE_COMPAT_RE,
+                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
+                    outtmpl)
+
+            # Missing numeric fields used together with integer presentation types
+            # in format specification will break the argument substitution since
+            # string 'NA' is returned for missing fields. We will patch output
+            # template for missing fields to meet string presentation type.
+            for numeric_field in self._NUMERIC_FIELDS:
+                if numeric_field not in template_dict:
+                    # As of [1] format syntax is:
+                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
+                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
+                    FORMAT_RE = r'''(?x)
+                        (?<!%)
+                        %
+                        \({0}\)  # mapping key
+                        (?:[#0\-+ ]+)?  # conversion flags (optional)
+                        (?:\d+)?  # minimum field width (optional)
+                        (?:\.\d+)?  # precision (optional)
+                        [hlL]?  # length modifier (optional)
+                        [diouxXeEfFgGcrs%]  # conversion type
+                    '''
+                    outtmpl = re.sub(
+                        FORMAT_RE.format(numeric_field),
+                        r'%({0})s'.format(numeric_field), outtmpl)
+
+            # expand_path translates '%%' into '%' and '$$' into '$'
+            # correspondingly that is not what we want since we need to keep
+            # '%%' intact for template dict substitution step. Working around
+            # with boundary-alike separator hack.
+            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+            # outtmpl should be expand_path'ed before template dict substitution
+            # because meta fields may contain env variables we don't want to
+            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+            # title "Hello $PATH", we don't want `$PATH` to be expanded.
+            filename = expand_path(outtmpl).replace(sep, '') % template_dict
+
             # Temporary fix for #4787
             # 'Treat' all problem characters by passing filename through preferredencoding
             # to workaround encoding issues with subprocess on python2 @ Windows
@@ -705,6 +795,14 @@ class YoutubeDL(object):
                     return self.process_ie_result(ie_result, download, extra_info)
                 else:
                     return ie_result
+            except GeoRestrictedError as e:
+                msg = e.msg
+                if e.countries:
+                    msg += '\nThis video is available in %s.' % ', '.join(
+                        map(ISO3166Utils.short2full, e.countries))
+                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
+                self.report_error(msg)
+                break
             except ExtractorError as e:  # An error we somewhat expected
                 self.report_error(compat_str(e), e.format_traceback())
                 break
@@ -762,19 +860,32 @@ class YoutubeDL(object):
                 ie_result['url'], ie_key=ie_result.get('ie_key'),
                 extra_info=extra_info, download=False, process=False)
 
+            # extract_info may return None when ignoreerrors is enabled and
+            # extraction failed with an error, don't crash and return early
+            # in this case
+            if not info:
+                return info
+
             force_properties = dict(
                 (k, v) for k, v in ie_result.items() if v is not None)
-            for f in ('_type', 'url', 'ie_key'):
+            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                 if f in force_properties:
                     del force_properties[f]
             new_result = info.copy()
             new_result.update(force_properties)
 
-            assert new_result.get('_type') != 'url_transparent'
+            # Extracted info may not be a video result (i.e.
+            # info.get('_type', 'video') != video) but rather an url or
+            # url_transparent. In such cases outer metadata (from ie_result)
+            # should be propagated to inner one (info). For this to happen
+            # _type of info should be overridden with url_transparent. This
+            # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
+            if new_result.get('_type') == 'url':
+                new_result['_type'] = 'url_transparent'
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
-        elif result_type == 'playlist' or result_type == 'multi_video':
+        elif result_type in ('playlist', 'multi_video'):
             # We process each entry in the playlist
             playlist = ie_result.get('title') or ie_result.get('id')
             self.to_screen('[download] Downloading playlist: %s' % playlist)
@@ -798,15 +909,25 @@ class YoutubeDL(object):
                                 yield int(item)
                         else:
                             yield int(string_segment)
-                playlistitems = iter_playlistitems(playlistitems_str)
+                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
 
             ie_entries = ie_result['entries']
+
+            def make_playlistitems_entries(list_ie_entries):
+                # Pick the entries named by the 1-based positions in the
+                # enclosing `playlistitems`, in that order. Positions whose
+                # 0-based index falls outside [-len, len) are dropped.
+                total = len(list_ie_entries)
+                selected = []
+                for item in playlistitems:
+                    index = item - 1
+                    if -total <= index < total:
+                        selected.append(list_ie_entries[index])
+                return selected
+
+            def report_download(num_entries):
+                # Announce how many entries of the current playlist will be
+                # processed
+                message = '[%s] playlist %s: Downloading %d videos' % (
+                    ie_result['extractor'], playlist, num_entries)
+                self.to_screen(message)
+
             if isinstance(ie_entries, list):
                 n_all_entries = len(ie_entries)
                 if playlistitems:
-                    entries = [
-                        ie_entries[i - 1] for i in playlistitems
-                        if -n_all_entries <= i - 1 < n_all_entries]
+                    entries = make_playlistitems_entries(ie_entries)
                 else:
                     entries = ie_entries[playliststart:playlistend]
                 n_entries = len(entries)
@@ -824,31 +945,38 @@ class YoutubeDL(object):
                     entries = ie_entries.getslice(
                         playliststart, playlistend)
                 n_entries = len(entries)
-                self.to_screen(
-                    '[%s] playlist %s: Downloading %d videos' %
-                    (ie_result['extractor'], playlist, n_entries))
+                report_download(n_entries)
             else:  # iterable
                 if playlistitems:
-                    entry_list = list(ie_entries)
-                    entries = [entry_list[i - 1] for i in playlistitems]
+                    entries = make_playlistitems_entries(list(itertools.islice(
+                        ie_entries, 0, max(playlistitems))))
                 else:
                     entries = list(itertools.islice(
                         ie_entries, playliststart, playlistend))
                 n_entries = len(entries)
-                self.to_screen(
-                    '[%s] playlist %s: Downloading %d videos' %
-                    (ie_result['extractor'], playlist, n_entries))
+                report_download(n_entries)
 
             if self.params.get('playlistreverse', False):
                 entries = entries[::-1]
 
+            if self.params.get('playlistrandom', False):
+                random.shuffle(entries)
+
+            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
             for i, entry in enumerate(entries, 1):
                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+                # This __x_forwarded_for_ip thing is a bit ugly but requires
+                # minimal changes
+                if x_forwarded_for:
+                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                 extra = {
                     'n_entries': n_entries,
                     'playlist': playlist,
                     'playlist_id': ie_result.get('id'),
                     'playlist_title': ie_result.get('title'),
+                    'playlist_uploader': ie_result.get('uploader'),
+                    'playlist_uploader_id': ie_result.get('uploader_id'),
                     'playlist_index': i + playliststart,
                     'extractor': ie_result['extractor'],
                     'webpage_url': ie_result['webpage_url'],
@@ -952,6 +1080,30 @@ class YoutubeDL(object):
             return op(actual_value, comparison_value)
         return _filter
 
+    def _default_format_spec(self, info_dict, download=True):
+        """Build the format selector used when no format was requested.
+
+        Returns 'bestvideo+bestaudio/best' by default. The two alternatives
+        are swapped ('best/bestvideo+bestaudio') when the output template is
+        '-', when info_dict marks the video as live, or when FFmpegMergerPP
+        reports that it cannot merge. With the 'simulate' param set or
+        download=False the default order is always kept.
+        """
+
+        def merger_usable():
+            pp = FFmpegMergerPP(self)
+            return pp.available and pp.can_merge()
+
+        def single_file_preferred():
+            # In these modes the default order stands and the merger is
+            # never probed
+            if self.params.get('simulate', False) or not download:
+                return False
+            # Evaluated left to right; the merger is only probed last
+            return bool(
+                self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-' or
+                info_dict.get('is_live') or
+                not merger_usable())
+
+        candidates = ['bestvideo+bestaudio', 'best']
+        if single_file_preferred():
+            candidates = candidates[::-1]
+        return '/'.join(candidates)
+
     def build_format_selector(self, format_spec):
         def syntax_error(note, start):
             message = (
@@ -1228,6 +1380,11 @@ class YoutubeDL(object):
         if cookies:
             res['Cookie'] = cookies
 
+        if 'X-Forwarded-For' not in res:
+            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
+            if x_forwarded_for_ip:
+                res['X-Forwarded-For'] = x_forwarded_for_ip
+
         return res
 
     def _calc_cookies(self, info_dict):
@@ -1243,9 +1400,28 @@ class YoutubeDL(object):
         if 'title' not in info_dict:
             raise ExtractorError('Missing "title" field in extractor result')
 
-        if not isinstance(info_dict['id'], compat_str):
-            self.report_warning('"id" field is not a string - forcing string conversion')
-            info_dict['id'] = compat_str(info_dict['id'])
+        def report_force_conversion(field, field_not, conversion):
+            # Warn that an extractor returned `field` with the wrong type and
+            # that it is being converted
+            message = (
+                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+                % (field, field_not, conversion))
+            self.report_warning(message)
+
+        def sanitize_string_field(info, string_field):
+            # Coerce info[string_field] to text in place, warning about the
+            # extractor error. Missing (None) values and values that are
+            # already compat_str are left untouched.
+            field = info.get(string_field)
+            if field is None or isinstance(field, compat_str):
+                return
+            report_force_conversion(string_field, 'a string', 'string')
+            if isinstance(field, bytes):
+                # Byte strings must be decoded explicitly: on Python 3
+                # str(b'x') yields the repr "b'x'" rather than the text, and
+                # on Python 2 unicode() would decode as ASCII and may raise.
+                # NOTE(review): assumes compat_str is the builtin text type -
+                # confirm against compat.py
+                info[string_field] = field.decode('utf-8', 'replace')
+            else:
+                info[string_field] = compat_str(field)
+
+        def sanitize_numeric_fields(info):
+            # Force every field listed in self._NUMERIC_FIELDS that is present
+            # but not numeric through int_or_none(), warning for each one
+            for name in self._NUMERIC_FIELDS:
+                value = info.get(name)
+                if value is not None and not isinstance(value, compat_numeric_types):
+                    report_force_conversion(name, 'numeric', 'int')
+                    info[name] = int_or_none(value)
+
+        sanitize_string_field(info_dict, 'id')
+        sanitize_numeric_fields(info_dict)
 
         if 'playlist' not in info_dict:
             # It isn't part of a playlist
@@ -1326,16 +1502,28 @@ class YoutubeDL(object):
         if not formats:
             raise ExtractorError('No video formats found!')
 
+        def is_wellformed(f):
+            # A format is kept only if it carries a non-empty 'url'; a byte
+            # string URL is converted in place via sanitize_string_field
+            url = f.get('url')
+            if url:
+                if isinstance(url, bytes):
+                    sanitize_string_field(f, 'url')
+                return True
+            self.report_warning(
+                '"url" field is missing or empty - skipping format, '
+                'there is an error in extractor')
+            return False
+
+        # Filter out malformed formats for better extraction robustness
+        formats = list(filter(is_wellformed, formats))
+
         formats_dict = {}
 
         # We check that all the formats have the format and format_id fields
         for i, format in enumerate(formats):
-            if 'url' not in format:
-                raise ExtractorError('Missing "url" key in result (index %d)' % i)
-
+            sanitize_string_field(format, 'format_id')
+            sanitize_numeric_fields(format)
             format['url'] = sanitize_url(format['url'])
-
-            if format.get('format_id') is None:
+            if not format.get('format_id'):
                 format['format_id'] = compat_str(i)
             else:
                 # Sanitize format_id from characters used in format selector expression
@@ -1363,13 +1551,16 @@ class YoutubeDL(object):
                 format['ext'] = determine_ext(format['url']).lower()
             # Automatically determine protocol if missing (useful for format
             # selection purposes)
-            if 'protocol' not in format:
+            if format.get('protocol') is None:
                 format['protocol'] = determine_protocol(format)
             # Add HTTP headers, so that external programs can use them from the
             # json output
             full_format_info = info_dict.copy()
             full_format_info.update(format)
             format['http_headers'] = self._calc_headers(full_format_info)
+        # Remove private housekeeping stuff
+        if '__x_forwarded_for_ip' in info_dict:
+            del info_dict['__x_forwarded_for_ip']
 
         # TODO Central sorting goes here
 
@@ -1385,14 +1576,10 @@ class YoutubeDL(object):
 
         req_format = self.params.get('format')
         if req_format is None:
-            req_format_list = []
-            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    not info_dict.get('is_live')):
-                merger = FFmpegMergerPP(self)
-                if merger.available and merger.can_merge():
-                    req_format_list.append('bestvideo+bestaudio')
-            req_format_list.append('best')
-            req_format = '/'.join(req_format_list)
+            req_format = self._default_format_spec(info_dict, download=download)
+            if self.params.get('verbose'):
+                self.to_stdout('[debug] Default format spec: %s' % req_format)
+
         format_selector = self.build_format_selector(req_format)
 
         # While in format selection we may need to have an access to the original
@@ -1544,12 +1731,17 @@ class YoutubeDL(object):
         if filename is None:
             return
 
-        try:
-            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
-            if dn and not os.path.exists(dn):
-                os.makedirs(dn)
-        except (OSError, IOError) as err:
-            self.report_error('unable to create directory ' + error_to_compat_str(err))
+        def ensure_dir_exists(path):
+            # Create the parent directory of `path` when it is missing.
+            # Returns True on success; on failure the error is reported and
+            # False is returned.
+            parent = os.path.dirname(path)
+            try:
+                if parent and not os.path.exists(parent):
+                    os.makedirs(parent)
+            except (OSError, IOError) as err:
+                self.report_error('unable to create directory ' + error_to_compat_str(err))
+                return False
+            return True
+
+        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
             return
 
         if self.params.get('writedescription', False):
@@ -1592,29 +1784,30 @@ class YoutubeDL(object):
             ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
-                if sub_info.get('data') is not None:
-                    sub_data = sub_info['data']
+                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
+                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                 else:
-                    try:
-                        sub_data = ie._download_webpage(
-                            sub_info['url'], info_dict['id'], note=False)
-                    except ExtractorError as err:
-                        self.report_warning('Unable to download subtitle for "%s": %s' %
-                                            (sub_lang, error_to_compat_str(err.cause)))
-                        continue
-                try:
-                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
-                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
-                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
+                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+                    if sub_info.get('data') is not None:
+                        try:
+                            # Use newline='' to prevent conversion of newline characters
+                            # See https://github.com/rg3/youtube-dl/issues/10268
+                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+                                subfile.write(sub_info['data'])
+                        except (OSError, IOError):
+                            self.report_error('Cannot write subtitles file ' + sub_filename)
+                            return
                     else:
-                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
-                        # Use newline='' to prevent conversion of newline characters
-                        # See https://github.com/rg3/youtube-dl/issues/10268
-                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
-                            subfile.write(sub_data)
-                except (OSError, IOError):
-                    self.report_error('Cannot write subtitles file ' + sub_filename)
-                    return
+                        try:
+                            sub_data = ie._request_webpage(
+                                sub_info['url'], info_dict['id'], note=False).read()
+                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                                subfile.write(sub_data)
+                        except (ExtractorError, IOError, OSError, ValueError) as err:
+                            self.report_warning('Unable to download subtitle for "%s": %s' %
+                                                (sub_lang, error_to_compat_str(err)))
+                            continue
 
         if self.params.get('writeinfojson', False):
             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
@@ -1687,8 +1880,11 @@ class YoutubeDL(object):
                         for f in requested_formats:
                             new_info = dict(info_dict)
                             new_info.update(f)
-                            fname = self.prepare_filename(new_info)
-                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+                            fname = prepend_extension(
+                                self.prepare_filename(new_info),
+                                'f%s' % f['format_id'], new_info['ext'])
+                            if not ensure_dir_exists(fname):
+                                return
                             downloaded.append(fname)
                             partial_success = dl(fname, new_info)
                             success = success and partial_success
@@ -1755,7 +1951,7 @@ class YoutubeDL(object):
                         info_dict.get('protocol') == 'm3u8' and
                         self.params.get('hls_prefer_native')):
                     if fixup_policy == 'warn':
-                        self.report_warning('%s: malformated aac bitstream.' % (
+                        self.report_warning('%s: malformed AAC bitstream detected.' % (
                             info_dict['id']))
                     elif fixup_policy == 'detect_or_warn':
                         fixup_pp = FFmpegFixupM3u8PP(self)
@@ -1764,7 +1960,7 @@ class YoutubeDL(object):
                             info_dict['__postprocessors'].append(fixup_pp)
                         else:
                             self.report_warning(
-                                '%s: malformated aac bitstream. %s'
+                                '%s: malformed AAC bitstream detected. %s'
                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                     else:
                         assert fixup_policy in ('ignore', 'never')
@@ -1780,6 +1976,7 @@ class YoutubeDL(object):
         """Download a given list of URLs."""
         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
         if (len(url_list) > 1 and
+                outtmpl != '-' and
                 '%' not in outtmpl and
                 self.params.get('max_downloads') != 1):
             raise SameFileError(outtmpl)
@@ -2036,11 +2233,20 @@ class YoutubeDL(object):
                 sys.exc_clear()
             except Exception:
                 pass
-        self._write_string('[debug] Python version %s - %s\n' % (
-            platform.python_version(), platform_name()))
+
+        def python_implementation():
+            # Name of the running interpreter; for PyPy the PyPy release
+            # (major.minor.micro) is appended when sys exposes it
+            name = platform.python_implementation()
+            is_pypy = name == 'PyPy' and hasattr(sys, 'pypy_version_info')
+            if not is_pypy:
+                return name
+            pypy_release = ' version %d.%d.%d' % sys.pypy_version_info[:3]
+            return name + pypy_release
+
+        self._write_string('[debug] Python version %s (%s) - %s\n' % (
+            platform.python_version(), python_implementation(),
+            platform_name()))
 
         exe_versions = FFmpegPostProcessor.get_versions(self)
         exe_versions['rtmpdump'] = rtmpdump_version()
+        exe_versions['phantomjs'] = PhantomJSwrapper._version()
         exe_str = ', '.join(
             '%s %s' % (exe, v)
             for exe, v in sorted(exe_versions.items())
@@ -2077,7 +2283,7 @@ class YoutubeDL(object):
         if opts_cookiefile is None:
             self.cookiejar = compat_cookiejar.CookieJar()
         else:
-            opts_cookiefile = compat_expanduser(opts_cookiefile)
+            opts_cookiefile = expand_path(opts_cookiefile)
             self.cookiejar = compat_cookiejar.MozillaCookieJar(
                 opts_cookiefile)
             if os.access(opts_cookiefile, os.R_OK):

+ 30 - 9
youtube_dl/__init__.py

@@ -16,7 +16,6 @@ from .options import (
     parseOpts,
 )
 from .compat import (
-    compat_expanduser,
     compat_getpass,
     compat_shlex_split,
     workaround_optparse_bug9161,
@@ -26,6 +25,7 @@ from .utils import (
     decodeOption,
     DEFAULT_OUTTMPL,
     DownloadError,
+    expand_path,
     match_filter_func,
     MaxDownloadsReached,
     preferredencoding,
@@ -88,7 +88,7 @@ def _real_main(argv=None):
                 batchfd = sys.stdin
             else:
                 batchfd = io.open(
-                    compat_expanduser(opts.batchfile),
+                    expand_path(opts.batchfile),
                     'r', encoding='utf-8', errors='ignore')
             batch_urls = read_batch_urls(batchfd)
             if opts.verbose:
@@ -133,6 +133,12 @@ def _real_main(argv=None):
         parser.error('TV Provider account username missing\n')
     if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
         parser.error('using output template conflicts with using title, video ID or auto number')
+    if opts.autonumber_size is not None:
+        if opts.autonumber_size <= 0:
+            parser.error('auto number size must be positive')
+    if opts.autonumber_start is not None:
+        if opts.autonumber_start < 0:
+            parser.error('auto number start must be positive or 0')
     if opts.usetitle and opts.useid:
         parser.error('using title conflicts with using video ID')
     if opts.username is not None and opts.password is None:
@@ -190,7 +196,7 @@ def _real_main(argv=None):
     if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
         raise ValueError('Playlist end must be greater than playlist start')
     if opts.extractaudio:
-        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
+        if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
             parser.error('invalid audio format specified')
     if opts.audioquality:
         opts.audioquality = opts.audioquality.strip('k').strip('K')
@@ -200,7 +206,7 @@ def _real_main(argv=None):
         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
             parser.error('invalid video recode format specified')
     if opts.convertsubtitles is not None:
-        if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
+        if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
             parser.error('invalid subtitle format specified')
 
     if opts.date is not None:
@@ -232,18 +238,15 @@ def _real_main(argv=None):
 
     any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
     any_printing = opts.print_json
-    download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+    download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 
     # PostProcessors
     postprocessors = []
-    # Add the metadata pp first, the other pps will copy it
     if opts.metafromtitle:
         postprocessors.append({
             'key': 'MetadataFromTitle',
             'titleformat': opts.metafromtitle
         })
-    if opts.addmetadata:
-        postprocessors.append({'key': 'FFmpegMetadata'})
     if opts.extractaudio:
         postprocessors.append({
             'key': 'FFmpegExtractAudio',
@@ -256,6 +259,16 @@ def _real_main(argv=None):
             'key': 'FFmpegVideoConvertor',
             'preferedformat': opts.recodevideo,
         })
+    # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
+    # FFmpegExtractAudioPP as containers before conversion may not support
+    # metadata (3gp, webm, etc.)
+    # And this post-processor should be placed before other metadata
+    # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
+    # extra metadata. By default ffmpeg preserves metadata applicable for both
+    # source and target containers. From this point the container won't change,
+    # so metadata can be added here.
+    if opts.addmetadata:
+        postprocessors.append({'key': 'FFmpegMetadata'})
     if opts.convertsubtitles:
         postprocessors.append({
             'key': 'FFmpegSubtitlesConvertor',
@@ -321,6 +334,7 @@ def _real_main(argv=None):
         'listformats': opts.listformats,
         'outtmpl': outtmpl,
         'autonumber_size': opts.autonumber_size,
+        'autonumber_start': opts.autonumber_start,
         'restrictfilenames': opts.restrictfilenames,
         'ignoreerrors': opts.ignoreerrors,
         'force_generic_extractor': opts.force_generic_extractor,
@@ -329,6 +343,7 @@ def _real_main(argv=None):
         'retries': opts.retries,
         'fragment_retries': opts.fragment_retries,
         'skip_unavailable_fragments': opts.skip_unavailable_fragments,
+        'keep_fragments': opts.keep_fragments,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'continuedl': opts.continue_dl,
@@ -337,6 +352,7 @@ def _real_main(argv=None):
         'playliststart': opts.playliststart,
         'playlistend': opts.playlistend,
         'playlistreverse': opts.playlist_reverse,
+        'playlistrandom': opts.playlist_random,
         'noplaylist': opts.noplaylist,
         'logtostderr': opts.outtmpl == '-',
         'consoletitle': opts.consoletitle,
@@ -406,6 +422,11 @@ def _real_main(argv=None):
         'cn_verification_proxy': opts.cn_verification_proxy,
         'geo_verification_proxy': opts.geo_verification_proxy,
         'config_location': opts.config_location,
+        'geo_bypass': opts.geo_bypass,
+        'geo_bypass_country': opts.geo_bypass_country,
+        # just for deprecation check
+        'autonumber': opts.autonumber if opts.autonumber is True else None,
+        'usetitle': opts.usetitle if opts.usetitle is True else None,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
@@ -429,7 +450,7 @@ def _real_main(argv=None):
 
         try:
             if opts.load_info_filename is not None:
-                retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
+                retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
             else:
                 retcode = ydl.download(all_urls)
         except MaxDownloadsReached:

+ 28 - 0
youtube_dl/aes.py

@@ -60,6 +60,34 @@ def aes_cbc_decrypt(data, key, iv):
     return decrypted_data
 
 
+def aes_cbc_encrypt(data, key, iv):
+    """
+    Encrypt with aes in CBC mode, padding the last block PKCS#7-style
+
+    Padding bytes are appended only to a final block shorter than
+    BLOCK_SIZE_BYTES; input whose length is already a multiple of the
+    block size is encrypted without an extra padding block, and empty
+    input yields an empty result.
+
+    @param {int[]} data        cleartext
+    @param {int[]} key         16/24/32-Byte cipher key
+    @param {int[]} iv          16-Byte IV
+    @returns {int[]}           encrypted data
+    """
+    round_keys = key_expansion(key)
+    n_blocks = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+    ciphertext = []
+    # Each cleartext block is XORed with the previous cipher block (the IV
+    # for the first one) before being encrypted
+    chain = iv
+    for start in range(0, n_blocks * BLOCK_SIZE_BYTES, BLOCK_SIZE_BYTES):
+        chunk = data[start:start + BLOCK_SIZE_BYTES]
+        pad = BLOCK_SIZE_BYTES - len(chunk)
+        chunk = chunk + [pad] * pad
+
+        chain = aes_encrypt(xor(chunk, chain), round_keys)
+        ciphertext.extend(chain)
+
+    return ciphertext
+
+
 def key_expansion(data):
     """
     Generate key schedule

+ 6 - 3
youtube_dl/cache.py

@@ -8,8 +8,11 @@ import re
 import shutil
 import traceback
 
-from .compat import compat_expanduser, compat_getenv
-from .utils import write_json_file
+from .compat import compat_getenv
+from .utils import (
+    expand_path,
+    write_json_file,
+)
 
 
 class Cache(object):
@@ -21,7 +24,7 @@ class Cache(object):
         if res is None:
             cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
             res = os.path.join(cache_root, 'youtube-dl')
-        return compat_expanduser(res)
+        return expand_path(res)
 
     def _get_cache_fn(self, section, key, dtype):
         assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \

+ 86 - 12
youtube_dl/compat.py

@@ -3,11 +3,14 @@ from __future__ import unicode_literals
 
 import binascii
 import collections
+import ctypes
 import email
 import getpass
 import io
+import itertools
 import optparse
 import os
+import platform
 import re
 import shlex
 import shutil
@@ -15,7 +18,6 @@ import socket
 import struct
 import subprocess
 import sys
-import itertools
 import xml.etree.ElementTree
 
 
@@ -2322,6 +2324,19 @@ try:
 except ImportError:  # Python 2
     from HTMLParser import HTMLParser as compat_HTMLParser
 
+try:  # Python 2
+    from HTMLParser import HTMLParseError as compat_HTMLParseError
+except ImportError:  # Python <3.5
+    try:
+        from html.parser import HTMLParseError as compat_HTMLParseError
+    except ImportError:  # Python >3.4
+
+        # HTMLParseError has been deprecated in Python 3.3 and removed in
+        # Python 3.5. Introduce a dummy exception for Python 3.5+ to allow
+        # compatible and uniform cross-version exception handling
+        class compat_HTMLParseError(Exception):
+            pass
+
 try:
     from subprocess import DEVNULL
     compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@ -2529,6 +2544,24 @@ else:
                 el.text = el.text.decode('utf-8')
         return doc
 
+if hasattr(etree, 'register_namespace'):
+    compat_etree_register_namespace = etree.register_namespace
+else:
+    def compat_etree_register_namespace(prefix, uri):
+        """Register a namespace prefix.
+        The registry is global, and any existing mapping for either the
+        given prefix or the namespace URI will be removed.
+        *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
+        attributes in this namespace will be serialized with prefix if possible.
+        ValueError is raised if prefix is reserved or is invalid.
+        """
+        if re.match(r"ns\d+$", prefix):
+            raise ValueError("Prefix format reserved for internal use")
+        for k, v in list(etree._namespace_map.items()):
+            if k == uri or v == prefix:
+                del etree._namespace_map[k]
+        etree._namespace_map[uri] = prefix
+
 if sys.version_info < (2, 7):
     # Here comes the crazy part: In 2.6, if the xpath is a unicode,
     # .//node does not match if a node is a direct child of . !
@@ -2586,14 +2619,22 @@ except ImportError:  # Python 2
                 parsed_result[name] = [value]
         return parsed_result
 
-try:
-    from shlex import quote as compat_shlex_quote
-except ImportError:  # Python < 3.3
+
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
+if compat_os_name == 'nt':
     def compat_shlex_quote(s):
-        if re.match(r'^[-_\w./]+$', s):
-            return s
-        else:
-            return "'" + s.replace("'", "'\"'\"'") + "'"
+        return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+else:
+    try:
+        from shlex import quote as compat_shlex_quote
+    except ImportError:  # Python < 3.3
+        def compat_shlex_quote(s):
+            if re.match(r'^[-_\w./]+$', s):
+                return s
+            else:
+                return "'" + s.replace("'", "'\"'\"'") + "'"
 
 
 try:
@@ -2618,9 +2659,6 @@ def compat_ord(c):
         return ord(c)
 
 
-compat_os_name = os._name if os.name == 'java' else os.name
-
-
 if sys.version_info >= (3, 0):
     compat_getenv = os.getenv
     compat_expanduser = os.path.expanduser
@@ -2674,7 +2712,7 @@ else:
                 userhome = pwent.pw_dir
             userhome = userhome.rstrip('/')
             return (userhome + path[i:]) or '/'
-    elif compat_os_name == 'nt' or compat_os_name == 'ce':
+    elif compat_os_name in ('nt', 'ce'):
         def compat_expanduser(path):
             """Expand ~ and ~user constructs.
 
@@ -2742,6 +2780,12 @@ else:
     compat_kwargs = lambda kwargs: kwargs
 
 
+try:
+    compat_numeric_types = (int, float, long, complex)
+except NameError:  # Python 3
+    compat_numeric_types = (int, float, complex)
+
+
 if sys.version_info < (2, 7):
     def compat_socket_create_connection(address, timeout, source_address=None):
         host, port = address
@@ -2856,15 +2900,43 @@ else:
     compat_struct_pack = struct.pack
     compat_struct_unpack = struct.unpack
 
+try:
+    from future_builtins import zip as compat_zip
+except ImportError:  # not 2.6+ or is 3.x
+    try:
+        from itertools import izip as compat_zip  # Python 2 < 2.6
+    except ImportError:
+        compat_zip = zip
+
+if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
+    # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
+    # names, see the original PyPy issue [1] and the youtube-dl one [2].
+    # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
+    # 2. https://github.com/rg3/youtube-dl/pull/4392
+    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+        real = ctypes.WINFUNCTYPE(*args, **kwargs)
+
+        def resf(tpl, *args, **kwargs):
+            funcname, dll = tpl
+            return real((str(funcname), dll), *args, **kwargs)
+
+        return resf
+else:
+    def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+        return ctypes.WINFUNCTYPE(*args, **kwargs)
+
 
 __all__ = [
+    'compat_HTMLParseError',
     'compat_HTMLParser',
     'compat_HTTPError',
     'compat_basestring',
     'compat_chr',
     'compat_cookiejar',
     'compat_cookies',
+    'compat_ctypes_WINFUNCTYPE',
     'compat_etree_fromstring',
+    'compat_etree_register_namespace',
     'compat_expanduser',
     'compat_get_terminal_size',
     'compat_getenv',
@@ -2876,6 +2948,7 @@ __all__ = [
     'compat_input',
     'compat_itertools_count',
     'compat_kwargs',
+    'compat_numeric_types',
     'compat_ord',
     'compat_os_name',
     'compat_parse_qs',
@@ -2903,5 +2976,6 @@ __all__ = [
     'compat_urlretrieve',
     'compat_xml_parse_error',
     'compat_xpath',
+    'compat_zip',
     'workaround_optparse_bug9161',
 ]

+ 3 - 0
youtube_dl/downloader/__init__.py

@@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}):
         if ed.can_download(info_dict):
             return ed
 
+    if protocol.startswith('m3u8') and info_dict.get('is_live'):
+        return FFmpegFD
+
     if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
         return HlsFD
 

+ 28 - 25
youtube_dl/downloader/common.py

@@ -8,10 +8,11 @@ import random
 
 from ..compat import compat_os_name
 from ..utils import (
+    decodeArgument,
     encodeFilename,
     error_to_compat_str,
-    decodeArgument,
     format_bytes,
+    shell_quote,
     timeconvert,
 )
 
@@ -187,6 +188,9 @@ class FileDownloader(object):
             return filename[:-len('.part')]
         return filename
 
+    def ytdl_filename(self, filename):
+        return filename + '.ytdl'
+
     def try_rename(self, old_filename, new_filename):
         try:
             if old_filename == new_filename:
@@ -300,11 +304,11 @@ class FileDownloader(object):
         """Report attempt to resume at given byte."""
         self.to_screen('[download] Resuming download at byte %s' % resume_len)
 
-    def report_retry(self, count, retries):
+    def report_retry(self, err, count, retries):
         """Report retry in case of HTTP error 5xx"""
         self.to_screen(
-            '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
-            % (count, self.format_retries(retries)))
+            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+            % (error_to_compat_str(err), count, self.format_retries(retries)))
 
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
@@ -327,27 +331,31 @@ class FileDownloader(object):
             os.path.exists(encodeFilename(filename))
         )
 
-        continuedl_and_exists = (
-            self.params.get('continuedl', True) and
-            os.path.isfile(encodeFilename(filename)) and
-            not self.params.get('nopart', False)
-        )
-
-        # Check file already present
-        if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
-            self.report_file_already_downloaded(filename)
-            self._hook_progress({
-                'filename': filename,
-                'status': 'finished',
-                'total_bytes': os.path.getsize(encodeFilename(filename)),
-            })
-            return True
+        if not hasattr(filename, 'write'):
+            continuedl_and_exists = (
+                self.params.get('continuedl', True) and
+                os.path.isfile(encodeFilename(filename)) and
+                not self.params.get('nopart', False)
+            )
+
+            # Check file already present
+            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
+                self.report_file_already_downloaded(filename)
+                self._hook_progress({
+                    'filename': filename,
+                    'status': 'finished',
+                    'total_bytes': os.path.getsize(encodeFilename(filename)),
+                })
+                return True
 
         min_sleep_interval = self.params.get('sleep_interval')
         if min_sleep_interval:
             max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
             sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
-            self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
+            self.to_screen(
+                '[download] Sleeping %s seconds...' % (
+                    int(sleep_interval) if sleep_interval.is_integer()
+                    else '%.2f' % sleep_interval))
             time.sleep(sleep_interval)
 
         return self.real_download(filename, info_dict)
@@ -374,10 +382,5 @@ class FileDownloader(object):
         if exe is None:
             exe = os.path.basename(str_args[0])
 
-        try:
-            import pipes
-            shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
-        except ImportError:
-            shell_quote = repr
         self.to_screen('[debug] %s command line: %s' % (
             exe, shell_quote(str_args)))

+ 19 - 31
youtube_dl/downloader/dash.py

@@ -1,13 +1,8 @@
 from __future__ import unicode_literals
 
-import os
-
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
-from ..utils import (
-    sanitize_open,
-    encodeFilename,
-)
+from ..utils import urljoin
 
 
 class DashSegmentsFD(FragmentFD):
@@ -18,38 +13,39 @@ class DashSegmentsFD(FragmentFD):
     FD_NAME = 'dashsegments'
 
     def real_download(self, filename, info_dict):
-        segments = info_dict['fragments'][:1] if self.params.get(
+        fragment_base_url = info_dict.get('fragment_base_url')
+        fragments = info_dict['fragments'][:1] if self.params.get(
             'test', False) else info_dict['fragments']
 
         ctx = {
             'filename': filename,
-            'total_frags': len(segments),
+            'total_frags': len(fragments),
         }
 
         self._prepare_and_start_frag_download(ctx)
 
-        segments_filenames = []
-
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
-        def process_segment(segment, tmp_filename, num):
-            segment_url = segment['url']
-            segment_name = 'Frag%d' % num
-            target_filename = '%s-%s' % (tmp_filename, segment_name)
+        frag_index = 0
+        for i, fragment in enumerate(fragments):
+            frag_index += 1
+            if frag_index <= ctx['fragment_index']:
+                continue
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
-            fatal = num == 0 or not skip_unavailable_fragments
+            fatal = i == 0 or not skip_unavailable_fragments
             count = 0
             while count <= fragment_retries:
                 try:
-                    success = ctx['dl'].download(target_filename, {'url': segment_url})
+                    fragment_url = fragment.get('url')
+                    if not fragment_url:
+                        assert fragment_base_url
+                        fragment_url = urljoin(fragment_base_url, fragment['path'])
+                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                     if not success:
                         return False
-                    down, target_sanitized = sanitize_open(target_filename, 'rb')
-                    ctx['dest_stream'].write(down.read())
-                    down.close()
-                    segments_filenames.append(target_sanitized)
+                    self._append_fragment(ctx, frag_content)
                     break
                 except compat_urllib_error.HTTPError as err:
                     # YouTube may often return 404 HTTP error for a fragment causing the
@@ -60,22 +56,14 @@ class DashSegmentsFD(FragmentFD):
                     # HTTP error.
                     count += 1
                     if count <= fragment_retries:
-                        self.report_retry_fragment(err, segment_name, count, fragment_retries)
+                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
             if count > fragment_retries:
                 if not fatal:
-                    self.report_skip_fragment(segment_name)
-                    return True
+                    self.report_skip_fragment(frag_index)
+                    continue
                 self.report_error('giving up after %s fragment retries' % fragment_retries)
                 return False
-            return True
-
-        for i, segment in enumerate(segments):
-            if not process_segment(segment, ctx['tmpfilename'], i):
-                return False
 
         self._finish_frag_download(ctx)
 
-        for segment_file in segments_filenames:
-            os.remove(encodeFilename(segment_file))
-
         return True

+ 37 - 3
youtube_dl/downloader/external.py

@@ -6,7 +6,10 @@ import sys
 import re
 
 from .common import FileDownloader
-from ..compat import compat_setenv
+from ..compat import (
+    compat_setenv,
+    compat_str,
+)
 from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
 from ..utils import (
     cli_option,
@@ -17,6 +20,7 @@ from ..utils import (
     encodeArgument,
     handle_youtubedl_headers,
     check_executable,
+    is_outdated_version,
 )
 
 
@@ -25,7 +29,17 @@ class ExternalFD(FileDownloader):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
-        retval = self._call_downloader(tmpfilename, info_dict)
+        try:
+            retval = self._call_downloader(tmpfilename, info_dict)
+        except KeyboardInterrupt:
+            if not info_dict.get('is_live'):
+                raise
+            # Cancelling a live stream download should be considered a
+            # correct and expected termination, thus all postprocessing
+            # should still take place
+            retval = 0
+            self.to_screen('[%s] Interrupted by user' % self.get_basename())
+
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
@@ -198,6 +212,20 @@ class FFmpegFD(ExternalFD):
 
         args = [ffpp.executable, '-y']
 
+        for log_level in ('quiet', 'verbose'):
+            if self.params.get(log_level, False):
+                args += ['-loglevel', log_level]
+                break
+
+        seekable = info_dict.get('_seekable')
+        if seekable is not None:
+            # setting -seekable prevents ffmpeg from guessing if the server
+            # supports seeking (by adding the header `Range: bytes=0-`), which
+            # can cause problems in some cases
+            # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
+            # http://trac.ffmpeg.org/ticket/6125#comment:10
+            args += ['-seekable', '1' if seekable else '0']
+
         args += self._configuration_args()
 
         # start_time = info_dict.get('start_time') or 0
@@ -260,11 +288,17 @@ class FFmpegFD(ExternalFD):
                 args += ['-rtmp_live', 'live']
 
         args += ['-i', url, '-c', 'copy']
+
+        if self.params.get('test', False):
+            args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+
         if protocol in ('m3u8', 'm3u8_native'):
             if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
                 args += ['-f', 'mpegts']
             else:
-                args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+                args += ['-f', 'mp4']
+                if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+                    args += ['-bsf:a', 'aac_adtstoasc']
         elif protocol == 'rtmp':
             args += ['-f', 'flv']
         else:

+ 27 - 31
youtube_dl/downloader/f4m.py

@@ -3,7 +3,6 @@ from __future__ import division, unicode_literals
 import base64
 import io
 import itertools
-import os
 import time
 
 from .fragment import FragmentFD
@@ -16,9 +15,7 @@ from ..compat import (
     compat_struct_unpack,
 )
 from ..utils import (
-    encodeFilename,
     fix_xml_ampersands,
-    sanitize_open,
     xpath_text,
 )
 
@@ -246,8 +243,17 @@ def remove_encrypted_media(media):
                        media))
 
 
-def _add_ns(prop):
-    return '{http://ns.adobe.com/f4m/1.0}%s' % prop
+def _add_ns(prop, ver=1):
+    return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+    base_url = xpath_text(
+        manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+        'base URL', default=None)
+    if base_url:
+        base_url = base_url.strip()
+    return base_url
 
 
 class F4mFD(FragmentFD):
@@ -333,13 +339,13 @@ class F4mFD(FragmentFD):
             rate, media = list(filter(
                 lambda f: int(f[0]) == requested_bitrate, formats))[0]
 
-        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
+        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+        man_base_url = get_base_url(doc) or man_url
+
+        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
         bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        # From Adobe F4M 3.0 spec:
-        # The <baseURL> element SHALL be the base URL for all relative
-        # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
-        # URLs should be relative to the location of the containing document.
-        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
+        boot_info, bootstrap_url = self._parse_bootstrap_node(
+            bootstrap_node, man_base_url)
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
@@ -366,17 +372,21 @@ class F4mFD(FragmentFD):
 
         dest_stream = ctx['dest_stream']
 
-        write_flv_header(dest_stream)
-        if not live:
-            write_metadata_tag(dest_stream, metadata)
+        if ctx['complete_frags_downloaded_bytes'] == 0:
+            write_flv_header(dest_stream)
+            if not live:
+                write_metadata_tag(dest_stream, metadata)
 
         base_url_parsed = compat_urllib_parse_urlparse(base_url)
 
         self._start_frag_download(ctx)
 
-        frags_filenames = []
+        frag_index = 0
         while fragments_list:
             seg_i, frag_i = fragments_list.pop(0)
+            frag_index += 1
+            if frag_index <= ctx['fragment_index']:
+                continue
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             query = []
             if base_url_parsed.query:
@@ -386,17 +396,10 @@ class F4mFD(FragmentFD):
             if info_dict.get('extra_param_to_segment_url'):
                 query.append(info_dict['extra_param_to_segment_url'])
             url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
-            frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
             try:
-                success = ctx['dl'].download(frag_filename, {
-                    'url': url_parsed.geturl(),
-                    'http_headers': info_dict.get('http_headers'),
-                })
+                success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
                 if not success:
                     return False
-                (down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
-                down_data = down.read()
-                down.close()
                 reader = FlvReader(down_data)
                 while True:
                     try:
@@ -411,12 +414,8 @@ class F4mFD(FragmentFD):
                             break
                         raise
                     if box_type == b'mdat':
-                        dest_stream.write(box_data)
+                        self._append_fragment(ctx, box_data)
                         break
-                if live:
-                    os.remove(encodeFilename(frag_sanitized))
-                else:
-                    frags_filenames.append(frag_sanitized)
             except (compat_urllib_error.HTTPError, ) as err:
                 if live and (err.code == 404 or err.code == 410):
                     # We didn't keep up with the live window. Continue
@@ -436,7 +435,4 @@ class F4mFD(FragmentFD):
 
         self._finish_frag_download(ctx)
 
-        for frag_file in frags_filenames:
-            os.remove(encodeFilename(frag_file))
-
         return True

+ 124 - 15
youtube_dl/downloader/fragment.py

@@ -2,6 +2,7 @@ from __future__ import division, unicode_literals
 
 import os
 import time
+import json
 
 from .common import FileDownloader
 from .http import HttpFD
@@ -28,15 +29,37 @@ class FragmentFD(FileDownloader):
                         and hlsnative only)
     skip_unavailable_fragments:
                         Skip unavailable fragments (DASH and hlsnative only)
+    keep_fragments:     Keep downloaded fragments on disk after downloading is
+                        finished
+
+    For each incomplete fragment download youtube-dl keeps on disk a special
+    bookkeeping file with download state and metadata (in the future such files
+    will be used for any incomplete download handled by youtube-dl). This file
+    is used to properly handle resuming, check download file consistency and
+    detect potential errors. The file has a .ytdl extension and is a standard
+    JSON file of the following format:
+
+    extractor:
+        Dictionary of extractor related data. TBD.
+
+    downloader:
+        Dictionary of downloader related data. May contain the following data:
+            current_fragment:
+                Dictionary with current (being downloaded) fragment data:
+                index:  0-based index of current fragment among all fragments
+            fragment_count:
+                Total count of fragments
+
+    This feature is experimental and the file format may change in the future.
     """
 
-    def report_retry_fragment(self, err, fragment_name, count, retries):
+    def report_retry_fragment(self, err, frag_index, count, retries):
         self.to_screen(
-            '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
-            % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
+            '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
+            % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
 
-    def report_skip_fragment(self, fragment_name):
-        self.to_screen('[download] Skipping fragment %s...' % fragment_name)
+    def report_skip_fragment(self, frag_index):
+        self.to_screen('[download] Skipping fragment %d...' % frag_index)
 
     def _prepare_url(self, info_dict, url):
         headers = info_dict.get('http_headers')
@@ -46,12 +69,64 @@ class FragmentFD(FileDownloader):
         self._prepare_frag_download(ctx)
         self._start_frag_download(ctx)
 
+    @staticmethod
+    def __do_ytdl_file(ctx):
+        return not ctx['live'] and not ctx['tmpfilename'] == '-'
+
+    def _read_ytdl_file(self, ctx):
+        stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
+        ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+        stream.close()
+
+    def _write_ytdl_file(self, ctx):
+        frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
+        downloader = {
+            'current_fragment': {
+                'index': ctx['fragment_index'],
+            },
+        }
+        if ctx.get('fragment_count') is not None:
+            downloader['fragment_count'] = ctx['fragment_count']
+        frag_index_stream.write(json.dumps({'downloader': downloader}))
+        frag_index_stream.close()
+
+    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
+        success = ctx['dl'].download(fragment_filename, {
+            'url': frag_url,
+            'http_headers': headers or info_dict.get('http_headers'),
+        })
+        if not success:
+            return False, None
+        down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+        ctx['fragment_filename_sanitized'] = frag_sanitized
+        frag_content = down.read()
+        down.close()
+        return True, frag_content
+
+    def _append_fragment(self, ctx, frag_content):
+        try:
+            ctx['dest_stream'].write(frag_content)
+            ctx['dest_stream'].flush()
+        finally:
+            if self.__do_ytdl_file(ctx):
+                self._write_ytdl_file(ctx)
+            if not self.params.get('keep_fragments', False):
+                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
+            del ctx['fragment_filename_sanitized']
+
     def _prepare_frag_download(self, ctx):
         if 'live' not in ctx:
             ctx['live'] = False
+        if not ctx['live']:
+            total_frags_str = '%d' % ctx['total_frags']
+            ad_frags = ctx.get('ad_frags', 0)
+            if ad_frags:
+                total_frags_str += ' (not including %d ad)' % ad_frags
+        else:
+            total_frags_str = 'unknown (live)'
         self.to_screen(
-            '[%s] Total fragments: %s'
-            % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
+            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
         self.report_destination(ctx['filename'])
         dl = HttpQuietDownloader(
             self.ydl,
@@ -61,15 +136,46 @@ class FragmentFD(FileDownloader):
                 'noprogress': True,
                 'ratelimit': self.params.get('ratelimit'),
                 'retries': self.params.get('retries', 0),
+                'nopart': self.params.get('nopart', False),
                 'test': self.params.get('test', False),
             }
         )
         tmpfilename = self.temp_name(ctx['filename'])
-        dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
+        open_mode = 'wb'
+        resume_len = 0
+
+        # Establish possible resume length
+        if os.path.isfile(encodeFilename(tmpfilename)):
+            open_mode = 'ab'
+            resume_len = os.path.getsize(encodeFilename(tmpfilename))
+
+        # Should be initialized before ytdl file check
+        ctx.update({
+            'tmpfilename': tmpfilename,
+            'fragment_index': 0,
+        })
+
+        if self.__do_ytdl_file(ctx):
+            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+                self._read_ytdl_file(ctx)
+                if ctx['fragment_index'] > 0 and resume_len == 0:
+                    self.report_warning(
+                        'Inconsistent state of incomplete fragment download. '
+                        'Restarting from the beginning...')
+                    ctx['fragment_index'] = resume_len = 0
+                    self._write_ytdl_file(ctx)
+            else:
+                self._write_ytdl_file(ctx)
+                assert ctx['fragment_index'] == 0
+
+        dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
+
         ctx.update({
             'dl': dl,
             'dest_stream': dest_stream,
             'tmpfilename': tmpfilename,
+            # Total complete fragments downloaded so far in bytes
+            'complete_frags_downloaded_bytes': resume_len,
         })
 
     def _start_frag_download(self, ctx):
@@ -78,9 +184,9 @@ class FragmentFD(FileDownloader):
         # hook
         state = {
             'status': 'downloading',
-            'downloaded_bytes': 0,
-            'frag_index': 0,
-            'frag_count': total_frags,
+            'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
+            'fragment_index': ctx['fragment_index'],
+            'fragment_count': total_frags,
             'filename': ctx['filename'],
             'tmpfilename': ctx['tmpfilename'],
         }
@@ -88,8 +194,6 @@ class FragmentFD(FileDownloader):
         start = time.time()
         ctx.update({
             'started': start,
-            # Total complete fragments downloaded so far in bytes
-            'complete_frags_downloaded_bytes': 0,
             # Amount of fragment's bytes downloaded by the time of the previous
             # frag progress hook invocation
             'prev_frag_downloaded_bytes': 0,
@@ -105,11 +209,12 @@ class FragmentFD(FileDownloader):
             if not ctx['live']:
                 estimated_size = (
                     (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
-                    (state['frag_index'] + 1) * total_frags)
+                    (state['fragment_index'] + 1) * total_frags)
                 state['total_bytes_estimate'] = estimated_size
 
             if s['status'] == 'finished':
-                state['frag_index'] += 1
+                state['fragment_index'] += 1
+                ctx['fragment_index'] = state['fragment_index']
                 state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                 ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                 ctx['prev_frag_downloaded_bytes'] = 0
@@ -131,6 +236,10 @@ class FragmentFD(FileDownloader):
 
     def _finish_frag_download(self, ctx):
         ctx['dest_stream'].close()
+        if self.__do_ytdl_file(ctx):
+            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
+            if os.path.isfile(ytdl_filename):
+                os.remove(ytdl_filename)
         elapsed = time.time() - ctx['started']
         self.try_rename(ctx['tmpfilename'], ctx['filename'])
         fsize = os.path.getsize(encodeFilename(ctx['filename']))

+ 57 - 30
youtube_dl/downloader/hls.py

@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import os.path
 import re
 import binascii
 try:
@@ -18,8 +17,6 @@ from ..compat import (
     compat_struct_pack,
 )
 from ..utils import (
-    encodeFilename,
-    sanitize_open,
     parse_m3u8_attributes,
     update_url_query,
 )
@@ -34,7 +31,7 @@ class HlsFD(FragmentFD):
     def can_download(manifest, info_dict):
         UNSUPPORTED_FEATURES = (
             r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
-            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
+            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]
 
             # Live streams heuristic does not always work (e.g. geo restricted to Germany
             # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
@@ -52,7 +49,9 @@ class HlsFD(FragmentFD):
             # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
         )
         check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
-        check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
+        is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
+        check_results.append(can_decrypt_frag or not is_aes128_enc)
+        check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
         check_results.append(not info_dict.get('is_live'))
         return all(check_results)
 
@@ -60,9 +59,9 @@ class HlsFD(FragmentFD):
         man_url = info_dict['url']
         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
 
-        manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
-
-        s = manifest.decode('utf-8', 'ignore')
+        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+        man_url = urlh.geturl()
+        s = urlh.read().decode('utf-8', 'ignore')
 
         if not self.can_download(s, info_dict):
             if info_dict.get('extra_param_to_segment_url'):
@@ -76,15 +75,30 @@ class HlsFD(FragmentFD):
                 fd.add_progress_hook(ph)
             return fd.real_download(filename, info_dict)
 
-        total_frags = 0
+        def anvato_ad(s):
+            return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+
+        media_frags = 0
+        ad_frags = 0
+        ad_frag_next = False
         for line in s.splitlines():
             line = line.strip()
-            if line and not line.startswith('#'):
-                total_frags += 1
+            if not line:
+                continue
+            if line.startswith('#'):
+                if anvato_ad(line):
+                    ad_frags += 1
+                    ad_frag_next = True
+                continue
+            if ad_frag_next:
+                ad_frag_next = False
+                continue
+            media_frags += 1
 
         ctx = {
             'filename': filename,
-            'total_frags': total_frags,
+            'total_frags': media_frags,
+            'ad_frags': ad_frags,
         }
 
         self._prepare_and_start_frag_download(ctx)
@@ -100,31 +114,35 @@ class HlsFD(FragmentFD):
         i = 0
         media_sequence = 0
         decrypt_info = {'METHOD': 'NONE'}
-        frags_filenames = []
+        byte_range = {}
+        frag_index = 0
+        ad_frag_next = False
         for line in s.splitlines():
             line = line.strip()
             if line:
                 if not line.startswith('#'):
+                    if ad_frag_next:
+                        ad_frag_next = False
+                        continue
+                    frag_index += 1
+                    if frag_index <= ctx['fragment_index']:
+                        continue
                     frag_url = (
                         line
                         if re.match(r'^https?://', line)
                         else compat_urlparse.urljoin(man_url, line))
-                    frag_name = 'Frag%d' % i
-                    frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
                     if extra_query:
                         frag_url = update_url_query(frag_url, extra_query)
                     count = 0
+                    headers = info_dict.get('http_headers', {})
+                    if byte_range:
+                        headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
                     while count <= fragment_retries:
                         try:
-                            success = ctx['dl'].download(frag_filename, {
-                                'url': frag_url,
-                                'http_headers': info_dict.get('http_headers'),
-                            })
+                            success, frag_content = self._download_fragment(
+                                ctx, frag_url, info_dict, headers)
                             if not success:
                                 return False
-                            down, frag_sanitized = sanitize_open(frag_filename, 'rb')
-                            frag_content = down.read()
-                            down.close()
                             break
                         except compat_urllib_error.HTTPError as err:
                             # Unavailable (possibly temporary) fragments may be served.
@@ -133,28 +151,30 @@ class HlsFD(FragmentFD):
                             # https://github.com/rg3/youtube-dl/issues/10448).
                             count += 1
                             if count <= fragment_retries:
-                                self.report_retry_fragment(err, frag_name, count, fragment_retries)
+                                self.report_retry_fragment(err, frag_index, count, fragment_retries)
                     if count > fragment_retries:
                         if skip_unavailable_fragments:
                             i += 1
                             media_sequence += 1
-                            self.report_skip_fragment(frag_name)
+                            self.report_skip_fragment(frag_index)
                             continue
                         self.report_error(
                             'giving up after %s fragment retries' % fragment_retries)
                         return False
                     if decrypt_info['METHOD'] == 'AES-128':
                         iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
+                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
+                            self._prepare_url(info_dict, decrypt_info['URI'])).read()
                         frag_content = AES.new(
                             decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
-                    ctx['dest_stream'].write(frag_content)
-                    frags_filenames.append(frag_sanitized)
+                    self._append_fragment(ctx, frag_content)
                     # We only download the first fragment during the test
                     if test:
                         break
                     i += 1
                     media_sequence += 1
                 elif line.startswith('#EXT-X-KEY'):
+                    decrypt_url = decrypt_info.get('URI')
                     decrypt_info = parse_m3u8_attributes(line[11:])
                     if decrypt_info['METHOD'] == 'AES-128':
                         if 'IV' in decrypt_info:
@@ -164,13 +184,20 @@ class HlsFD(FragmentFD):
                                 man_url, decrypt_info['URI'])
                         if extra_query:
                             decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
-                        decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
+                        if decrypt_url != decrypt_info['URI']:
+                            decrypt_info['KEY'] = None
                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                     media_sequence = int(line[22:])
+                elif line.startswith('#EXT-X-BYTERANGE'):
+                    splitted_byte_range = line[17:].split('@')
+                    sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+                    byte_range = {
+                        'start': sub_range_start,
+                        'end': sub_range_start + int(splitted_byte_range[0]),
+                    }
+                elif anvato_ad(line):
+                    ad_frag_next = True
 
         self._finish_frag_download(ctx)
 
-        for frag_file in frags_filenames:
-            os.remove(encodeFilename(frag_file))
-
         return True

+ 195 - 151
youtube_dl/downloader/http.py

@@ -22,8 +22,16 @@ from ..utils import (
 class HttpFD(FileDownloader):
     def real_download(self, filename, info_dict):
         url = info_dict['url']
-        tmpfilename = self.temp_name(filename)
-        stream = None
+
+        class DownloadContext(dict):
+            __getattr__ = dict.get
+            __setattr__ = dict.__setitem__
+            __delattr__ = dict.__delitem__
+
+        ctx = DownloadContext()
+        ctx.filename = filename
+        ctx.tmpfilename = self.temp_name(filename)
+        ctx.stream = None
 
         # Do not include the Accept-Encoding header
         headers = {'Youtubedl-no-compression': 'True'}
@@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
         if is_test:
             request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
 
-        # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))
-        else:
-            resume_len = 0
-
-        open_mode = 'wb'
-        if resume_len != 0:
-            if self.params.get('continuedl', True):
-                self.report_resuming_byte(resume_len)
-                request.add_header('Range', 'bytes=%d-' % resume_len)
-                open_mode = 'ab'
-            else:
-                resume_len = 0
+        ctx.open_mode = 'wb'
+        ctx.resume_len = 0
+
+        if self.params.get('continuedl', True):
+            # Establish possible resume length
+            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
 
         count = 0
         retries = self.params.get('retries', 0)
-        while count <= retries:
+
+        class SucceedDownload(Exception):
+            pass
+
+        class RetryDownload(Exception):
+            def __init__(self, source_error):
+                self.source_error = source_error
+
+        def establish_connection():
+            if ctx.resume_len != 0:
+                self.report_resuming_byte(ctx.resume_len)
+                request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
+                ctx.open_mode = 'ab'
             # Establish connection
             try:
-                data = self.ydl.urlopen(request)
+                ctx.data = self.ydl.urlopen(request)
                 # When trying to resume, Content-Range HTTP header of response has to be checked
                 # to match the value of requested Range HTTP header. This is due to a webservers
                 # that don't support resuming and serve a whole file with no Content-Range
                 # set in response despite of requested Range (see
                 # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
-                if resume_len > 0:
-                    content_range = data.headers.get('Content-Range')
+                if ctx.resume_len > 0:
+                    content_range = ctx.data.headers.get('Content-Range')
                     if content_range:
                         content_range_m = re.search(r'bytes (\d+)-', content_range)
                         # Content-Range is present and matches requested Range, resume is possible
-                        if content_range_m and resume_len == int(content_range_m.group(1)):
-                            break
+                        if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
+                            return
                     # Content-Range is either not present or invalid. Assuming remote webserver is
                     # trying to send the whole file, resume is not possible, so wiping the local file
                     # and performing entire redownload
                     self.report_unable_to_resume()
-                    resume_len = 0
-                    open_mode = 'wb'
-                break
+                    ctx.resume_len = 0
+                    ctx.open_mode = 'wb'
+                return
             except (compat_urllib_error.HTTPError, ) as err:
                 if (err.code < 500 or err.code >= 600) and err.code != 416:
                     # Unexpected HTTP error
@@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
                     # Unable to resume (requested range not satisfiable)
                     try:
                         # Open the connection again without the range header
-                        data = self.ydl.urlopen(basic_request)
-                        content_length = data.info()['Content-Length']
+                        ctx.data = self.ydl.urlopen(basic_request)
+                        content_length = ctx.data.info()['Content-Length']
                     except (compat_urllib_error.HTTPError, ) as err:
                         if err.code < 500 or err.code >= 600:
                             raise
                     else:
                         # Examine the reported length
                         if (content_length is not None and
-                                (resume_len - 100 < int(content_length) < resume_len + 100)):
+                                (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
                             # The file had already been fully downloaded.
                             # Explanation to the above condition: in issue #175 it was revealed that
                             # YouTube sometimes adds or removes a few bytes from the end of the file,
@@ -102,152 +115,183 @@ class HttpFD(FileDownloader):
                             # I decided to implement a suggested change and consider the file
                             # completely downloaded if the file size differs less than 100 bytes from
                             # the one in the hard drive.
-                            self.report_file_already_downloaded(filename)
-                            self.try_rename(tmpfilename, filename)
+                            self.report_file_already_downloaded(ctx.filename)
+                            self.try_rename(ctx.tmpfilename, ctx.filename)
                             self._hook_progress({
-                                'filename': filename,
+                                'filename': ctx.filename,
                                 'status': 'finished',
-                                'downloaded_bytes': resume_len,
-                                'total_bytes': resume_len,
+                                'downloaded_bytes': ctx.resume_len,
+                                'total_bytes': ctx.resume_len,
                             })
-                            return True
+                            raise SucceedDownload()
                         else:
                             # The length does not match, we start the download over
                             self.report_unable_to_resume()
-                            resume_len = 0
-                            open_mode = 'wb'
-                            break
-            except socket.error as e:
-                if e.errno != errno.ECONNRESET:
+                            ctx.resume_len = 0
+                            ctx.open_mode = 'wb'
+                            return
+                raise RetryDownload(err)
+            except socket.error as err:
+                if err.errno != errno.ECONNRESET:
                     # Connection reset is no problem, just retry
                     raise
+                raise RetryDownload(err)
+
+        def download():
+            data_len = ctx.data.info().get('Content-length', None)
+
+            # Range HTTP header may be ignored/unsupported by a webserver
+            # (e.g. extractor/scivee.py, extractor/bambuser.py).
+            # However, for a test we still would like to download just a piece of a file.
+            # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
+            # block size when downloading a file.
+            if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
+                data_len = self._TEST_FILE_SIZE
+
+            if data_len is not None:
+                data_len = int(data_len) + ctx.resume_len
+                min_data_len = self.params.get('min_filesize')
+                max_data_len = self.params.get('max_filesize')
+                if min_data_len is not None and data_len < min_data_len:
+                    self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+                    return False
+                if max_data_len is not None and data_len > max_data_len:
+                    self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+                    return False
 
-            # Retry
-            count += 1
-            if count <= retries:
-                self.report_retry(count, retries)
-
-        if count > retries:
-            self.report_error('giving up after %s retries' % retries)
-            return False
-
-        data_len = data.info().get('Content-length', None)
-
-        # Range HTTP header may be ignored/unsupported by a webserver
-        # (e.g. extractor/scivee.py, extractor/bambuser.py).
-        # However, for a test we still would like to download just a piece of a file.
-        # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
-        # block size when downloading a file.
-        if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
-            data_len = self._TEST_FILE_SIZE
-
-        if data_len is not None:
-            data_len = int(data_len) + resume_len
-            min_data_len = self.params.get('min_filesize')
-            max_data_len = self.params.get('max_filesize')
-            if min_data_len is not None and data_len < min_data_len:
-                self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
-                return False
-            if max_data_len is not None and data_len > max_data_len:
-                self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
-                return False
-
-        byte_counter = 0 + resume_len
-        block_size = self.params.get('buffersize', 1024)
-        start = time.time()
+            byte_counter = 0 + ctx.resume_len
+            block_size = self.params.get('buffersize', 1024)
+            start = time.time()
 
-        # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
-        now = None  # needed for slow_down() in the first loop run
-        before = start  # start measuring
-        while True:
+            # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
+            now = None  # needed for slow_down() in the first loop run
+            before = start  # start measuring
 
-            # Download and write
-            data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
-            byte_counter += len(data_block)
+            def retry(e):
+                if ctx.tmpfilename != '-':
+                    ctx.stream.close()
+                ctx.stream = None
+                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
+                raise RetryDownload(e)
 
-            # exit loop when download is finished
-            if len(data_block) == 0:
-                break
+            while True:
+                try:
+                    # Download and write
+                    data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
+                # socket.timeout is a subclass of socket.error but may not have
+                # errno set
+                except socket.timeout as e:
+                    retry(e)
+                except socket.error as e:
+                    if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
+                        raise
+                    retry(e)
+
+                byte_counter += len(data_block)
+
+                # exit loop when download is finished
+                if len(data_block) == 0:
+                    break
+
+                # Open destination file just in time
+                if ctx.stream is None:
+                    try:
+                        ctx.stream, ctx.tmpfilename = sanitize_open(
+                            ctx.tmpfilename, ctx.open_mode)
+                        assert ctx.stream is not None
+                        ctx.filename = self.undo_temp_name(ctx.tmpfilename)
+                        self.report_destination(ctx.filename)
+                    except (OSError, IOError) as err:
+                        self.report_error('unable to open for writing: %s' % str(err))
+                        return False
+
+                    if self.params.get('xattr_set_filesize', False) and data_len is not None:
+                        try:
+                            write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+                        except (XAttrUnavailableError, XAttrMetadataError) as err:
+                            self.report_error('unable to set filesize xattr: %s' % str(err))
 
-            # Open destination file just in time
-            if stream is None:
                 try:
-                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
-                    assert stream is not None
-                    filename = self.undo_temp_name(tmpfilename)
-                    self.report_destination(filename)
-                except (OSError, IOError) as err:
-                    self.report_error('unable to open for writing: %s' % str(err))
+                    ctx.stream.write(data_block)
+                except (IOError, OSError) as err:
+                    self.to_stderr('\n')
+                    self.report_error('unable to write data: %s' % str(err))
                     return False
 
-                if self.params.get('xattr_set_filesize', False) and data_len is not None:
-                    try:
-                        write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
-                    except (XAttrUnavailableError, XAttrMetadataError) as err:
-                        self.report_error('unable to set filesize xattr: %s' % str(err))
-
-            try:
-                stream.write(data_block)
-            except (IOError, OSError) as err:
+                # Apply rate limit
+                self.slow_down(start, now, byte_counter - ctx.resume_len)
+
+                # end measuring of one loop run
+                now = time.time()
+                after = now
+
+                # Adjust block size
+                if not self.params.get('noresizebuffer', False):
+                    block_size = self.best_block_size(after - before, len(data_block))
+
+                before = after
+
+                # Progress message
+                speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
+                if data_len is None:
+                    eta = None
+                else:
+                    eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+
+                self._hook_progress({
+                    'status': 'downloading',
+                    'downloaded_bytes': byte_counter,
+                    'total_bytes': data_len,
+                    'tmpfilename': ctx.tmpfilename,
+                    'filename': ctx.filename,
+                    'eta': eta,
+                    'speed': speed,
+                    'elapsed': now - start,
+                })
+
+                if is_test and byte_counter == data_len:
+                    break
+
+            if ctx.stream is None:
                 self.to_stderr('\n')
-                self.report_error('unable to write data: %s' % str(err))
+                self.report_error('Did not get any data blocks')
                 return False
+            if ctx.tmpfilename != '-':
+                ctx.stream.close()
 
-            # Apply rate limit
-            self.slow_down(start, now, byte_counter - resume_len)
+            if data_len is not None and byte_counter != data_len:
+                err = ContentTooShortError(byte_counter, int(data_len))
+                if count <= retries:
+                    retry(err)
+                raise err
 
-            # end measuring of one loop run
-            now = time.time()
-            after = now
+            self.try_rename(ctx.tmpfilename, ctx.filename)
 
-            # Adjust block size
-            if not self.params.get('noresizebuffer', False):
-                block_size = self.best_block_size(after - before, len(data_block))
-
-            before = after
-
-            # Progress message
-            speed = self.calc_speed(start, now, byte_counter - resume_len)
-            if data_len is None:
-                eta = None
-            else:
-                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+            # Update file modification time
+            if self.params.get('updatetime', True):
+                info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
 
             self._hook_progress({
-                'status': 'downloading',
                 'downloaded_bytes': byte_counter,
-                'total_bytes': data_len,
-                'tmpfilename': tmpfilename,
-                'filename': filename,
-                'eta': eta,
-                'speed': speed,
-                'elapsed': now - start,
+                'total_bytes': byte_counter,
+                'filename': ctx.filename,
+                'status': 'finished',
+                'elapsed': time.time() - start,
             })
 
-            if is_test and byte_counter == data_len:
-                break
-
-        if stream is None:
-            self.to_stderr('\n')
-            self.report_error('Did not get any data blocks')
-            return False
-        if tmpfilename != '-':
-            stream.close()
-
-        if data_len is not None and byte_counter != data_len:
-            raise ContentTooShortError(byte_counter, int(data_len))
-        self.try_rename(tmpfilename, filename)
-
-        # Update file modification time
-        if self.params.get('updatetime', True):
-            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
-
-        self._hook_progress({
-            'downloaded_bytes': byte_counter,
-            'total_bytes': byte_counter,
-            'filename': filename,
-            'status': 'finished',
-            'elapsed': time.time() - start,
-        })
-
-        return True
+            return True
+
+        while count <= retries:
+            try:
+                establish_connection()
+                return download()
+            except RetryDownload as e:
+                count += 1
+                if count <= retries:
+                    self.report_retry(e.source_error, count, retries)
+                continue
+            except SucceedDownload:
+                return True
+
+        self.report_error('giving up after %s retries' % retries)
+        return False

+ 10 - 24
youtube_dl/downloader/ism.py

@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import os
 import time
 import struct
 import binascii
@@ -8,10 +7,6 @@ import io
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
-from ..utils import (
-    sanitize_open,
-    encodeFilename,
-)
 
 
 u8 = struct.Struct(b'>B')
@@ -103,7 +98,7 @@ def write_piff_header(stream, params):
 
     if is_audio:
         smhd_payload = s88.pack(0)  # balance
-        smhd_payload = u16.pack(0)  # reserved
+        smhd_payload += u16.pack(0)  # reserved
         media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
     else:
         vmhd_payload = u16.pack(0)  # graphics mode
@@ -131,7 +126,6 @@ def write_piff_header(stream, params):
         if fourcc == 'AACL':
             sample_entry_box = box(b'mp4a', sample_entry_payload)
     else:
-        sample_entry_payload = sample_entry_payload
         sample_entry_payload += u16.pack(0)  # pre defined
         sample_entry_payload += u16.pack(0)  # reserved
         sample_entry_payload += u32.pack(0) * 3  # pre defined
@@ -225,47 +219,39 @@ class IsmFD(FragmentFD):
 
         self._prepare_and_start_frag_download(ctx)
 
-        segments_filenames = []
-
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
         track_written = False
+        frag_index = 0
         for i, segment in enumerate(segments):
-            segment_url = segment['url']
-            segment_name = 'Frag%d' % i
-            target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
+            frag_index += 1
+            if frag_index <= ctx['fragment_index']:
+                continue
             count = 0
             while count <= fragment_retries:
                 try:
-                    success = ctx['dl'].download(target_filename, {'url': segment_url})
+                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
                     if not success:
                         return False
-                    down, target_sanitized = sanitize_open(target_filename, 'rb')
-                    down_data = down.read()
                     if not track_written:
-                        tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
+                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
                         info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
                         write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
                         track_written = True
-                    ctx['dest_stream'].write(down_data)
-                    down.close()
-                    segments_filenames.append(target_sanitized)
+                    self._append_fragment(ctx, frag_content)
                     break
                 except compat_urllib_error.HTTPError as err:
                     count += 1
                     if count <= fragment_retries:
-                        self.report_retry_fragment(err, segment_name, count, fragment_retries)
+                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
             if count > fragment_retries:
                 if skip_unavailable_fragments:
-                    self.report_skip_fragment(segment_name)
+                    self.report_skip_fragment(frag_index)
                     continue
                 self.report_error('giving up after %s fragment retries' % fragment_retries)
                 return False
 
         self._finish_frag_download(ctx)
 
-        for segment_file in segments_filenames:
-            os.remove(encodeFilename(segment_file))
-
         return True

+ 1 - 1
youtube_dl/downloader/rtmp.py

@@ -169,7 +169,7 @@ class RtmpFD(FileDownloader):
             self.report_error('[rtmpdump] Could not connect to RTMP server.')
             return False
 
-        while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
+        while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
             prevsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen('[rtmpdump] %s bytes' % prevsize)
             time.sleep(5.0)  # This seems to be needed

+ 41 - 9
youtube_dl/extractor/abc.py

@@ -1,13 +1,19 @@
 from __future__ import unicode_literals
 
+import hashlib
+import hmac
 import re
+import time
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     js_to_json,
     int_or_none,
     parse_iso8601,
+    try_get,
+    update_url_query,
 )
 
 
@@ -99,21 +105,24 @@ class ABCIE(InfoExtractor):
 class ABCIViewIE(InfoExtractor):
     IE_NAME = 'abc.net.au:iview'
     _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
+    _GEO_COUNTRIES = ['AU']
 
     # ABC iview programs are normally available for 14 days only.
     _TESTS = [{
-        'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
+        'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
         'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
         'info_dict': {
-            'id': 'ZX9735A001S00',
+            'id': 'ZW0898A003S00',
             'ext': 'mp4',
-            'title': 'Diaries Of A Broken Mind',
-            'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
-            'upload_date': '20161010',
-            'uploader_id': 'abc2',
-            'timestamp': 1476064920,
+            'title': 'Series 5 Ep 3',
+            'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
+            'upload_date': '20171228',
+            'uploader_id': 'abc1',
+            'timestamp': 1514499187,
+        },
+        'params': {
+            'skip_download': True,
         },
-        'skip': 'Video gone',
     }]
 
     def _real_extract(self, url):
@@ -124,7 +133,30 @@ class ABCIViewIE(InfoExtractor):
         title = video_params.get('title') or video_params['seriesTitle']
         stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
 
-        formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
+        house_number = video_params.get('episodeHouseNumber')
+        path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
+            int(time.time()), house_number)
+        sig = hmac.new(
+            'android.content.res.Resources'.encode('utf-8'),
+            path.encode('utf-8'), hashlib.sha256).hexdigest()
+        token = self._download_webpage(
+            'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
+
+        def tokenize_url(url, token):
+            return update_url_query(url, {
+                'hdnea': token,
+            })
+
+        for sd in ('sd', 'sd-low'):
+            sd_url = try_get(
+                stream, lambda x: x['streams']['hls'][sd], compat_str)
+            if not sd_url:
+                continue
+            formats = self._extract_m3u8_formats(
+                tokenize_url(sd_url, token), video_id, 'mp4',
+                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+            if formats:
+                break
         self._sort_formats(formats)
 
         subtitles = {}

+ 15 - 5
youtube_dl/extractor/abcnews.py

@@ -7,12 +7,21 @@ import time
 
 from .amp import AMPIE
 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..compat import compat_urlparse
 
 
 class AbcNewsVideoIE(AMPIE):
     IE_NAME = 'abcnews:video'
-    _VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        abcnews\.go\.com/
+                        (?:
+                            [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
+                            video/embed\?.*?\bid=
+                        )
+                        (?P<id>\d+)
+                    '''
 
     _TESTS = [{
         'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
@@ -29,6 +38,9 @@ class AbcNewsVideoIE(AMPIE):
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        'url': 'http://abcnews.go.com/video/embed?id=46979033',
+        'only_matching': True,
     }, {
         'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
         'only_matching': True,
@@ -97,9 +109,7 @@ class AbcNewsIE(InfoExtractor):
             r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
         full_video_url = compat_urlparse.urljoin(url, video_url)
 
-        youtube_url = self._html_search_regex(
-            r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
-            webpage, 'YouTube URL', default=None)
+        youtube_url = YoutubeIE._extract_url(webpage)
 
         timestamp = None
         date_str = self._html_search_regex(
@@ -129,7 +139,7 @@ class AbcNewsIE(InfoExtractor):
         }
 
         if youtube_url:
-            entries = [entry, self.url_result(youtube_url, 'Youtube')]
+            entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
             return self.playlist_result(entries)
 
         return entry

+ 1 - 1
youtube_dl/extractor/abcotvs.py

@@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor):
                 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
                 'ext': 'mp4',
                 'title': 'East Bay museum celebrates vintage synthesizers',
-                'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
+                'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'timestamp': 1421123075,
                 'upload_date': '20150113',

+ 12 - 11
youtube_dl/extractor/acast.py

@@ -8,7 +8,7 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     int_or_none,
-    parse_iso8601,
+    unified_timestamp,
     OnDemandPagedList,
 )
 
@@ -32,7 +32,7 @@ class ACastIE(InfoExtractor):
     }, {
         # test with multiple blings
         'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
-        'md5': '55c0097badd7095f494c99a172f86501',
+        'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
         'info_dict': {
             'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
             'ext': 'mp3',
@@ -40,23 +40,24 @@ class ACastIE(InfoExtractor):
             'timestamp': 1477346700,
             'upload_date': '20161024',
             'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
-            'duration': 2797,
+            'duration': 2766,
         }
     }]
 
     def _real_extract(self, url):
         channel, display_id = re.match(self._VALID_URL, url).groups()
         cast_data = self._download_json(
-            'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id)
+            'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
+        e = cast_data['result']['episode']
         return {
-            'id': compat_str(cast_data['id']),
+            'id': compat_str(e['id']),
             'display_id': display_id,
-            'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
-            'title': cast_data['name'],
-            'description': cast_data.get('description'),
-            'thumbnail': cast_data.get('image'),
-            'timestamp': parse_iso8601(cast_data.get('publishingDate')),
-            'duration': int_or_none(cast_data.get('duration')),
+            'url': e['mediaUrl'],
+            'title': e['name'],
+            'description': e.get('description'),
+            'thumbnail': e.get('image'),
+            'timestamp': unified_timestamp(e.get('publishingDate')),
+            'duration': int_or_none(e.get('duration')),
         }
 
 

+ 2 - 1
youtube_dl/extractor/addanime.py

@@ -25,7 +25,8 @@ class AddAnimeIE(InfoExtractor):
             'ext': 'mp4',
             'description': 'One Piece 606',
             'title': 'One Piece 606',
-        }
+        },
+        'skip': 'Video is gone',
     }, {
         'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
         'only_matching': True,

+ 150 - 0
youtube_dl/extractor/adn.py

@@ -0,0 +1,150 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import json
+import os
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..compat import compat_ord
+from ..utils import (
+    bytes_to_intlist,
+    ExtractorError,
+    float_or_none,
+    intlist_to_bytes,
+    srt_subtitles_timecode,
+    strip_or_none,
+    urljoin,
+)
+
+
+class ADNIE(InfoExtractor):
+    IE_DESC = 'Anime Digital Network'
+    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
+        'md5': 'e497370d847fd79d9d4c74be55575c7a',
+        'info_dict': {
+            'id': '7778',
+            'ext': 'mp4',
+            'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
+            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
+        }
+    }
+    _BASE_URL = 'http://animedigitalnetwork.fr'
+
+    def _get_subtitles(self, sub_path, video_id):
+        if not sub_path:
+            return None
+
+        enc_subtitles = self._download_webpage(
+            urljoin(self._BASE_URL, sub_path),
+            video_id, fatal=False, headers={
+                'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
+            })
+        if not enc_subtitles:
+            return None
+
+        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
+        dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
+            bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
+            bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
+            bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
+        ))
+        subtitles_json = self._parse_json(
+            dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
+            None, fatal=False)
+        if not subtitles_json:
+            return None
+
+        subtitles = {}
+        for sub_lang, sub in subtitles_json.items():
+            srt = ''
+            for num, current in enumerate(sub):
+                start, end, text = (
+                    float_or_none(current.get('startTime')),
+                    float_or_none(current.get('endTime')),
+                    current.get('text'))
+                if start is None or end is None or text is None:
+                    continue
+                srt += os.linesep.join(
+                    (
+                        '%d' % num,
+                        '%s --> %s' % (
+                            srt_subtitles_timecode(start),
+                            srt_subtitles_timecode(end)),
+                        text,
+                        os.linesep,
+                    ))
+
+            if sub_lang == 'vostf':
+                sub_lang = 'fr'
+            subtitles.setdefault(sub_lang, []).extend([{
+                'ext': 'json',
+                'data': json.dumps(sub),
+            }, {
+                'ext': 'srt',
+                'data': srt,
+            }])
+        return subtitles
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        player_config = self._parse_json(self._search_regex(
+            r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id)
+
+        video_info = {}
+        video_info_str = self._search_regex(
+            r'videoInfo\s*=\s*({.+});', webpage,
+            'video info', fatal=False)
+        if video_info_str:
+            video_info = self._parse_json(
+                video_info_str, video_id, fatal=False) or {}
+
+        options = player_config.get('options') or {}
+        metas = options.get('metas') or {}
+        title = metas.get('title') or video_info['title']
+        links = player_config.get('links') or {}
+        error = None
+        if not links:
+            links_url = player_config['linksurl']
+            links_data = self._download_json(urljoin(
+                self._BASE_URL, links_url), video_id)
+            links = links_data.get('links') or {}
+            error = links_data.get('error')
+
+        formats = []
+        for format_id, qualities in links.items():
+            if not isinstance(qualities, dict):
+                continue
+            for load_balancer_url in qualities.values():
+                load_balancer_data = self._download_json(
+                    load_balancer_url, video_id, fatal=False) or {}
+                m3u8_url = load_balancer_data.get('location')
+                if not m3u8_url:
+                    continue
+                m3u8_formats = self._extract_m3u8_formats(
+                    m3u8_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id=format_id, fatal=False)
+                if format_id == 'vf':
+                    for f in m3u8_formats:
+                        f['language'] = 'fr'
+                formats.extend(m3u8_formats)
+        if not error:
+            error = options.get('error')
+        if not formats and error:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
+            'thumbnail': video_info.get('image'),
+            'formats': formats,
+            'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
+            'episode': metas.get('subtitle') or video_info.get('videoTitle'),
+            'series': video_info.get('playlistTitle'),
+        }

+ 113 - 18
youtube_dl/extractor/adobepass.py

@@ -6,12 +6,16 @@ import time
 import xml.etree.ElementTree as etree
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_kwargs,
+    compat_urlparse,
+)
 from ..utils import (
     unescapeHTML,
     urlencode_postdata,
     unified_timestamp,
     ExtractorError,
+    NO_DEFAULT,
 )
 
 
@@ -21,6 +25,11 @@ MSO_INFO = {
         'username_field': 'username',
         'password_field': 'password',
     },
+    'ATTOTT': {
+        'name': 'DIRECTV NOW',
+        'username_field': 'email',
+        'password_field': 'loginpassword',
+    },
     'Rogers': {
         'name': 'Rogers',
         'username_field': 'UserName',
@@ -31,6 +40,26 @@ MSO_INFO = {
         'username_field': 'user',
         'password_field': 'passwd',
     },
+    'TWC': {
+        'name': 'Time Warner Cable | Spectrum',
+        'username_field': 'Ecom_User_ID',
+        'password_field': 'Ecom_Password',
+    },
+    'Brighthouse': {
+        'name': 'Bright House Networks | Spectrum',
+        'username_field': 'j_username',
+        'password_field': 'j_password',
+    },
+    'Charter_Direct': {
+        'name': 'Charter Spectrum',
+        'username_field': 'IDToken1',
+        'password_field': 'IDToken2',
+    },
+    'Verizon': {
+        'name': 'Verizon FiOS',
+        'username_field': 'IDToken1',
+        'password_field': 'IDToken2',
+    },
     'thr030': {
         'name': '3 Rivers Communications'
     },
@@ -1293,6 +1322,15 @@ class AdobePassIE(InfoExtractor):
     _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
     _MVPD_CACHE = 'ap-mvpd'
 
+    _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        headers = kwargs.get('headers', {})
+        headers.update(self.geo_verification_headers())
+        kwargs['headers'] = headers
+        return super(AdobePassIE, self)._download_webpage_handle(
+            *args, **compat_kwargs(kwargs))
+
     @staticmethod
     def _get_mvpd_resource(provider_id, title, guid, rating):
         channel = etree.Element('channel')
@@ -1335,6 +1373,21 @@ class AdobePassIE(InfoExtractor):
                 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
                 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
 
+        def extract_redirect_url(html, url=None, fatal=False):
+            # TODO: eliminate code duplication with generic extractor and move
+            # redirection code into _download_webpage_handle
+            REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
+            redirect_url = self._search_regex(
+                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+                html, 'meta refresh redirect',
+                default=NO_DEFAULT if fatal else None, fatal=fatal)
+            if not redirect_url:
+                return None
+            if url:
+                redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
+            return redirect_url
+
         mvpd_headers = {
             'ap_42': 'anonymous',
             'ap_11': 'Linux i686',
@@ -1374,42 +1427,82 @@ class AdobePassIE(InfoExtractor):
                     # Comcast page flow varies by video site and whether you
                     # are on Comcast's network.
                     provider_redirect_page, urlh = provider_redirect_page_res
-                    # Check for Comcast auto login
                     if 'automatically signing you in' in provider_redirect_page:
                         oauth_redirect_url = self._html_search_regex(
                             r'window\.location\s*=\s*[\'"]([^\'"]+)',
                             provider_redirect_page, 'oauth redirect')
-                        # Just need to process the request. No useful data comes back
                         self._download_webpage(
                             oauth_redirect_url, video_id, 'Confirming auto login')
                     else:
                         if '<form name="signin"' in provider_redirect_page:
-                            # already have the form, just fill it
                             provider_login_page_res = provider_redirect_page_res
                         elif 'http-equiv="refresh"' in provider_redirect_page:
-                            # redirects to the login page
-                            oauth_redirect_url = self._html_search_regex(
-                                r'content="0;\s*url=([^\'"]+)',
-                                provider_redirect_page, 'meta refresh redirect')
+                            oauth_redirect_url = extract_redirect_url(
+                                provider_redirect_page, fatal=True)
                             provider_login_page_res = self._download_webpage_handle(
-                                oauth_redirect_url,
-                                video_id, 'Downloading Provider Login Page')
+                                oauth_redirect_url, video_id,
+                                self._DOWNLOADING_LOGIN_PAGE)
                         else:
                             provider_login_page_res = post_form(
-                                provider_redirect_page_res, 'Downloading Provider Login Page')
+                                provider_redirect_page_res,
+                                self._DOWNLOADING_LOGIN_PAGE)
 
-                        mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
-                            mso_info.get('username_field', 'username'): username,
-                            mso_info.get('password_field', 'password'): password,
-                        })
+                        mvpd_confirm_page_res = post_form(
+                            provider_login_page_res, 'Logging in', {
+                                mso_info['username_field']: username,
+                                mso_info['password_field']: password,
+                            })
                         mvpd_confirm_page, urlh = mvpd_confirm_page_res
                         if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
                             post_form(mvpd_confirm_page_res, 'Confirming Login')
-
+                elif mso_id == 'Verizon':
+                    # In general, if you're connecting from a Verizon-assigned IP,
+                    # you will not actually pass your credentials.
+                    provider_redirect_page, urlh = provider_redirect_page_res
+                    if 'Please wait ...' in provider_redirect_page:
+                        saml_redirect_url = self._html_search_regex(
+                            r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
+                            provider_redirect_page,
+                            'SAML Redirect URL', group='url')
+                        saml_login_page = self._download_webpage(
+                            saml_redirect_url, video_id,
+                            'Downloading SAML Login Page')
+                    else:
+                        saml_login_page_res = post_form(
+                            provider_redirect_page_res, 'Logging in', {
+                                mso_info['username_field']: username,
+                                mso_info['password_field']: password,
+                            })
+                        saml_login_page, urlh = saml_login_page_res
+                        if 'Please try again.' in saml_login_page:
+                            raise ExtractorError(
+                                'We\'re sorry, but either the User ID or Password entered is not correct.')
+                    saml_login_url = self._search_regex(
+                        r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
+                        saml_login_page, 'SAML Login URL', group='url')
+                    saml_response_json = self._download_json(
+                        saml_login_url, video_id, 'Downloading SAML Response',
+                        headers={'Content-Type': 'text/xml'})
+                    self._download_webpage(
+                        saml_response_json['targetValue'], video_id,
+                        'Confirming Login', data=urlencode_postdata({
+                            'SAMLResponse': saml_response_json['SAMLResponse'],
+                            'RelayState': saml_response_json['RelayState']
+                        }), headers={
+                            'Content-Type': 'application/x-www-form-urlencoded'
+                        })
                 else:
-                    # Normal, non-Comcast flow
+                    # Some providers (e.g. DIRECTV NOW) have another meta refresh
+                    # based redirect that should be followed.
+                    provider_redirect_page, urlh = provider_redirect_page_res
+                    provider_refresh_redirect_url = extract_redirect_url(
+                        provider_redirect_page, url=urlh.geturl())
+                    if provider_refresh_redirect_url:
+                        provider_redirect_page_res = self._download_webpage_handle(
+                            provider_refresh_redirect_url, video_id,
+                            'Downloading Provider Redirect Page (meta refresh)')
                     provider_login_page_res = post_form(
-                        provider_redirect_page_res, 'Downloading Provider Login Page')
+                        provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
                     mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
                         mso_info.get('username_field', 'username'): username,
                         mso_info.get('password_field', 'password'): password,
@@ -1448,6 +1541,8 @@ class AdobePassIE(InfoExtractor):
                     self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
                     count += 1
                     continue
+                if '<error' in authorize:
+                    raise ExtractorError(xml_text(authorize, 'details'), expected=True)
                 authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
                 requestor_info[guid] = authz_token
                 self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)

+ 104 - 177
youtube_dl/extractor/adultswim.py

@@ -5,91 +5,52 @@ import re
 
 from .turner import TurnerBaseIE
 from ..utils import (
-    ExtractorError,
     int_or_none,
+    strip_or_none,
 )
 
 
 class AdultSwimIE(TurnerBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
+    _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'
 
     _TESTS = [{
         'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
-        'playlist': [
-            {
-                'md5': '247572debc75c7652f253c8daa51a14d',
-                'info_dict': {
-                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
-                    'ext': 'flv',
-                    'title': 'Rick and Morty - Pilot Part 1',
-                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
-                },
-            },
-            {
-                'md5': '77b0e037a4b20ec6b98671c4c379f48d',
-                'info_dict': {
-                    'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
-                    'ext': 'flv',
-                    'title': 'Rick and Morty - Pilot Part 4',
-                    'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
-                },
-            },
-        ],
         'info_dict': {
             'id': 'rQxZvXQ4ROaSOqq-or2Mow',
+            'ext': 'mp4',
             'title': 'Rick and Morty - Pilot',
-            'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
+            'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
+            'timestamp': 1493267400,
+            'upload_date': '20170427',
         },
-        'skip': 'This video is only available for registered users',
-    }, {
-        'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
-        'playlist': [
-            {
-                'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
-                'info_dict': {
-                    'id': '-t8CamQlQ2aYZ49ItZCFog-0',
-                    'ext': 'flv',
-                    'title': 'American Dad - Putting Francine Out of Business',
-                    'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
-                },
-            }
-        ],
-        'info_dict': {
-            'id': '-t8CamQlQ2aYZ49ItZCFog',
-            'title': 'American Dad - Putting Francine Out of Business',
-            'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
+        'params': {
+            # m3u8 download
+            'skip_download': True,
         },
+        'expected_warnings': ['Unable to download f4m manifest'],
     }, {
         'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
-        'playlist': [
-            {
-                'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
-                'info_dict': {
-                    'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
-                    'ext': 'mp4',
-                    'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
-                    'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
-                },
-            }
-        ],
         'info_dict': {
             'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+            'ext': 'mp4',
             'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
-            'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+            'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
+            'upload_date': '20080124',
+            'timestamp': 1201150800,
         },
         'params': {
             # m3u8 download
             'skip_download': True,
-        }
+        },
     }, {
-        # heroMetadata.trailer
         'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
         'info_dict': {
             'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
             'ext': 'mp4',
             'title': 'Decker - Inside Decker: A New Hero',
-            'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
-            'duration': 249.008,
+            'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
+            'timestamp': 1469480460,
+            'upload_date': '20160725',
         },
         'params': {
             # m3u8 download
@@ -97,136 +58,102 @@ class AdultSwimIE(TurnerBaseIE):
         },
         'expected_warnings': ['Unable to download f4m manifest'],
     }, {
-        'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
+        'url': 'http://www.adultswim.com/videos/attack-on-titan',
+        'info_dict': {
+            'id': 'b7A69dzfRzuaXIECdxW8XQ',
+            'title': 'Attack on Titan',
+            'description': 'md5:6c8e003ea0777b47013e894767f5e114',
+        },
+        'playlist_mincount': 12,
+    }, {
+        'url': 'http://www.adultswim.com/videos/streams/williams-stream',
         'info_dict': {
-            'id': 'eYiLsKVgQ6qTC6agD67Sig',
-            'title': 'Toonami - Friday, October 14th, 2016',
-            'description': 'md5:99892c96ffc85e159a428de85c30acde',
+            'id': 'd8DEBj7QRfetLsRgFnGEyg',
+            'ext': 'mp4',
+            'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+            'description': 'original programming',
         },
-        'playlist': [{
-            'md5': '',
-            'info_dict': {
-                'id': 'eYiLsKVgQ6qTC6agD67Sig',
-                'ext': 'mp4',
-                'title': 'Toonami - Friday, October 14th, 2016',
-                'description': 'md5:99892c96ffc85e159a428de85c30acde',
-            },
-        }],
         'params': {
             # m3u8 download
             'skip_download': True,
         },
-        'expected_warnings': ['Unable to download f4m manifest'],
     }]
 
-    @staticmethod
-    def find_video_info(collection, slug):
-        for video in collection.get('videos'):
-            if video.get('slug') == slug:
-                return video
-
-    @staticmethod
-    def find_collection_by_linkURL(collections, linkURL):
-        for collection in collections:
-            if collection.get('linkURL') == linkURL:
-                return collection
-
-    @staticmethod
-    def find_collection_containing_video(collections, slug):
-        for collection in collections:
-            for video in collection.get('videos'):
-                if video.get('slug') == slug:
-                    return collection, video
-        return None, None
-
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        show_path = mobj.group('show_path')
-        episode_path = mobj.group('episode_path')
-        is_playlist = True if mobj.group('is_playlist') else False
-
-        webpage = self._download_webpage(url, episode_path)
-
-        # Extract the value of `bootstrappedData` from the Javascript in the page.
-        bootstrapped_data = self._parse_json(self._search_regex(
-            r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
-
-        # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
-        # NOTE: We are only downloading one video (the current one) not the playlist
-        if is_playlist:
-            collections = bootstrapped_data['playlists']['collections']
-            collection = self.find_collection_by_linkURL(collections, show_path)
-            video_info = self.find_video_info(collection, episode_path)
-
-            show_title = video_info['showTitle']
-            segment_ids = [video_info['videoPlaybackID']]
+        show_path, episode_path = re.match(self._VALID_URL, url).groups()
+        display_id = episode_path or show_path
+        webpage = self._download_webpage(url, display_id)
+        initial_data = self._parse_json(self._search_regex(
+            r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});',
+            webpage, 'initial data'), display_id)
+
+        is_stream = show_path == 'streams'
+        if is_stream:
+            if not episode_path:
+                episode_path = 'live-stream'
+
+            video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path)
+            video_id = video_data.get('stream')
+
+            if not video_id:
+                entries = []
+                for episode in video_data.get('archiveEpisodes', []):
+                    episode_url = episode.get('url')
+                    if not episode_url:
+                        continue
+                    entries.append(self.url_result(
+                        episode_url, 'AdultSwim', episode.get('id')))
+                return self.playlist_result(
+                    entries, video_data.get('id'), video_data.get('title'),
+                    strip_or_none(video_data.get('description')))
         else:
-            collections = bootstrapped_data['show']['collections']
-            collection, video_info = self.find_collection_containing_video(collections, episode_path)
-            # Video wasn't found in the collections, let's try `slugged_video`.
-            if video_info is None:
-                if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
-                    video_info = bootstrapped_data['slugged_video']
-            if not video_info:
-                video_info = bootstrapped_data.get(
-                    'heroMetadata', {}).get('trailer', {}).get('video')
-            if not video_info:
-                video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
-            if not video_info:
-                raise ExtractorError('Unable to find video info')
-
-            show = bootstrapped_data['show']
-            show_title = show['title']
-            stream = video_info.get('stream')
-            if stream and stream.get('videoPlaybackID'):
-                segment_ids = [stream['videoPlaybackID']]
-            elif video_info.get('clips'):
-                segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
-            elif video_info.get('videoPlaybackID'):
-                segment_ids = [video_info['videoPlaybackID']]
-            elif video_info.get('id'):
-                segment_ids = [video_info['id']]
-            else:
-                if video_info.get('auth') is True:
-                    raise ExtractorError(
-                        'This video is only available via cable service provider subscription that'
-                        ' is not currently supported. You may want to use --cookies.', expected=True)
-                else:
-                    raise ExtractorError('Unable to find stream or clips')
-
-        episode_id = video_info['id']
-        episode_title = video_info['title']
-        episode_description = video_info.get('description')
-        episode_duration = int_or_none(video_info.get('duration'))
-        view_count = int_or_none(video_info.get('views'))
+            show_data = initial_data['show']
+
+            if not episode_path:
+                entries = []
+                for video in show_data.get('videos', []):
+                    slug = video.get('slug')
+                    if not slug:
+                        continue
+                    entries.append(self.url_result(
+                        'http://adultswim.com/videos/%s/%s' % (show_path, slug),
+                        'AdultSwim', video.get('id')))
+                return self.playlist_result(
+                    entries, show_data.get('id'), show_data.get('title'),
+                    strip_or_none(show_data.get('metadata', {}).get('description')))
+
+            video_data = show_data['sluggedVideo']
+            video_id = video_data['id']
+
+        info = self._extract_cvp_info(
+            'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id,
+            video_id, {
+                'secure': {
+                    'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
+                    'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
+                },
+            }, {
+                'url': url,
+                'site_name': 'AdultSwim',
+                'auth_required': video_data.get('auth'),
+            })
 
-        entries = []
-        for part_num, segment_id in enumerate(segment_ids):
-            segement_info = self._extract_cvp_info(
-                'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
-                segment_id, {
-                    'secure': {
-                        'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
-                        'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
-                    },
-                })
-            segment_title = '%s - %s' % (show_title, episode_title)
-            if len(segment_ids) > 1:
-                segment_title += ' Part %d' % (part_num + 1)
-            segement_info.update({
-                'id': segment_id,
-                'title': segment_title,
-                'description': episode_description,
+        info.update({
+            'id': video_id,
+            'display_id': display_id,
+            'description': info.get('description') or strip_or_none(video_data.get('description')),
+        })
+        if not is_stream:
+            info.update({
+                'duration': info.get('duration') or int_or_none(video_data.get('duration')),
+                'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')),
+                'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')),
+                'episode': info['title'],
+                'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')),
             })
-            entries.append(segement_info)
 
-        return {
-            '_type': 'playlist',
-            'id': episode_id,
-            'display_id': episode_path,
-            'entries': entries,
-            'title': '%s - %s' % (show_title, episode_title),
-            'description': episode_description,
-            'duration': episode_duration,
-            'view_count': view_count,
-        }
+            info['series'] = video_data.get('collection_title') or info.get('series')
+            if info['series'] and info['series'] != info['title']:
+                info['title'] = '%s - %s' % (info['series'], info['title'])
+
+        return info

+ 37 - 11
youtube_dl/extractor/aenetworks.py

@@ -23,7 +23,19 @@ class AENetworksBaseIE(ThePlatformIE):
 class AENetworksIE(AENetworksBaseIE):
     IE_NAME = 'aenetworks'
     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
-    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?P<domain>
+                            (?:history|aetv|mylifetime|lifetimemovieclub)\.com|
+                            fyi\.tv
+                        )/
+                        (?:
+                            shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
+                            movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
+                            specials/(?P<special_display_id>[^/]+)/full-special
+                        )
+                    '''
     _TESTS = [{
         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
         'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
@@ -62,17 +74,24 @@ class AENetworksIE(AENetworksBaseIE):
     }, {
         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
         'only_matching': True
+    }, {
+        'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
+        'only_matching': True
+    }, {
+        'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
+        'only_matching': True
     }]
     _DOMAIN_TO_REQUESTOR_ID = {
         'history.com': 'HISTORY',
         'aetv.com': 'AETV',
         'mylifetime.com': 'LIFETIME',
+        'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
         'fyi.tv': 'FYI',
     }
 
     def _real_extract(self, url):
-        domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
-        display_id = show_path or movie_display_id
+        domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
+        display_id = show_path or movie_display_id or special_display_id
         webpage = self._download_webpage(url, display_id)
         if show_path:
             url_parts = show_path.split('/')
@@ -82,18 +101,22 @@ class AENetworksIE(AENetworksBaseIE):
                 for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
                     entries.append(self.url_result(
                         compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
-                return self.playlist_result(
-                    entries, self._html_search_meta('aetn:SeriesId', webpage),
-                    self._html_search_meta('aetn:SeriesTitle', webpage))
-            elif url_parts_len == 2:
+                if entries:
+                    return self.playlist_result(
+                        entries, self._html_search_meta('aetn:SeriesId', webpage),
+                        self._html_search_meta('aetn:SeriesTitle', webpage))
+                else:
+                    # single season
+                    url_parts_len = 2
+            if url_parts_len == 2:
                 entries = []
-                for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
+                for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
                     episode_attributes = extract_attributes(episode_item)
                     episode_url = compat_urlparse.urljoin(
                         url, episode_attributes['data-canonical'])
                     entries.append(self.url_result(
                         episode_url, 'AENetworks',
-                        episode_attributes['data-videoid']))
+                        episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
                 return self.playlist_result(
                     entries, self._html_search_meta('aetn:SeasonId', webpage))
 
@@ -103,9 +126,12 @@ class AENetworksIE(AENetworksBaseIE):
         }
         video_id = self._html_search_meta('aetn:VideoID', webpage)
         media_url = self._search_regex(
-            r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
+            [r"media_url\s*=\s*'(?P<url>[^']+)'",
+             r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
+             r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
+            webpage, 'video url', group='url')
         theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
-            r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
+            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
         info = self._parse_theplatform_metadata(theplatform_metadata)
         if theplatform_metadata.get('AETN$isBehindWall'):
             requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]

+ 175 - 38
youtube_dl/extractor/afreecatv.py

@@ -4,20 +4,17 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse_urlparse,
-    compat_urlparse,
-)
+from ..compat import compat_xpath
 from ..utils import (
+    determine_ext,
     ExtractorError,
     int_or_none,
-    update_url_query,
-    xpath_element,
     xpath_text,
 )
 
 
 class AfreecaTVIE(InfoExtractor):
+    IE_NAME = 'afreecatv'
     IE_DESC = 'afreecatv.com'
     _VALID_URL = r'''(?x)
                     https?://
@@ -42,7 +39,8 @@ class AfreecaTVIE(InfoExtractor):
             'uploader': 'dailyapril',
             'uploader_id': 'dailyapril',
             'upload_date': '20160503',
-        }
+        },
+        'skip': 'Video is gone',
     }, {
         'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
         'info_dict': {
@@ -70,6 +68,93 @@ class AfreecaTVIE(InfoExtractor):
                 'upload_date': '20160502',
             },
         }],
+        'skip': 'Video is gone',
+    }, {
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
+        'info_dict': {
+            'id': '18650793',
+            'ext': 'mp4',
+            'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': '윈아디',
+            'uploader_id': 'badkids',
+            'duration': 107,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
+        'info_dict': {
+            'id': '10481652',
+            'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
+            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+            'uploader': 'dailyapril',
+            'uploader_id': 'dailyapril',
+            'duration': 6492,
+        },
+        'playlist_count': 2,
+        'playlist': [{
+            'md5': 'd8b7c174568da61d774ef0203159bf97',
+            'info_dict': {
+                'id': '20160502_c4c62b9d_174361386_1',
+                'ext': 'mp4',
+                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
+                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+                'uploader': 'dailyapril',
+                'uploader_id': 'dailyapril',
+                'upload_date': '20160502',
+                'duration': 3601,
+            },
+        }, {
+            'md5': '58f2ce7f6044e34439ab2d50612ab02b',
+            'info_dict': {
+                'id': '20160502_39e739bb_174361386_2',
+                'ext': 'mp4',
+                'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
+                'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+                'uploader': 'dailyapril',
+                'uploader_id': 'dailyapril',
+                'upload_date': '20160502',
+                'duration': 2891,
+            },
+        }],
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # non standard key
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
+        'info_dict': {
+            'id': '20170411_BE689A0E_190960999_1_2_h',
+            'ext': 'mp4',
+            'title': '혼자사는여자집',
+            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+            'uploader': '♥이슬이',
+            'uploader_id': 'dasl8121',
+            'upload_date': '20170411',
+            'duration': 213,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # adult video
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
+        'info_dict': {
+            'id': '20171001_F1AE1711_196617479_1',
+            'ext': 'mp4',
+            'title': '[생]서아 초심 찾기 방송 (part 1)',
+            'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
+            'uploader': 'BJ서아',
+            'uploader_id': 'bjdyrksu',
+            'upload_date': '20171001',
+            'duration': 3600,
+            'age_limit': 18,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
         'only_matching': True,
@@ -84,42 +169,93 @@ class AfreecaTVIE(InfoExtractor):
         m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
         if m:
             video_key['upload_date'] = m.group('upload_date')
-            video_key['part'] = m.group('part')
+            video_key['part'] = int(m.group('part'))
         return video_key
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        parsed_url = compat_urllib_parse_urlparse(url)
-        info_url = compat_urlparse.urlunparse(parsed_url._replace(
-            netloc='afbbs.afreecatv.com:8080',
-            path='/api/video/get_video_info.php'))
 
         video_xml = self._download_xml(
-            update_url_query(info_url, {'nTitleNo': video_id}), video_id)
+            'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
+            video_id, query={
+                'nTitleNo': video_id,
+                'partialView': 'SKIP_ADULT',
+            })
 
-        if xpath_element(video_xml, './track/video/file') is None:
+        flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
+        if flag and flag != 'SUCCEED':
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, flag), expected=True)
+
+        video_element = video_xml.findall(compat_xpath('./track/video'))[1]
+        if video_element is None or video_element.text is None:
             raise ExtractorError('Specified AfreecaTV video does not exist',
                                  expected=True)
 
-        title = xpath_text(video_xml, './track/title', 'title')
+        video_url = video_element.text.strip()
+
+        title = xpath_text(video_xml, './track/title', 'title', fatal=True)
+
         uploader = xpath_text(video_xml, './track/nickname', 'uploader')
         uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
-        duration = int_or_none(xpath_text(video_xml, './track/duration',
-                                          'duration'))
+        duration = int_or_none(xpath_text(
+            video_xml, './track/duration', 'duration'))
         thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
 
-        entries = []
-        for i, video_file in enumerate(video_xml.findall('./track/video/file')):
-            video_key = self.parse_video_key(video_file.get('key', ''))
-            if not video_key:
-                continue
-            entries.append({
-                'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
-                'title': title,
-                'upload_date': video_key.get('upload_date'),
-                'duration': int_or_none(video_file.get('duration')),
-                'url': video_file.text,
+        common_entry = {
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'thumbnail': thumbnail,
+        }
+
+        info = common_entry.copy()
+        info.update({
+            'id': video_id,
+            'title': title,
+            'duration': duration,
+        })
+
+        if not video_url:
+            entries = []
+            file_elements = video_element.findall(compat_xpath('./file'))
+            one = len(file_elements) == 1
+            for file_num, file_element in enumerate(file_elements, start=1):
+                file_url = file_element.text
+                if not file_url:
+                    continue
+                key = file_element.get('key', '')
+                upload_date = self._search_regex(
+                    r'^(\d{8})_', key, 'upload date', default=None)
+                file_duration = int_or_none(file_element.get('duration'))
+                format_id = key if key else '%s_%s' % (video_id, file_num)
+                if determine_ext(file_url) == 'm3u8':
+                    formats = self._extract_m3u8_formats(
+                        file_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls',
+                        note='Downloading part %d m3u8 information' % file_num)
+                else:
+                    formats = [{
+                        'url': file_url,
+                        'format_id': 'http',
+                    }]
+                if not formats:
+                    continue
+                self._sort_formats(formats)
+                file_info = common_entry.copy()
+                file_info.update({
+                    'id': format_id,
+                    'title': title if one else '%s (part %d)' % (title, file_num),
+                    'upload_date': upload_date,
+                    'duration': file_duration,
+                    'formats': formats,
+                })
+                entries.append(file_info)
+            entries_info = info.copy()
+            entries_info.update({
+                '_type': 'multi_video',
+                'entries': entries,
             })
+            return entries_info
 
         info = {
             'id': video_id,
@@ -130,16 +266,17 @@ class AfreecaTVIE(InfoExtractor):
             'thumbnail': thumbnail,
         }
 
-        if len(entries) > 1:
-            info['_type'] = 'multi_video'
-            info['entries'] = entries
-        elif len(entries) == 1:
-            info['url'] = entries[0]['url']
-            info['upload_date'] = entries[0].get('upload_date')
+        if determine_ext(video_url) == 'm3u8':
+            info['formats'] = self._extract_m3u8_formats(
+                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls')
         else:
-            raise ExtractorError(
-                'No files found for the specified AfreecaTV video, either'
-                ' the URL is incorrect or the video has been made private.',
-                expected=True)
+            app, playpath = video_url.split('mp4:')
+            info.update({
+                'url': app,
+                'ext': 'flv',
+                'play_path': 'mp4:' + playpath,
+                'rtmp_live': True,  # downloading won't end without this
+            })
 
         return info

+ 10 - 18
youtube_dl/extractor/airmozilla.py

@@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
     _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
     _TEST = {
         'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
-        'md5': '2e3e7486ba5d180e829d453875b9b8bf',
+        'md5': '8d02f53ee39cf006009180e21df1f3ba',
         'info_dict': {
             'id': '6x4q2w',
             'ext': 'mp4',
             'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
-            'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
+            'thumbnail': r're:https?://.*/poster\.jpg',
             'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
             'timestamp': 1422487800,
             'upload_date': '20150128',
@@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
+        video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
 
         embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
-        jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
-        metadata = self._parse_json(jwconfig, video_id)
-
-        formats = [{
-            'url': source['file'],
-            'ext': source['type'],
-            'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
-            'format': source['label'],
-            'height': int(source['label'].rstrip('p')),
-        } for source in metadata['playlist'][0]['sources']]
-        self._sort_formats(formats)
+        jwconfig = self._parse_json(self._search_regex(
+            r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
 
+        info_dict = self._parse_jwplayer_data(jwconfig, video_id)
         view_count = int_or_none(self._html_search_regex(
             r'Views since archived: ([0-9]+)',
             webpage, 'view count', fatal=False))
@@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
             r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
             webpage, 'duration', fatal=False))
 
-        return {
+        info_dict.update({
             'id': video_id,
             'title': self._og_search_title(webpage),
-            'formats': formats,
             'url': self._og_search_url(webpage),
             'display_id': display_id,
-            'thumbnail': metadata['playlist'][0].get('image'),
             'description': self._og_search_description(webpage),
             'timestamp': timestamp,
             'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
             'duration': duration,
             'view_count': view_count,
             'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
-        }
+        })
+
+        return info_dict

+ 53 - 0
youtube_dl/extractor/aliexpress.py

@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    float_or_none,
+    try_get,
+)
+
+
+class AliExpressLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://live.aliexpress.com/live/2800002704436634',
+        'md5': 'e729e25d47c5e557f2630eaf99b740a5',
+        'info_dict': {
+            'id': '2800002704436634',
+            'ext': 'mp4',
+            'title': 'CASIMA7.22',
+            'thumbnail': r're:http://.*\.jpg',
+            'uploader': 'CASIMA Official Store',
+            'timestamp': 1500717600,
+            'upload_date': '20170722',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        data = self._parse_json(
+            self._search_regex(
+                r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
+                webpage, 'runParams'),
+            video_id)
+
+        title = data['title']
+
+        formats = self._extract_m3u8_formats(
+            data['replyStreamUrl'], video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': data.get('coverUrl'),
+            'uploader': try_get(
+                data, lambda x: x['followBar']['name'], compat_str),
+            'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
+            'formats': formats,
+        }

+ 6 - 3
youtube_dl/extractor/aljazeera.py

@@ -4,9 +4,9 @@ from .common import InfoExtractor
 
 
 class AlJazeeraIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
         'info_dict': {
             'id': '3792260579001',
@@ -19,7 +19,10 @@ class AlJazeeraIE(InfoExtractor):
         },
         'add_ie': ['BrightcoveNew'],
         'skip': 'Not accessible from Travis CI server',
-    }
+    }, {
+        'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
+        'only_matching': True,
+    }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):

+ 33 - 11
youtube_dl/extractor/allocine.py

@@ -2,9 +2,13 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
-    remove_end,
+    int_or_none,
     qualities,
+    remove_end,
+    try_get,
+    unified_timestamp,
     url_basename,
 )
 
@@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor):
             'title': 'Astérix - Le Domaine des Dieux Teaser VF',
             'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
             'thumbnail': r're:http://.*\.jpg',
+            'duration': 39,
+            'timestamp': 1404273600,
+            'upload_date': '20140702',
+            'view_count': int,
         },
     }, {
         'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
@@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor):
             'title': 'Planes 2 Bande-annonce VF',
             'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
             'thumbnail': r're:http://.*\.jpg',
+            'duration': 69,
+            'timestamp': 1385659800,
+            'upload_date': '20131128',
+            'view_count': int,
         },
     }, {
         'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
@@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor):
             'title': 'Dragons 2 - Bande annonce finale VF',
             'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
             'thumbnail': r're:http://.*\.jpg',
+            'duration': 144,
+            'timestamp': 1397589900,
+            'upload_date': '20140415',
+            'view_count': int,
         },
     }, {
         'url': 'http://www.allocine.fr/video/video-19550147/',
@@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor):
             r'data-model="([^"]+)"', webpage, 'data model', default=None)
         if model:
             model_data = self._parse_json(model, display_id)
-
-            for video_url in model_data['sources'].values():
+            video = model_data['videos'][0]
+            title = video['title']
+            for video_url in video['sources'].values():
                 video_id, format_id = url_basename(video_url).split('_')[:2]
                 formats.append({
                     'format_id': format_id,
                     'quality': quality(format_id),
                     'url': video_url,
                 })
-
-            title = model_data['title']
+            duration = int_or_none(video.get('duration'))
+            view_count = int_or_none(video.get('view_count'))
+            timestamp = unified_timestamp(try_get(
+                video, lambda x: x['added_at']['date'], compat_str))
         else:
             video_id = display_id
             media_data = self._download_json(
                 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
+            title = remove_end(
+                self._html_search_regex(
+                    r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
+                ' - AlloCiné')
             for key, value in media_data['video'].items():
                 if not key.endswith('Path'):
                     continue
-
                 format_id = key[:-len('Path')]
                 formats.append({
                     'format_id': format_id,
                     'quality': quality(format_id),
                     'url': value,
                 })
-
-            title = remove_end(self._html_search_regex(
-                r'(?s)<title>(.+?)</title>', webpage, 'title'
-            ).strip(), ' - AlloCiné')
+            duration, view_count, timestamp = [None] * 3
 
         self._sort_formats(formats)
 
@@ -104,7 +123,10 @@ class AllocineIE(InfoExtractor):
             'id': video_id,
             'display_id': display_id,
             'title': title,
+            'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
             'formats': formats,
-            'description': self._og_search_description(webpage),
         }

+ 33 - 13
youtube_dl/extractor/amcnetworks.py

@@ -3,14 +3,15 @@ from __future__ import unicode_literals
 
 from .theplatform import ThePlatformIE
 from ..utils import (
-    update_url_query,
-    parse_age_limit,
     int_or_none,
+    parse_age_limit,
+    try_get,
+    update_url_query,
 )
 
 
 class AMCNetworksIE(ThePlatformIE):
-    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
         'md5': '',
@@ -44,6 +45,12 @@ class AMCNetworksIE(ThePlatformIE):
     }, {
         'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
         'only_matching': True,
+    }, {
+        'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -53,20 +60,31 @@ class AMCNetworksIE(ThePlatformIE):
             'mbr': 'true',
             'manifest': 'm3u',
         }
-        media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
+        media_url = self._search_regex(
+            r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
+            webpage, 'media url')
         theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
-            r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
+            r'link\.theplatform\.com/s/([^?]+)',
+            media_url, 'theplatform_path'), display_id)
         info = self._parse_theplatform_metadata(theplatform_metadata)
         video_id = theplatform_metadata['pid']
         title = theplatform_metadata['title']
-        rating = theplatform_metadata['ratings'][0]['rating']
-        auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
+        rating = try_get(
+            theplatform_metadata, lambda x: x['ratings'][0]['rating'])
+        auth_required = self._search_regex(
+            r'window\.authRequired\s*=\s*(true|false);',
+            webpage, 'auth required')
         if auth_required == 'true':
-            requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
-            resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
-            query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
+            requestor_id = self._search_regex(
+                r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
+                webpage, 'requestor id')
+            resource = self._get_mvpd_resource(
+                requestor_id, title, video_id, rating)
+            query['auth'] = self._extract_mvpd_auth(
+                url, video_id, requestor_id, resource)
         media_url = update_url_query(media_url, query)
-        formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
+        formats, subtitles = self._extract_theplatform_smil(
+            media_url, video_id)
         self._sort_formats(formats)
         info.update({
             'id': video_id,
@@ -78,9 +96,11 @@ class AMCNetworksIE(ThePlatformIE):
         if ns_keys:
             ns = list(ns_keys)[0]
             series = theplatform_metadata.get(ns + '$show')
-            season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
+            season_number = int_or_none(
+                theplatform_metadata.get(ns + '$season'))
             episode = theplatform_metadata.get(ns + '$episodeTitle')
-            episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
+            episode_number = int_or_none(
+                theplatform_metadata.get(ns + '$episode'))
             if season_number:
                 title = 'Season %d - %s' % (season_number, title)
             if series:

+ 85 - 0
youtube_dl/extractor/americastestkitchen.py

@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+    try_get,
+    unified_strdate,
+)
+
+
+class AmericasTestKitchenIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
+        'md5': 'b861c3e365ac38ad319cfd509c30577f',
+        'info_dict': {
+            'id': '1_5g5zua6e',
+            'title': 'Summer Dinner Party',
+            'ext': 'mp4',
+            'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'timestamp': 1497285541,
+            'upload_date': '20170612',
+            'uploader_id': 'roger.metcalf@americastestkitchen.com',
+            'release_date': '20170617',
+            'series': "America's Test Kitchen",
+            'season_number': 17,
+            'episode': 'Summer Dinner Party',
+            'episode_number': 24,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        partner_id = self._search_regex(
+            r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
+            webpage, 'kaltura partner id')
+
+        video_data = self._parse_json(
+            self._search_regex(
+                r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
+                webpage, 'initial context'),
+            video_id)
+
+        ep_data = try_get(
+            video_data,
+            (lambda x: x['episodeDetail']['content']['data'],
+             lambda x: x['videoDetail']['content']['data']), dict)
+        ep_meta = ep_data.get('full_video', {})
+        external_id = ep_data.get('external_id') or ep_meta['external_id']
+
+        title = ep_data.get('title') or ep_meta.get('title')
+        description = clean_html(ep_meta.get('episode_description') or ep_data.get(
+            'description') or ep_meta.get('description'))
+        thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
+        release_date = unified_strdate(ep_data.get('aired_at'))
+
+        season_number = int_or_none(ep_meta.get('season_number'))
+        episode = ep_meta.get('title')
+        episode_number = int_or_none(ep_meta.get('episode_number'))
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'kaltura:%s:%s' % (partner_id, external_id),
+            'ie_key': 'Kaltura',
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'release_date': release_date,
+            'series': "America's Test Kitchen",
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
+        }

+ 19 - 7
youtube_dl/extractor/amp.py

@@ -7,15 +7,19 @@ from ..utils import (
     parse_iso8601,
     mimetype2ext,
     determine_ext,
+    ExtractorError,
 )
 
 
 class AMPIE(InfoExtractor):
     # parse Akamai Adaptive Media Player feed
     def _extract_feed_info(self, url):
-        item = self._download_json(
+        feed = self._download_json(
             url, None, 'Downloading Akamai AMP feed',
-            'Unable to download Akamai AMP feed')['channel']['item']
+            'Unable to download Akamai AMP feed')
+        item = feed.get('channel', {}).get('item')
+        if not item:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
 
         video_id = item['guid']
 
@@ -30,9 +34,12 @@ class AMPIE(InfoExtractor):
             if isinstance(media_thumbnail, dict):
                 media_thumbnail = [media_thumbnail]
             for thumbnail_data in media_thumbnail:
-                thumbnail = thumbnail_data['@attributes']
+                thumbnail = thumbnail_data.get('@attributes', {})
+                thumbnail_url = thumbnail.get('url')
+                if not thumbnail_url:
+                    continue
                 thumbnails.append({
-                    'url': self._proto_relative_url(thumbnail['url'], 'http:'),
+                    'url': self._proto_relative_url(thumbnail_url, 'http:'),
                     'width': int_or_none(thumbnail.get('width')),
                     'height': int_or_none(thumbnail.get('height')),
                 })
@@ -43,9 +50,14 @@ class AMPIE(InfoExtractor):
             if isinstance(media_subtitle, dict):
                 media_subtitle = [media_subtitle]
             for subtitle_data in media_subtitle:
-                subtitle = subtitle_data['@attributes']
-                lang = subtitle.get('lang') or 'en'
-                subtitles[lang] = [{'url': subtitle['href']}]
+                subtitle = subtitle_data.get('@attributes', {})
+                subtitle_href = subtitle.get('href')
+                if not subtitle_href:
+                    continue
+                subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
+                    'url': subtitle_href,
+                    'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
+                })
 
         formats = []
         media_content = get_media_node('content')

+ 44 - 22
youtube_dl/extractor/animeondemand.py

@@ -3,16 +3,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-    compat_str,
-)
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
     extract_attributes,
     ExtractorError,
-    sanitized_Request,
     urlencode_postdata,
+    urljoin,
 )
 
 
@@ -21,6 +18,8 @@ class AnimeOnDemandIE(InfoExtractor):
     _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
     _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
     _NETRC_MACHINE = 'animeondemand'
+    # German-speaking countries of Europe
+    _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
     _TESTS = [{
         # jap, OmU
         'url': 'https://www.anime-on-demand.de/anime/161',
@@ -46,6 +45,10 @@ class AnimeOnDemandIE(InfoExtractor):
         # Full length film, non-series, ger/jap, Dub/OmU, account required
         'url': 'https://www.anime-on-demand.de/anime/185',
         'only_matching': True,
+    }, {
+        # Flash videos
+        'url': 'https://www.anime-on-demand.de/anime/12',
+        'only_matching': True,
     }]
 
     def _login(self):
@@ -72,19 +75,18 @@ class AnimeOnDemandIE(InfoExtractor):
             'post url', default=self._LOGIN_URL, group='url')
 
         if not post_url.startswith('http'):
-            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
-
-        request = sanitized_Request(
-            post_url, urlencode_postdata(login_form))
-        request.add_header('Referer', self._LOGIN_URL)
+            post_url = urljoin(self._LOGIN_URL, post_url)
 
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            post_url, None, 'Logging in',
+            data=urlencode_postdata(login_form), headers={
+                'Referer': self._LOGIN_URL,
+            })
 
         if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
             error = self._search_regex(
-                r'<p class="alert alert-danger">(.+?)</p>',
-                response, 'error', default=None)
+                r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
+                response, 'error', default=None, group='error')
             if error:
                 raise ExtractorError('Unable to login: %s' % error, expected=True)
             raise ExtractorError('Unable to log in')
@@ -120,10 +122,11 @@ class AnimeOnDemandIE(InfoExtractor):
             formats = []
 
             for input_ in re.findall(
-                    r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
+                    r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
                 attributes = extract_attributes(input_)
+                title = attributes.get('data-dialog-header')
                 playlist_urls = []
-                for playlist_key in ('data-playlist', 'data-otherplaylist'):
+                for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
                     playlist_url = attributes.get(playlist_key)
                     if isinstance(playlist_url, compat_str) and re.match(
                             r'/?[\da-zA-Z]+', playlist_url):
@@ -147,19 +150,38 @@ class AnimeOnDemandIE(InfoExtractor):
                         format_id_list.append(compat_str(num))
                     format_id = '-'.join(format_id_list)
                     format_note = ', '.join(filter(None, (kind, lang_note)))
-                    request = sanitized_Request(
-                        compat_urlparse.urljoin(url, playlist_url),
+                    item_id_list = []
+                    if format_id:
+                        item_id_list.append(format_id)
+                    item_id_list.append('videomaterial')
+                    playlist = self._download_json(
+                        urljoin(url, playlist_url), video_id,
+                        'Downloading %s JSON' % ' '.join(item_id_list),
                         headers={
                             'X-Requested-With': 'XMLHttpRequest',
                             'X-CSRF-Token': csrf_token,
                             'Referer': url,
                             'Accept': 'application/json, text/javascript, */*; q=0.01',
-                        })
-                    playlist = self._download_json(
-                        request, video_id, 'Downloading %s playlist JSON' % format_id,
-                        fatal=False)
+                        }, fatal=False)
                     if not playlist:
                         continue
+                    stream_url = playlist.get('streamurl')
+                    if stream_url:
+                        rtmp = re.search(
+                            r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
+                            stream_url)
+                        if rtmp:
+                            formats.append({
+                                'url': rtmp.group('url'),
+                                'app': rtmp.group('app'),
+                                'play_path': rtmp.group('playpath'),
+                                'page_url': url,
+                                'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
+                                'rtmp_real_time': True,
+                                'format_id': 'rtmp',
+                                'ext': 'flv',
+                            })
+                            continue
                     start_video = playlist.get('startvideo', 0)
                     playlist = playlist.get('playlist')
                     if not playlist or not isinstance(playlist, list):
@@ -222,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor):
                     f.update({
                         'id': '%s-%s' % (f['id'], m.group('kind').lower()),
                         'title': m.group('title'),
-                        'url': compat_urlparse.urljoin(url, m.group('href')),
+                        'url': urljoin(url, m.group('href')),
                     })
                     entries.append(f)
 

+ 70 - 14
youtube_dl/extractor/anvato.py

@@ -5,6 +5,7 @@ import base64
 import hashlib
 import json
 import random
+import re
 import time
 
 from .common import InfoExtractor
@@ -16,6 +17,8 @@ from ..utils import (
     intlist_to_bytes,
     int_or_none,
     strip_jsonp,
+    unescapeHTML,
+    unsmuggle_url,
 )
 
 
@@ -26,6 +29,8 @@ def md5_text(s):
 
 
 class AnvatoIE(InfoExtractor):
+    _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
+
     # Copied from anvplayer.min.js
     _ANVACK_TABLE = {
         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
@@ -114,6 +119,22 @@ class AnvatoIE(InfoExtractor):
         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
     }
 
+    _MCP_TO_ACCESS_KEY_TABLE = {
+        'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
+        'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
+        'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+        'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+        'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
+        'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+        'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+        'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
+        'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
+        'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
+        'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
+        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
+    }
+
+    _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
     _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
 
     def __init__(self, *args, **kwargs):
@@ -177,17 +198,16 @@ class AnvatoIE(InfoExtractor):
                 'tbr': tbr if tbr != 0 else None,
             }
 
-            if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
-                # Not using _extract_m3u8_formats here as individual media
-                # playlists are also included in published_urls.
-                if tbr is None:
-                    formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
-                    continue
-                else:
-                    a_format.update({
-                        'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
-                        'ext': 'mp4',
-                    })
+            if media_format == 'm3u8' and tbr is not None:
+                a_format.update({
+                    'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
+                    'ext': 'mp4',
+                })
+            elif media_format == 'm3u8-variant' or ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
             elif ext == 'mp3' or media_format == 'mp3':
                 a_format['vcodec'] = 'none'
             else:
@@ -222,9 +242,45 @@ class AnvatoIE(InfoExtractor):
             'subtitles': subtitles,
         }
 
+    @staticmethod
+    def _extract_urls(ie, webpage, video_id):
+        entries = []
+        for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
+            anvplayer_data = ie._parse_json(
+                mobj.group('anvp'), video_id, transform_source=unescapeHTML,
+                fatal=False)
+            if not anvplayer_data:
+                continue
+            video = anvplayer_data.get('video')
+            if not isinstance(video, compat_str) or not video.isdigit():
+                continue
+            access_key = anvplayer_data.get('accessKey')
+            if not access_key:
+                mcp = anvplayer_data.get('mcp')
+                if mcp:
+                    access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
+                        mcp.lower())
+            if not access_key:
+                continue
+            entries.append(ie.url_result(
+                'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
+                video_id=video))
+        return entries
+
     def _extract_anvato_videos(self, webpage, video_id):
-        anvplayer_data = self._parse_json(self._html_search_regex(
-            r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
-            'Anvato player data'), video_id)
+        anvplayer_data = self._parse_json(
+            self._html_search_regex(
+                self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
+            video_id)
         return self._get_anvato_videos(
             anvplayer_data['accessKey'], anvplayer_data['video'])
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
+        mobj = re.match(self._VALID_URL, url)
+        access_key, video_id = mobj.group('access_key_or_mcp', 'id')
+        if access_key not in self._ANVACK_TABLE:
+            access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
+        return self._get_anvato_videos(access_key, video_id)

+ 30 - 19
youtube_dl/extractor/aparat.py

@@ -3,13 +3,13 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
-    HEADRequest,
+    int_or_none,
+    mimetype2ext,
 )
 
 
 class AparatIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
 
     _TEST = {
         'url': 'http://www.aparat.com/v/wP8On',
@@ -29,30 +29,41 @@ class AparatIE(InfoExtractor):
         # Note: There is an easier-to-parse configuration at
         # http://www.aparat.com/video/video/config/videohash/%video_id
         # but the URL in there does not work
-        embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
-        webpage = self._download_webpage(embed_url, video_id)
-
-        file_list = self._parse_json(self._search_regex(
-            r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
-        for i, item in enumerate(file_list[0]):
-            video_url = item['file']
-            req = HEADRequest(video_url)
-            res = self._request_webpage(
-                req, video_id, note='Testing video URL %d' % i, errnote=False)
-            if res:
-                break
-        else:
-            raise ExtractorError('No working video URLs found')
+        webpage = self._download_webpage(
+            'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+            video_id)
 
         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
+
+        file_list = self._parse_json(
+            self._search_regex(
+                r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
+                'file list'),
+            video_id)
+
+        formats = []
+        for item in file_list[0]:
+            file_url = item.get('file')
+            if not file_url:
+                continue
+            ext = mimetype2ext(item.get('type'))
+            label = item.get('label')
+            formats.append({
+                'url': file_url,
+                'ext': ext,
+                'format_id': label or ext,
+                'height': int_or_none(self._search_regex(
+                    r'(\d+)[pP]', label or '', 'height', default=None)),
+            })
+        self._sort_formats(formats)
+
         thumbnail = self._search_regex(
             r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
-            'ext': 'mp4',
             'thumbnail': thumbnail,
             'age_limit': self._family_friendly_search(webpage),
+            'formats': formats,
         }

+ 2 - 2
youtube_dl/extractor/appleconnect.py

@@ -12,13 +12,13 @@ class AppleConnectIE(InfoExtractor):
     _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
     _TEST = {
         'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
-        'md5': '10d0f2799111df4cb1c924520ca78f98',
+        'md5': 'e7c38568a01ea45402570e6029206723',
         'info_dict': {
             'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
             'ext': 'm4v',
             'title': 'Energy',
             'uploader': 'Drake',
-            'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'upload_date': '20150710',
             'timestamp': 1436545535,
         },

+ 5 - 4
youtube_dl/extractor/appletrailers.py

@@ -70,7 +70,8 @@ class AppleTrailersIE(InfoExtractor):
     }, {
         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
         'info_dict': {
-            'id': 'blackthorn',
+            'id': '4489',
+            'title': 'Blackthorn',
         },
         'playlist_mincount': 2,
         'expected_warnings': ['Unable to download JSON metadata'],
@@ -116,7 +117,7 @@ class AppleTrailersIE(InfoExtractor):
                             continue
                         formats.append({
                             'format_id': '%s-%s' % (version, size),
-                            'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
+                            'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
                             'width': int_or_none(size_data.get('width')),
                             'height': int_or_none(size_data.get('height')),
                             'language': version[:2],
@@ -178,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
             formats = []
             for format in settings['metadata']['sizes']:
                 # The src is a file pointing to the real video file
-                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
+                format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
                 formats.append({
                     'url': format_url,
                     'format': format['type'],
@@ -261,7 +262,7 @@ class AppleTrailersSectionIE(InfoExtractor):
             'title': 'Most Popular',
             'id': 'mostpopular',
         },
-        'playlist_mincount': 80,
+        'playlist_mincount': 30,
     }, {
         'url': 'http://trailers.apple.com/#section=moviestudios',
         'info_dict': {

+ 4 - 4
youtube_dl/extractor/archiveorg.py

@@ -1,13 +1,13 @@
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     unified_strdate,
     clean_html,
 )
 
 
-class ArchiveOrgIE(JWPlatformBaseIE):
+class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
     _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
@@ -24,12 +24,12 @@ class ArchiveOrgIE(JWPlatformBaseIE):
         }
     }, {
         'url': 'https://archive.org/details/Cops1922',
-        'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
+        'md5': '0869000b4ce265e8ca62738b336b268a',
         'info_dict': {
             'id': 'Cops1922',
             'ext': 'mp4',
             'title': 'Buster Keaton\'s "Cops" (1922)',
-            'description': 'md5:b4544662605877edd99df22f9620d858',
+            'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
         }
     }, {
         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',

+ 16 - 7
youtube_dl/extractor/ard.py

@@ -5,6 +5,7 @@ import re
 
 from .common import InfoExtractor
 from .generic import GenericIE
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -93,6 +94,7 @@ class ARDMediathekIE(InfoExtractor):
 
         duration = int_or_none(media_info.get('_duration'))
         thumbnail = media_info.get('_previewImage')
+        is_live = media_info.get('_isLive') is True
 
         subtitles = {}
         subtitle_url = media_info.get('_subtitleUrl')
@@ -106,6 +108,7 @@ class ARDMediathekIE(InfoExtractor):
             'id': video_id,
             'duration': duration,
             'thumbnail': thumbnail,
+            'is_live': is_live,
             'formats': formats,
             'subtitles': subtitles,
         }
@@ -124,6 +127,8 @@ class ARDMediathekIE(InfoExtractor):
                 quality = stream.get('_quality')
                 server = stream.get('_server')
                 for stream_url in stream_urls:
+                    if not isinstance(stream_url, compat_str) or '//' not in stream_url:
+                        continue
                     ext = determine_ext(stream_url)
                     if quality != 'auto' and ext in ('f4m', 'm3u8'):
                         continue
@@ -144,13 +149,11 @@ class ARDMediathekIE(InfoExtractor):
                                 'play_path': stream_url,
                                 'format_id': 'a%s-rtmp-%s' % (num, quality),
                             }
-                        elif stream_url.startswith('http'):
+                        else:
                             f = {
                                 'url': stream_url,
                                 'format_id': 'a%s-%s-%s' % (num, ext, quality)
                             }
-                        else:
-                            continue
                         m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
                         if m:
                             f.update({
@@ -166,9 +169,11 @@ class ARDMediathekIE(InfoExtractor):
         # determine video id from url
         m = re.match(self._VALID_URL, url)
 
+        document_id = None
+
         numid = re.search(r'documentId=([0-9]+)', url)
         if numid:
-            video_id = numid.group(1)
+            document_id = video_id = numid.group(1)
         else:
             video_id = m.group('video_id')
 
@@ -191,7 +196,7 @@ class ARDMediathekIE(InfoExtractor):
 
         title = self._html_search_regex(
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
-             r'<meta name="dcterms.title" content="(.*?)"/>',
+             r'<meta name="dcterms\.title" content="(.*?)"/>',
              r'<h4 class="headline">(.*?)</h4>'],
             webpage, 'title')
         description = self._html_search_meta(
@@ -228,12 +233,16 @@ class ARDMediathekIE(InfoExtractor):
                 'formats': formats,
             }
         else:  # request JSON file
+            if not document_id:
+                video_id = self._search_regex(
+                    r'/play/(?:config|media)/(\d+)', webpage, 'media id')
             info = self._extract_media_info(
-                'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
+                'http://www.ardmediathek.de/play/media/%s' % video_id,
+                webpage, video_id)
 
         info.update({
             'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if info.get('is_live') else title,
             'description': description,
             'thumbnail': thumbnail,
         })

+ 1 - 2
youtube_dl/extractor/arkena.py

@@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor):
                 exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
                 if kind == 'm3u8' or 'm3u8' in exts:
                     formats.extend(self._extract_m3u8_formats(
-                        f_url, video_id, 'mp4',
-                        entry_protocol='m3u8' if is_live else 'm3u8_native',
+                        f_url, video_id, 'mp4', 'm3u8_native',
                         m3u8_id=kind, fatal=False, live=is_live))
                 elif kind == 'flash' or 'f4m' in exts:
                     formats.extend(self._extract_f4m_formats(

+ 19 - 3
youtube_dl/extractor/arte.py

@@ -6,15 +6,18 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
+    compat_str,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
+    ExtractorError,
     find_xpath_attr,
-    unified_strdate,
     get_element_by_attribute,
     int_or_none,
     NO_DEFAULT,
     qualities,
+    try_get,
+    unified_strdate,
 )
 
 # There are different sources of video in arte.tv, the extraction process
@@ -79,6 +82,16 @@ class ArteTVBaseIE(InfoExtractor):
         info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
+        vsr = try_get(player_info, lambda x: x['VSR'], dict)
+        if not vsr:
+            error = None
+            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
+                error = try_get(
+                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
+            if not error:
+                error = 'Video %s is not available' % player_info.get('VID') or video_id
+            raise ExtractorError(error, expected=True)
+
         upload_date_str = player_info.get('shootingDate')
         if not upload_date_str:
             upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
@@ -107,7 +120,7 @@ class ArteTVBaseIE(InfoExtractor):
         langcode = LANGS.get(lang, lang)
 
         formats = []
-        for format_id, format_dict in player_info['VSR'].items():
+        for format_id, format_dict in vsr.items():
             f = dict(format_dict)
             versionCode = f.get('versionCode')
             l = re.escape(langcode)
@@ -180,7 +193,7 @@ class ArteTVBaseIE(InfoExtractor):
 
 class ArteTVPlus7IE(ArteTVBaseIE):
     IE_NAME = 'arte.tv:+7'
-    _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?P<lang>fr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P<id>[^/?#&]+)'
 
     _TESTS = [{
         'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
@@ -188,6 +201,9 @@ class ArteTVPlus7IE(ArteTVBaseIE):
     }, {
         'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
         'only_matching': True,
+    }, {
+        'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn',
+        'only_matching': True,
     }]
 
     @classmethod

+ 93 - 0
youtube_dl/extractor/asiancrush.py

@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import (
+    extract_attributes,
+    remove_end,
+    urlencode_postdata,
+)
+
+
+class AsianCrushIE(InfoExtractor):
+    """Extractor for single asiancrush.com video pages.
+
+    The video page is not downloaded: the numeric post id taken from the
+    URL is posted to the site's admin-ajax endpoint and the Kaltura
+    partner/entry ids found in the JSON reply are delegated to KalturaIE.
+    """
+
+    # ``0+`` sits outside the ``id`` group, so leading zeros are dropped
+    # from the extracted id.
+    _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
+    _TESTS = [{
+        'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
+        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
+        'info_dict': {
+            'id': '1_y4tmjm5r',
+            'ext': 'mp4',
+            'title': 'Women Who Flirt',
+            'description': 'md5:3db14e9186197857e7063522cb89a805',
+            'timestamp': 1496936429,
+            'upload_date': '20170608',
+            'uploader_id': 'craig@crifkin.com',
+        },
+    }, {
+        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return a url_result pointing at the Kaltura entry for ``url``."""
+        post_id = self._match_id(url)
+
+        form = urlencode_postdata({
+            'postid': post_id,
+            'action': 'get_channel_kaltura_vars',
+        })
+        kaltura_vars = self._download_json(
+            'https://www.asiancrush.com/wp-admin/admin-ajax.php', post_id,
+            data=form)
+
+        # Both ids are mandatory; a missing key raises KeyError.
+        entry_id = kaltura_vars['entry_id']
+        kaltura_url = 'kaltura:%s:%s' % (kaltura_vars['partner_id'], entry_id)
+
+        return self.url_result(
+            kaltura_url, ie=KalturaIE.ie_key(), video_id=entry_id,
+            video_title=kaltura_vars.get('vid_label'))
+
+
+class AsianCrushPlaylistIE(InfoExtractor):
+    """Extractor for asiancrush.com series pages.
+
+    Collects every link on the series page that matches
+    ``AsianCrushIE._VALID_URL`` and whose ``class`` attribute is exactly
+    ``clearfix``, and returns them as a playlist.
+    """
+
+    # ``0+`` sits outside the ``id`` group, so leading zeros are dropped
+    # from the playlist id (see the expected id in the test below).
+    _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
+    _TEST = {
+        'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
+        'info_dict': {
+            'id': '12481',
+            'title': 'Scholar Who Walks the Night',
+            'description': 'md5:7addd7c5132a09fd4741152d96cce886',
+        },
+        'playlist_count': 20,
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist result built from the series page at ``url``."""
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = []
+
+        for mobj in re.finditer(
+                r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
+                webpage):
+            attrs = extract_attributes(mobj.group(0))
+            # Only anchors whose class is exactly "clearfix" are kept.
+            if attrs.get('class') == 'clearfix':
+                entries.append(self.url_result(
+                    mobj.group('url'), ie=AsianCrushIE.ie_key()))
+
+        # Title sources are tried in order; later ones run only when the
+        # earlier ones found nothing.
+        # ``[^>]*`` (previously a bare ``[^>]``) lets the heading carry any
+        # number of attributes before ``id``, not exactly one character.
+        title = self._html_search_regex(
+            r'(?s)<h1\b[^>]*\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
+            'title', default=None)
+        if not title:
+            title = self._og_search_title(webpage, default=None)
+        if not title:
+            title = self._html_search_meta(
+                'twitter:title', webpage, 'title', default=None)
+        if not title:
+            title = self._search_regex(
+                r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
+        title = remove_end(title, ' | AsianCrush')
+
+        description = self._og_search_description(webpage, default=None)
+        if not description:
+            description = self._html_search_meta(
+                'twitter:description', webpage, 'description', fatal=False)
+
+        return self.playlist_result(entries, playlist_id, title, description)

+ 13 - 8
youtube_dl/extractor/atresplayer.py

@@ -36,7 +36,7 @@ class AtresPlayerIE(InfoExtractor):
         },
         {
             'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
-            'md5': '0d0e918533bbd4b263f2de4d197d4aac',
+            'md5': '6e52cbb513c405e403dbacb7aacf8747',
             'info_dict': {
                 'id': 'capitulo-112-david-bustamante',
                 'ext': 'flv',
@@ -87,10 +87,11 @@ class AtresPlayerIE(InfoExtractor):
             self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')
 
         error = self._html_search_regex(
-            r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None)
+            r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
+            response, 'error', default=None)
         if error:
             raise ExtractorError(
                 'Unable to login: %s' % error, expected=True)
@@ -155,13 +156,17 @@ class AtresPlayerIE(InfoExtractor):
             if format_id == 'token' or not video_url.startswith('http'):
                 continue
             if 'geodeswowsmpra3player' in video_url:
-                f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
-                f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
+                # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
+                # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
                 # this videos are protected by DRM, the f4m downloader doesn't support them
                 continue
-            else:
-                f4m_url = video_url[:-9] + '/manifest.f4m'
-            formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
+            video_url_hd = video_url.replace('free_es', 'es')
+            formats.extend(self._extract_f4m_formats(
+                video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
+                fatal=False))
+            formats.extend(self._extract_mpd_formats(
+                video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
+                fatal=False))
         self._sort_formats(formats)
 
         path_data = player.get('pathData')

+ 73 - 0
youtube_dl/extractor/atvat.py

@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class ATVAtIE(InfoExtractor):
+    """Extractor for atv.at video pages.
+
+    Player data is read from the JSON stored in the ``data-jsb`` attribute
+    of the FlashPlayer element; each entry of its ``parts`` list becomes one
+    entry of a ``multi_video`` result.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
+    _TESTS = [{
+        'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
+        'md5': 'c3b6b975fb3150fc628572939df205f2',
+        'info_dict': {
+            'id': '1698447',
+            'ext': 'mp4',
+            'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
+        }
+    }, {
+        'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
+        'only_matching': True,
+    }]
+
+    def _extract_part_formats(self, part, part_id):
+        """Build and sort the format list for one entry of ``parts``."""
+        formats = []
+        for source in part.get('sources', []):
+            source_url = source.get('src')
+            if not source_url:
+                continue
+            if determine_ext(source_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    source_url, part_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
+            formats.append({
+                'format_id': source.get('delivery'),
+                'url': source_url,
+            })
+        self._sort_formats(formats)
+        return formats
+
+    def _real_extract(self, url):
+        """Return a ``multi_video`` result with one entry per video part."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # The attribute value is HTML-escaped JSON.
+        player_json = unescapeHTML(self._search_regex(
+            r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
+            webpage, 'player data'))
+        video_data = self._parse_json(
+            player_json, display_id)['config']['initial_video']
+
+        video_id = video_data['id']
+        video_title = video_data['title']
+
+        entries = []
+        for part in video_data.get('parts', []):
+            part_id, part_title = part['id'], part['title']
+            part_formats = self._extract_part_formats(part, part_id)
+            entries.append({
+                'id': part_id,
+                'title': part_title,
+                'thumbnail': part.get('preview_image_url'),
+                'duration': int_or_none(part.get('duration')),
+                'is_live': part.get('is_livestream'),
+                'formats': part_formats,
+            })
+
+        return {
+            '_type': 'multi_video',
+            'id': video_id,
+            'title': video_title,
+            'entries': entries,
+        }

+ 2 - 2
youtube_dl/extractor/audioboom.py

@@ -16,7 +16,7 @@ class AudioBoomIE(InfoExtractor):
             'title': '3/09/2016 Czaban Hour 3',
             'description': 'Guest:   Nate Davis - NFL free agency,   Guest:   Stan Gans',
             'duration': 2245.72,
-            'uploader': 'Steve Czaban',
+            'uploader': 'SB Nation A.M.',
             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
         }
     }, {
@@ -43,7 +43,7 @@ class AudioBoomIE(InfoExtractor):
 
         def from_clip(field):
             if clip:
-                clip.get(field)
+                return clip.get(field)
 
         audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
             'audio', webpage, 'audio url')

+ 78 - 0
youtube_dl/extractor/aws.py

@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import hashlib
+import hmac
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlencode
+
+
+class AWSIE(InfoExtractor):
+    """Base extractor for JSON APIs that need AWS Signature Version 4.
+
+    ``_AWS_PROXY_HOST`` and ``_AWS_API_KEY`` are read from ``self`` but are
+    not defined in this class; they are expected to be supplied by
+    subclasses.
+    """
+
+    _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
+    _AWS_REGION = 'us-east-1'
+
+    def _aws_execute_api(self, aws_dict, video_id, query=None):
+        """Send a signed GET request and return the decoded JSON reply.
+
+        aws_dict -- mapping with the keys ``uri``, ``access_key`` and
+                    ``secret_key``; an optional ``session_token`` is sent as
+                    ``X-Amz-Security-Token`` and included in the signature
+        video_id -- id passed through to ``_download_json``
+        query    -- optional query parameters (mapping or sequence of pairs)
+
+        The steps follow the four "Task" pages linked in the comments below.
+        """
+        query = query or {}
+        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+        date = amz_date[:8]
+        headers = {
+            'Accept': 'application/json',
+            'Host': self._AWS_PROXY_HOST,
+            'X-Amz-Date': amz_date,
+            'X-Api-Key': self._AWS_API_KEY
+        }
+        session_token = aws_dict.get('session_token')
+        if session_token:
+            headers['X-Amz-Security-Token'] = session_token
+
+        def aws_hash(s):
+            return hashlib.sha256(s.encode('utf-8')).hexdigest()
+
+        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+        # The canonical query string must list parameters sorted by name;
+        # a plain dict gives no such guarantee, so sort explicitly.
+        query_items = query.items() if hasattr(query, 'items') else query
+        canonical_querystring = compat_urllib_parse_urlencode(
+            sorted(query_items))
+        # NOTE(review): urlencode is presumed to encode spaces as '+' while
+        # SigV4 expects '%20' - confirm before sending values with spaces.
+        # Canonical headers are ordered by their lowercased names.
+        signed_header_items = sorted(
+            (header_name.lower(), header_value)
+            for header_name, header_value in headers.items())
+        canonical_headers = ''.join(
+            '%s:%s\n' % item for item in signed_header_items)
+        signed_headers = ';'.join(
+            header_name for header_name, _ in signed_header_items)
+        canonical_request = '\n'.join([
+            'GET',
+            aws_dict['uri'],
+            canonical_querystring,
+            canonical_headers,
+            signed_headers,
+            aws_hash('')  # hash of the (empty) request payload
+        ])
+
+        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
+        credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
+        credential_scope = '/'.join(credential_scope_list)
+        string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)])
+
+        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+        def aws_hmac(key, msg):
+            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
+
+        def aws_hmac_digest(key, msg):
+            return aws_hmac(key, msg).digest()
+
+        def aws_hmac_hexdigest(key, msg):
+            return aws_hmac(key, msg).hexdigest()
+
+        # The signing key is derived by chaining an HMAC over each element
+        # of the credential scope, seeded with 'AWS4' + secret key.
+        k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
+        for value in credential_scope_list:
+            k_signing = aws_hmac_digest(k_signing, value)
+
+        signature = aws_hmac_hexdigest(k_signing, string_to_sign)
+
+        # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
+        headers['Authorization'] = ', '.join([
+            '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
+            'SignedHeaders=%s' % signed_headers,
+            'Signature=%s' % signature,
+        ])
+
+        return self._download_json(
+            'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
+            video_id, headers=headers)

+ 213 - 0
youtube_dl/extractor/azmedien.py

@@ -0,0 +1,213 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import (
+    get_element_by_class,
+    get_element_by_id,
+    strip_or_none,
+    urljoin,
+)
+
+
+class AZMedienBaseIE(InfoExtractor):
+    """Shared helper for the AZ Medien extractors."""
+
+    def _kaltura_video(self, partner_id, entry_id):
+        """Return a url_result delegating ``entry_id`` to KalturaIE."""
+        kaltura_url = 'kaltura:%s:%s' % (partner_id, entry_id)
+        return self.url_result(
+            kaltura_url, ie=KalturaIE.ie_key(), video_id=entry_id)
+
+
+class AZMedienIE(AZMedienBaseIE):
+    """Extractor for single videos on telezueri.ch, telebaern.tv, telem1.ch.
+
+    The id taken from the URL is the video slug; it is looked up in the
+    page's ``data-slug`` attributes to find the Kaltura entry id.
+    """
+
+    IE_DESC = 'AZ Medien videos'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?:
+                            telezueri\.ch|
+                            telebaern\.tv|
+                            telem1\.ch
+                        )/
+                        [0-9]+-show-[^/\#]+
+                        (?:
+                            /[0-9]+-episode-[^/\#]+
+                            (?:
+                                /[0-9]+-segment-(?:[^/\#]+\#)?|
+                                \#
+                            )|
+                            \#
+                        )
+                        (?P<id>[^\#]+)
+                    '''
+
+    _TESTS = [{
+        # URL with 'segment'
+        'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
+        'info_dict': {
+            'id': '1_2444peh4',
+            'ext': 'mp4',
+            'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
+            'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
+            'uploader_id': 'TeleZ?ri',
+            'upload_date': '20161218',
+            'timestamp': 1482084490,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # URL with 'segment' and fragment:
+        'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
+        'only_matching': True
+    }, {
+        # URL with 'episode' and fragment:
+        'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
+        'only_matching': True
+    }, {
+        # URL with 'show' and fragment:
+        'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        """Resolve ``url`` to a Kaltura url_result."""
+        slug = self._match_id(url)
+
+        page = self._download_webpage(url, slug)
+
+        kaltura_partner = self._search_regex(
+            r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
+            page, 'kaltura partner id')
+
+        # Pick the anchor whose data-slug starts with this video's slug and
+        # read the entry id from its data-id attribute.
+        entry_pattern = (
+            r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
+            % re.escape(slug))
+        kaltura_entry = self._html_search_regex(
+            entry_pattern, page, 'kaltura entry id', group='id')
+
+        return self._kaltura_video(kaltura_partner, kaltura_entry)
+
+
+class AZMedienPlaylistIE(AZMedienBaseIE):
+    """Extractor for show, topic, themen and episode pages of AZ Medien.
+
+    Entries are gathered with three strategies tried in order; a later one
+    runs only when the earlier ones produced no entries.
+    """
+
+    IE_DESC = 'AZ Medien playlists'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?:
+                            telezueri\.ch|
+                            telebaern\.tv|
+                            telem1\.ch
+                        )/
+                        (?P<id>[0-9]+-
+                            (?:
+                                show|
+                                topic|
+                                themen
+                            )-[^/\#]+
+                            (?:
+                                /[0-9]+-episode-[^/\#]+
+                            )?
+                        )$
+                    '''
+
+    _TESTS = [{
+        # URL with 'episode'
+        'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
+        'info_dict': {
+            'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
+            'title': 'News - Donnerstag, 15. Dezember 2016',
+        },
+        'playlist_count': 9,
+    }, {
+        # URL with 'themen'
+        'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
+        'info_dict': {
+            'id': '258-themen-tele-m1-classics',
+            'title': 'Tele M1 Classics',
+        },
+        'playlist_mincount': 15,
+    }, {
+        # URL with 'topic', contains nested playlists
+        'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
+        'only_matching': True,
+    }, {
+        # URL with 'show' only
+        'url': 'http://www.telezueri.ch/86-show-talktaeglich',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        """Return a playlist result for the page at ``url``."""
+        show_id = self._match_id(url)
+        webpage = self._download_webpage(url, show_id)
+
+        partner_id = self._search_regex(
+            r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
+            webpage, 'kaltura partner id', default=None)
+
+        entries = []
+
+        # 1. Kaltura entries referenced directly through data-id attributes.
+        if partner_id:
+            for mobj in re.finditer(
+                    r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage):
+                entries.append(
+                    self._kaltura_video(partner_id, mobj.group('id')))
+
+        # 2. Absolute video URLs stored in data-real attributes.
+        if not entries:
+            for mobj in re.finditer(
+                    r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage):
+                entries.append(self.url_result(
+                    mobj.group('url'), ie=AZMedienIE.ie_key()))
+
+        # 3. Relative links of named anchors. No explicit ie_key is passed
+        # because the targets may themselves be playlists, e.g.
+        # http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen
+        if not entries:
+            for mobj in re.finditer(
+                    r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage):
+                entries.append(
+                    self.url_result(urljoin(url, mobj.group('url'))))
+
+        # The element with id "video-title" serves as fallback title.
+        fallback_title = strip_or_none(
+            get_element_by_id('video-title', webpage))
+        title = self._search_regex(
+            r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+            webpage, 'title', default=fallback_title, group='title')
+
+        return self.playlist_result(entries, show_id, title)
+
+
+class AZMedienShowPlaylistIE(AZMedienBaseIE):
+    """Extractor for the "all episodes" listing pages of AZ Medien sites.
+
+    Every link found inside the ``search-mobile-box`` element becomes a
+    playlist entry; the entries carry no explicit ie_key.
+    """
+
+    IE_DESC = 'AZ Medien show playlists'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?:
+                            telezueri\.ch|
+                            telebaern\.tv|
+                            telem1\.ch
+                        )/
+                        (?:
+                            all-episodes|
+                            alle-episoden
+                        )/
+                        (?P<id>[^/?#&]+)
+                    '''
+
+    _TEST = {
+        'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
+        'info_dict': {
+            'id': 'astrotalk',
+            'title': 'TeleZüri: AstroTalk - alle episoden',
+            'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
+        },
+        'playlist_mincount': 13,
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist result for the episode listing at ``url``."""
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+        # The lookup may come back empty (None) when the container element
+        # is missing; fall back to '' so re.finditer() does not raise
+        # TypeError and an empty playlist is returned instead.
+        episodes = get_element_by_class('search-mobile-box', webpage) or ''
+        entries = [self.url_result(
+            urljoin(url, m.group('url'))) for m in re.finditer(
+                r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
+        title = self._og_search_title(webpage, fatal=False)
+        description = self._og_search_description(webpage)
+        return self.playlist_result(entries, playlist_id, title, description)

+ 0 - 140
youtube_dl/extractor/azubu.py

@@ -1,140 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    float_or_none,
-    sanitized_Request,
-)
-
-
-class AzubuIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
-    _TESTS = [
-        {
-            'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
-            'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
-            'info_dict': {
-                'id': '15575',
-                'ext': 'mp4',
-                'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
-                'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
-                'thumbnail': r're:^https?://.*\.jpe?g',
-                'timestamp': 1417523507.334,
-                'upload_date': '20141202',
-                'duration': 9988.7,
-                'uploader': 'GSL',
-                'uploader_id': 414310,
-                'view_count': int,
-            },
-        },
-        {
-            'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
-            'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
-            'info_dict': {
-                'id': '9344',
-                'ext': 'mp4',
-                'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
-                'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
-                'thumbnail': r're:^https?://.*\.jpe?g',
-                'timestamp': 1410530893.320,
-                'upload_date': '20140912',
-                'duration': 172.385,
-                'uploader': 'FnaticTV',
-                'uploader_id': 272749,
-                'view_count': int,
-            },
-            'skip': 'Channel offline',
-        },
-    ]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        data = self._download_json(
-            'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
-
-        title = data['title'].strip()
-        description = data.get('description')
-        thumbnail = data.get('thumbnail')
-        view_count = data.get('view_count')
-        user = data.get('user', {})
-        uploader = user.get('username')
-        uploader_id = user.get('id')
-
-        stream_params = json.loads(data['stream_params'])
-
-        timestamp = float_or_none(stream_params.get('creationDate'), 1000)
-        duration = float_or_none(stream_params.get('length'), 1000)
-
-        renditions = stream_params.get('renditions') or []
-        video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
-        if video:
-            renditions.append(video)
-
-        if not renditions and not user.get('channel', {}).get('is_live', True):
-            raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True)
-
-        formats = [{
-            'url': fmt['url'],
-            'width': fmt['frameWidth'],
-            'height': fmt['frameHeight'],
-            'vbr': float_or_none(fmt['encodingRate'], 1000),
-            'filesize': fmt['size'],
-            'vcodec': fmt['videoCodec'],
-            'container': fmt['videoContainer'],
-        } for fmt in renditions if fmt['url']]
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'duration': duration,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'view_count': view_count,
-            'formats': formats,
-        }
-
-
-class AzubuLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
-
-    _TESTS = [{
-        'url': 'http://www.azubu.tv/MarsTVMDLen',
-        'only_matching': True,
-    }, {
-        'url': 'http://azubu.uol.com.br/adolfz',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        user = self._match_id(url)
-
-        info = self._download_json(
-            'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
-            user)['data']
-        if info['type'] != 'STREAM':
-            raise ExtractorError('{0} is not streaming live'.format(user), expected=True)
-
-        req = sanitized_Request(
-            'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
-        req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
-        bc_info = self._download_json(req, user)
-        m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
-        formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
-        self._sort_formats(formats)
-
-        return {
-            'id': info['id'],
-            'title': self._live_title(info['title']),
-            'uploader_id': user,
-            'formats': formats,
-            'is_live': True,
-            'thumbnail': bc_info['poster'],
-        }

+ 1 - 1
youtube_dl/extractor/bambuser.py

@@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor):
             self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Referer', self._LOGIN_URL)
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')
 
         login_error = self._html_search_regex(
             r'(?s)<div class="messages error">(.+?)</div>',

+ 128 - 11
youtube_dl/extractor/bandcamp.py

@@ -14,14 +14,16 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    KNOWN_EXTENSIONS,
     parse_filesize,
     unescapeHTML,
     update_url_query,
+    unified_strdate,
 )
 
 
 class BandcampIE(InfoExtractor):
-    _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
+    _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
         'md5': 'c557841d5e50261777a6585648adf439',
@@ -34,12 +36,12 @@ class BandcampIE(InfoExtractor):
         '_skip': 'There is a limit of 200 free downloads / month for the test song'
     }, {
         'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
-        'md5': '73d0b3171568232574e45652f8720b5c',
+        'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
         'info_dict': {
             'id': '2650410135',
-            'ext': 'mp3',
-            'title': 'Lanius (Battle)',
-            'uploader': 'Ben Prunty Music',
+            'ext': 'aiff',
+            'title': 'Ben Prunty - Lanius (Battle)',
+            'uploader': 'Ben Prunty',
         },
     }]
 
@@ -47,6 +49,7 @@ class BandcampIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         title = mobj.group('title')
         webpage = self._download_webpage(url, title)
+        thumbnail = self._html_search_meta('og:image', webpage, default=None)
         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
         if not m_download:
             m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
@@ -75,6 +78,7 @@ class BandcampIE(InfoExtractor):
                 return {
                     'id': track_id,
                     'title': data['title'],
+                    'thumbnail': thumbnail,
                     'formats': formats,
                     'duration': float_or_none(data.get('duration')),
                 }
@@ -143,7 +147,7 @@ class BandcampIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'thumbnail': info.get('thumb_url'),
+            'thumbnail': info.get('thumb_url') or thumbnail,
             'uploader': info.get('artist'),
             'artist': artist,
             'track': track,
@@ -153,7 +157,7 @@ class BandcampIE(InfoExtractor):
 
 class BandcampAlbumIE(InfoExtractor):
     IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
 
     _TESTS = [{
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -209,20 +213,44 @@ class BandcampAlbumIE(InfoExtractor):
             'id': 'entropy-ep',
         },
         'playlist_mincount': 3,
+    }, {
+        # not all tracks have songs
+        'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
+        'info_dict': {
+            'id': 'we-are-the-plague',
+            'title': 'WE ARE THE PLAGUE',
+            'uploader_id': 'insulters',
+        },
+        'playlist_count': 2,
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        return (False
+                if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
+                else super(BandcampAlbumIE, cls).suitable(url))
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         uploader_id = mobj.group('subdomain')
         album_id = mobj.group('album_id')
         playlist_id = album_id or uploader_id
         webpage = self._download_webpage(url, playlist_id)
-        tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
-        if not tracks_paths:
+        track_elements = re.findall(
+            r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
+        if not track_elements:
             raise ExtractorError('The page doesn\'t contain any tracks')
+        # Only tracks with duration info have songs
         entries = [
-            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
-            for t_path in tracks_paths]
+            self.url_result(
+                compat_urlparse.urljoin(url, t_path),
+                ie=BandcampIE.ie_key(),
+                video_title=self._search_regex(
+                    r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
+                    elem_content, 'track title', fatal=False))
+            for elem_content, t_path in track_elements
+            if self._html_search_meta('duration', elem_content, default=None)]
+
         title = self._html_search_regex(
             r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
             webpage, 'title', fatal=False)
@@ -235,3 +263,92 @@ class BandcampAlbumIE(InfoExtractor):
             'title': title,
             'entries': entries,
         }
+
+
+class BandcampWeeklyIE(InfoExtractor):
+    IE_NAME = 'Bandcamp:weekly'
+    _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://bandcamp.com/?show=224',
+        'md5': 'b00df799c733cf7e0c567ed187dea0fd',
+        'info_dict': {
+            'id': '224',
+            'ext': 'opus',
+            'title': 'BC Weekly April 4th 2017 - Magic Moments',
+            'description': 'md5:5d48150916e8e02d030623a48512c874',
+            'duration': 5829.77,
+            'release_date': '20170404',
+            'series': 'Bandcamp Weekly',
+            'episode': 'Magic Moments',
+            'episode_number': 208,
+            'episode_id': '224',
+        }
+    }, {
+        'url': 'https://bandcamp.com/?blah/blah@&show=228',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        blob = self._parse_json(
+            self._search_regex(
+                r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
+                'blob', group='blob'),
+            video_id, transform_source=unescapeHTML)
+
+        show = blob['bcw_show']
+
+        # This is desired because any invalid show id redirects to `bandcamp.com`
+        # which happens to expose the latest Bandcamp Weekly episode.
+        show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
+
+        formats = []
+        for format_id, format_url in show['audio_stream'].items():
+            if not isinstance(format_url, compat_str):
+                continue
+            for known_ext in KNOWN_EXTENSIONS:
+                if known_ext in format_id:
+                    ext = known_ext
+                    break
+            else:
+                ext = None
+            formats.append({
+                'format_id': format_id,
+                'url': format_url,
+                'ext': ext,
+                'vcodec': 'none',
+            })
+        self._sort_formats(formats)
+
+        title = show.get('audio_title') or 'Bandcamp Weekly'
+        subtitle = show.get('subtitle')
+        if subtitle:
+            title += ' - %s' % subtitle
+
+        episode_number = None
+        seq = blob.get('bcw_seq')
+
+        if seq and isinstance(seq, list):
+            try:
+                episode_number = next(
+                    int_or_none(e.get('episode_number'))
+                    for e in seq
+                    if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
+            except StopIteration:
+                pass
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': show.get('desc') or show.get('short_desc'),
+            'duration': float_or_none(show.get('audio_duration')),
+            'is_live': False,
+            'release_date': unified_strdate(show.get('published_date')),
+            'series': 'Bandcamp Weekly',
+            'episode': show.get('subtitle'),
+            'episode_number': episode_number,
+            'episode_id': compat_str(video_id),
+            'formats': formats
+        }

+ 74 - 9
youtube_dl/extractor/bbc.py

@@ -6,14 +6,18 @@ import itertools
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     dict_get,
     ExtractorError,
     float_or_none,
+    get_element_by_class,
     int_or_none,
     parse_duration,
     parse_iso8601,
     try_get,
     unescapeHTML,
+    urlencode_postdata,
+    urljoin,
 )
 from ..compat import (
     compat_etree_fromstring,
@@ -25,19 +29,23 @@ from ..compat import (
 class BBCCoUkIE(InfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _ID_REGEX = r'[pb][\da-z]{7}'
+    _ID_REGEX = r'[pbw][\da-z]{7}'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?bbc\.co\.uk/
                         (?:
                             programmes/(?!articles/)|
                             iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
-                            music/clips[/#]|
-                            radio/player/
+                            music/(?:clips|audiovideo/popular)[/#]|
+                            radio/player/|
+                            events/[^/]+/play/[^/]+/
                         )
                         (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
                     ''' % _ID_REGEX
 
+    _LOGIN_URL = 'https://account.bbc.com/signin'
+    _NETRC_MACHINE = 'bbc'
+
     _MEDIASELECTOR_URLS = [
         # Provides HQ HLS streams with even better quality that pc mediaset but fails
         # with geolocation in some cases when it's even not geo restricted at all (e.g.
@@ -222,8 +230,48 @@ class BBCCoUkIE(InfoExtractor):
         }, {
             'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
             'only_matching': True,
-        }
-    ]
+        }, {
+            'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
+            'only_matching': True,
+        }, {
+            'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+            'only_matching': True,
+        }]
+
+    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading signin page')
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        post_url = urljoin(self._LOGIN_URL, self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+            'post url', default=self._LOGIN_URL, group='url'))
+
+        response, urlh = self._download_webpage_handle(
+            post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+            headers={'Referer': self._LOGIN_URL})
+
+        if self._LOGIN_URL in urlh.geturl():
+            error = clean_html(get_element_by_class('form-message', response))
+            if error:
+                raise ExtractorError(
+                    'Unable to login: %s' % error, expected=True)
+            raise ExtractorError('Unable to log in')
+
+    def _real_initialize(self):
+        self._login()
 
     class MediaSelectionError(Exception):
         def __init__(self, id):
@@ -336,6 +384,15 @@ class BBCCoUkIE(InfoExtractor):
                         formats.extend(self._extract_m3u8_formats(
                             href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                             m3u8_id=format_id, fatal=False))
+                        if re.search(self._USP_RE, href):
+                            usp_formats = self._extract_m3u8_formats(
+                                re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
+                                programme_id, ext='mp4', entry_protocol='m3u8_native',
+                                m3u8_id=format_id, fatal=False)
+                            for f in usp_formats:
+                                if f.get('height') and f['height'] > 720:
+                                    continue
+                                formats.append(f)
                     elif transfer_format == 'hds':
                         formats.extend(self._extract_f4m_formats(
                             href, programme_id, f4m_id=format_id, fatal=False))
@@ -350,7 +407,7 @@ class BBCCoUkIE(InfoExtractor):
                             fmt.update({
                                 'width': width,
                                 'height': height,
-                                'vbr': bitrate,
+                                'tbr': bitrate,
                                 'vcodec': encoding,
                             })
                         else:
@@ -359,7 +416,7 @@ class BBCCoUkIE(InfoExtractor):
                                 'acodec': encoding,
                                 'vcodec': 'none',
                             })
-                        if protocol == 'http':
+                        if protocol in ('http', 'https'):
                             # Direct link
                             fmt.update({
                                 'url': href,
@@ -378,6 +435,8 @@ class BBCCoUkIE(InfoExtractor):
                                 'rtmp_live': False,
                                 'ext': 'flv',
                             })
+                        else:
+                            continue
                         formats.append(fmt)
             elif kind == 'captions':
                 subtitles = self.extract_subtitles(media, programme_id)
@@ -396,7 +455,7 @@ class BBCCoUkIE(InfoExtractor):
                 description = smp_config['summary']
                 for item in smp_config['items']:
                     kind = item['kind']
-                    if kind != 'programme' and kind != 'radioProgramme':
+                    if kind not in ('programme', 'radioProgramme'):
                         continue
                     programme_id = item.get('vpid')
                     duration = int_or_none(item.get('duration'))
@@ -437,7 +496,7 @@ class BBCCoUkIE(InfoExtractor):
 
         for item in self._extract_items(playlist):
             kind = item.get('kind')
-            if kind != 'programme' and kind != 'radioProgramme':
+            if kind not in ('programme', 'radioProgramme'):
                 continue
             title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
             description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
@@ -470,6 +529,12 @@ class BBCCoUkIE(InfoExtractor):
 
         webpage = self._download_webpage(url, group_id, 'Downloading video page')
 
+        error = self._search_regex(
+            r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
+            webpage, 'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
+
         programme_id = None
         duration = None
 

+ 188 - 0
youtube_dl/extractor/beampro.py

@@ -0,0 +1,188 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    compat_str,
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+    try_get,
+    urljoin,
+)
+
+
+class BeamProBaseIE(InfoExtractor):
+    _API_BASE = 'https://mixer.com/api/v1'
+    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}
+
+    def _extract_channel_info(self, chan):
+        user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
+        return {
+            'uploader': chan.get('token') or try_get(
+                chan, lambda x: x['user']['username'], compat_str),
+            'uploader_id': compat_str(user_id) if user_id else None,
+            'age_limit': self._RATINGS.get(chan.get('audience')),
+        }
+
+
+class BeamProLiveIE(BeamProBaseIE):
+    IE_NAME = 'Mixer:live'
+    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'http://mixer.com/niterhayven',
+        'info_dict': {
+            'id': '261562',
+            'ext': 'mp4',
+            'title': 'Introducing The Witcher 3 //  The Grind Starts Now!',
+            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
+            'thumbnail': r're:https://.*\.jpg$',
+            'timestamp': 1483477281,
+            'upload_date': '20170103',
+            'uploader': 'niterhayven',
+            'uploader_id': '373396',
+            'age_limit': 18,
+            'is_live': True,
+            'view_count': int,
+        },
+        'skip': 'niterhayven is offline',
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
+
+    @classmethod
+    def suitable(cls, url):
+        return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        channel_name = self._match_id(url)
+
+        chan = self._download_json(
+            '%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
+
+        if chan.get('online') is False:
+            raise ExtractorError(
+                '{0} is offline'.format(channel_name), expected=True)
+
+        channel_id = chan['id']
+
+        def manifest_url(kind):
+            return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
+
+        formats = self._extract_m3u8_formats(
+            manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
+            fatal=False)
+        formats.extend(self._extract_smil_formats(
+            manifest_url('smil'), channel_name, fatal=False))
+        self._sort_formats(formats)
+
+        info = {
+            'id': compat_str(chan.get('id') or channel_name),
+            'title': self._live_title(chan.get('name') or channel_name),
+            'description': clean_html(chan.get('description')),
+            'thumbnail': try_get(
+                chan, lambda x: x['thumbnail']['url'], compat_str),
+            'timestamp': parse_iso8601(chan.get('updatedAt')),
+            'is_live': True,
+            'view_count': int_or_none(chan.get('viewersTotal')),
+            'formats': formats,
+        }
+        info.update(self._extract_channel_info(chan))
+
+        return info
+
+
+class BeamProVodIE(BeamProBaseIE):
+    IE_NAME = 'Mixer:vod'
+    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://mixer.com/willow8714?vod=2259830',
+        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
+        'info_dict': {
+            'id': '2259830',
+            'ext': 'mp4',
+            'title': 'willow8714\'s Channel',
+            'duration': 6828.15,
+            'thumbnail': r're:https://.*source\.png$',
+            'timestamp': 1494046474,
+            'upload_date': '20170506',
+            'uploader': 'willow8714',
+            'uploader_id': '6085379',
+            'age_limit': 13,
+            'view_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    @staticmethod
+    def _extract_format(vod, vod_type):
+        if not vod.get('baseUrl'):
+            return []
+
+        if vod_type == 'hls':
+            filename, protocol = 'manifest.m3u8', 'm3u8_native'
+        elif vod_type == 'raw':
+            filename, protocol = 'source.mp4', 'https'
+        else:
+            assert False
+
+        data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
+
+        format_id = [vod_type]
+        if isinstance(data.get('Height'), compat_str):
+            format_id.append('%sp' % data['Height'])
+
+        return [{
+            'url': urljoin(vod['baseUrl'], filename),
+            'format_id': '-'.join(format_id),
+            'ext': 'mp4',
+            'protocol': protocol,
+            'width': int_or_none(data.get('Width')),
+            'height': int_or_none(data.get('Height')),
+            'fps': int_or_none(data.get('Fps')),
+            'tbr': int_or_none(data.get('Bitrate'), 1000),
+        }]
+
+    def _real_extract(self, url):
+        vod_id = self._match_id(url)
+
+        vod_info = self._download_json(
+            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
+
+        state = vod_info.get('state')
+        if state != 'AVAILABLE':
+            raise ExtractorError(
+                'VOD %s is not available (state: %s)' % (vod_id, state),
+                expected=True)
+
+        formats = []
+        thumbnail_url = None
+
+        for vod in vod_info['vods']:
+            vod_type = vod.get('format')
+            if vod_type in ('hls', 'raw'):
+                formats.extend(self._extract_format(vod, vod_type))
+            elif vod_type == 'thumbnail':
+                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
+
+        self._sort_formats(formats)
+
+        info = {
+            'id': vod_id,
+            'title': vod_info.get('name') or vod_id,
+            'duration': float_or_none(vod_info.get('duration')),
+            'thumbnail': thumbnail_url,
+            'timestamp': parse_iso8601(vod_info.get('createdAt')),
+            'view_count': int_or_none(vod_info.get('viewsTotal')),
+            'formats': formats,
+        }
+        info.update(self._extract_channel_info(vod_info.get('channel') or {}))
+
+        return info

+ 13 - 6
youtube_dl/extractor/beeg.py

@@ -9,6 +9,7 @@ from ..compat import (
 from ..utils import (
     int_or_none,
     parse_iso8601,
+    urljoin,
 )
 
 
@@ -16,7 +17,7 @@ class BeegIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
     _TEST = {
         'url': 'http://beeg.com/5416503',
-        'md5': '46c384def73b33dbc581262e5ee67cef',
+        'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
         'info_dict': {
             'id': '5416503',
             'ext': 'mp4',
@@ -36,9 +37,11 @@ class BeegIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         cpl_url = self._search_regex(
-            r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
+            r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
             webpage, 'cpl', default=None, group='url')
 
+        cpl_url = urljoin(url, cpl_url)
+
         beeg_version, beeg_salt = [None] * 2
 
         if cpl_url:
@@ -54,12 +57,16 @@ class BeegIE(InfoExtractor):
                     r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
                     default=None, group='beeg_salt')
 
-        beeg_version = beeg_version or '2000'
+        beeg_version = beeg_version or '2185'
         beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
 
-        video = self._download_json(
-            'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
-            video_id)
+        for api_path in ('', 'api.'):
+            video = self._download_json(
+                'https://%sbeeg.com/api/v6/%s/video/%s'
+                % (api_path, beeg_version, video_id), video_id,
+                fatal=api_path == 'api.')
+            if video:
+                break
 
         def split(o, e):
             def cut(s, x):

+ 10 - 2
youtube_dl/extractor/bellmedia.py

@@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor):
                 animalplanet|
                 bravo|
                 mtv|
-                space
+                space|
+                etalk
             )\.ca|
             much\.com
-        )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
+        )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
     _TESTS = [{
         'url': 'http://www.ctv.ca/video/player?vid=706966',
         'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
@@ -55,6 +56,12 @@ class BellMediaIE(InfoExtractor):
     }, {
         'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
         'only_matching': True,
+    }, {
+        'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.etalk.ca/video?videoid=663455',
+        'only_matching': True,
     }]
     _DOMAINS = {
         'thecomedynetwork': 'comedy',
@@ -62,6 +69,7 @@ class BellMediaIE(InfoExtractor):
         'sciencechannel': 'discsci',
         'investigationdiscovery': 'invdisc',
         'animalplanet': 'aniplan',
+        'etalk': 'ctv',
     }
 
     def _real_extract(self, url):

+ 149 - 13
youtube_dl/extractor/bilibili.py

@@ -5,19 +5,27 @@ import hashlib
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_parse_qs
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
 from ..utils import (
+    ExtractorError,
     int_or_none,
     float_or_none,
+    parse_iso8601,
+    smuggle_url,
+    strip_jsonp,
     unified_timestamp,
+    unsmuggle_url,
     urlencode_postdata,
 )
 
 
 class BiliBiliIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.bilibili.tv/video/av1074402/',
         'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
         'info_dict': {
@@ -32,25 +40,77 @@ class BiliBiliIE(InfoExtractor):
             'uploader': '菊子桑',
             'uploader_id': '156160',
         },
-    }
+    }, {
+        # Tested in BiliBiliBangumiIE
+        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
+        'only_matching': True,
+    }, {
+        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
+        'md5': '3f721ad1e75030cc06faf73587cfec57',
+        'info_dict': {
+            'id': '100643',
+            'ext': 'mp4',
+            'title': 'CHAOS;CHILD',
+            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
+        },
+        'skip': 'Geo-restricted to China',
+    }, {
+        # Title with double quotes
+        'url': 'http://www.bilibili.com/video/av8903802/',
+        'info_dict': {
+            'id': '8903802',
+            'ext': 'mp4',
+            'title': '阿滴英文|英文歌分享#6 "Closer',
+            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
+            'uploader': '阿滴英文',
+            'uploader_id': '65880958',
+            'timestamp': 1488382620,
+            'upload_date': '20170301',
+        },
+        'params': {
+            'skip_download': True,  # Test metadata only
+        },
+    }]
 
-    _APP_KEY = '6f90a59ac58a4123'
-    _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
+    _APP_KEY = '84956560bc028eb7'
+    _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
+
+    def _report_error(self, result):
+        if 'message' in result:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
+        elif 'code' in result:
+            raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
+        else:
+            raise ExtractorError('Can\'t extract Bangumi episode ID')
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        anime_id = mobj.group('anime_id')
         webpage = self._download_webpage(url, video_id)
 
-        if 'anime/v' not in url:
+        if 'anime/' not in url:
             cid = compat_parse_qs(self._search_regex(
                 [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
                  r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                 webpage, 'player parameters'))['cid'][0]
         else:
+            if 'no_bangumi_tip' not in smuggled_data:
+                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
+                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
+            headers = {
+                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+            }
+            headers.update(self.geo_verification_headers())
+
             js = self._download_json(
                 'http://bangumi.bilibili.com/web_api/get_source', video_id,
                 data=urlencode_postdata({'episode_id': video_id}),
-                headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
+                headers=headers)
+            if 'result' not in js:
+                self._report_error(js)
             cid = js['result']['cid']
 
         payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
@@ -58,7 +118,11 @@ class BiliBiliIE(InfoExtractor):
 
         video_info = self._download_json(
             'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
-            video_id, note='Downloading video info page')
+            video_id, note='Downloading video info page',
+            headers=self.geo_verification_headers())
+
+        if 'durl' not in video_info:
+            self._report_error(video_info)
 
         entries = []
 
@@ -74,6 +138,11 @@ class BiliBiliIE(InfoExtractor):
                     'preference': -2 if 'hd.mp4' in backup_url else -3,
                 })
 
+            for a_format in formats:
+                a_format.setdefault('http_headers', {}).update({
+                    'Referer': url,
+                })
+
             self._sort_formats(formats)
 
             entries.append({
@@ -82,10 +151,10 @@ class BiliBiliIE(InfoExtractor):
                 'formats': formats,
             })
 
-        title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
+        title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
         description = self._html_search_meta('description', webpage)
         timestamp = unified_timestamp(self._html_search_regex(
-            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
+            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
         thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
 
         # TODO 'view_count' requires deobfuscating Javascript
@@ -99,7 +168,7 @@ class BiliBiliIE(InfoExtractor):
         }
 
         uploader_mobj = re.search(
-            r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
+            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
             webpage)
         if uploader_mobj:
             info.update({
@@ -123,3 +192,70 @@ class BiliBiliIE(InfoExtractor):
                 'description': description,
                 'entries': entries,
             }
+
+
+class BiliBiliBangumiIE(InfoExtractor):
+    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
+
+    IE_NAME = 'bangumi.bilibili.com'
+    IE_DESC = 'BiliBili番剧'
+
+    _TESTS = [{
+        'url': 'http://bangumi.bilibili.com/anime/1869',
+        'info_dict': {
+            'id': '1869',
+            'title': '混沌武士',
+            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
+        },
+        'playlist_count': 26,
+    }, {
+        'url': 'http://bangumi.bilibili.com/anime/1869',
+        'info_dict': {
+            'id': '1869',
+            'title': '混沌武士',
+            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
+        },
+        'playlist': [{
+            'md5': '91da8621454dd58316851c27c68b0c13',
+            'info_dict': {
+                'id': '40062',
+                'ext': 'mp4',
+                'title': '混沌武士',
+                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
+                'timestamp': 1414538739,
+                'upload_date': '20141028',
+                'episode': '疾风怒涛 Tempestuous Temperaments',
+                'episode_number': 1,
+            },
+        }],
+        'params': {
+            'playlist_items': '1',
+        },
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        bangumi_id = self._match_id(url)
+
+        # Sometimes this API returns a JSONP response
+        season_info = self._download_json(
+            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
+            bangumi_id, transform_source=strip_jsonp)['result']
+
+        entries = [{
+            '_type': 'url_transparent',
+            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
+            'ie_key': BiliBiliIE.ie_key(),
+            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
+            'episode': episode.get('index_title'),
+            'episode_number': int_or_none(episode.get('index')),
+        } for episode in season_info['episodes']]
+
+        entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
+
+        return self.playlist_result(
+            entries, bangumi_id,
+            season_info.get('bangumi_title'), season_info.get('evaluate'))

+ 3 - 7
youtube_dl/extractor/bleacherreport.py

@@ -35,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
             'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
             'timestamp': 1446839961,
             'uploader': 'Sean Fay',
-            'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
+            'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
             'uploader_id': 6466954,
             'upload_date': '20151011',
         },
@@ -90,17 +90,13 @@ class BleacherReportCMSIE(AMPIE):
     _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
     _TESTS = [{
         'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-        'md5': '8c2c12e3af7805152675446c905d159b',
+        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
         'info_dict': {
             'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
             'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
     }]
 
     def _real_extract(self, url):

+ 8 - 3
youtube_dl/extractor/bloomberg.py

@@ -33,6 +33,10 @@ class BloombergIE(InfoExtractor):
         'params': {
             'format': 'best[format_id^=hds]',
         },
+    }, {
+        # data-bmmrid=
+        'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
+        'only_matching': True,
     }, {
         'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
         'only_matching': True,
@@ -45,9 +49,10 @@ class BloombergIE(InfoExtractor):
         name = self._match_id(url)
         webpage = self._download_webpage(url, name)
         video_id = self._search_regex(
-            (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
-             r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
-            webpage, 'id', group='url', default=None)
+            (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+             r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+             r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
+            webpage, 'id', group='id', default=None)
         if not video_id:
             bplayer_data = self._parse_json(self._search_regex(
                 r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)

+ 72 - 0
youtube_dl/extractor/bostonglobe.py

@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+    extract_attributes,
+)
+
+
+class BostonGlobeIE(InfoExtractor):
+    _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
+    _TESTS = [
+        {
+            'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
+            'md5': '0a62181079c85c2d2b618c9a738aedaf',
+            'info_dict': {
+                'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
+                'id': '5320421710001',
+                'ext': 'mp4',
+                'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
+                'timestamp': 1486877593,
+                'upload_date': '20170212',
+                'uploader_id': '245991542',
+            },
+        },
+        {
+            # Embedded youtube video; we hand it off to the Generic extractor.
+            'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
+            'md5': '582b40327089d5c0c949b3c54b13c24b',
+            'info_dict': {
+                'title': "Who Is Matt Damon's Favorite Batman?",
+                'id': 'ZW1QCnlA6Qc',
+                'ext': 'mp4',
+                'upload_date': '20170217',
+                'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
+                'uploader': 'The Late Late Show with James Corden',
+                'uploader_id': 'TheLateLateShow',
+            },
+            'expected_warnings': ['404'],
+        },
+    ]
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        webpage = self._download_webpage(url, page_id)
+
+        page_title = self._og_search_title(webpage, default=None)
+
+        # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
+        entries = []
+        for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
+            attrs = extract_attributes(video)
+
+            video_id = attrs.get('data-brightcove-video-id')
+            account_id = attrs.get('data-account')
+            player_id = attrs.get('data-player')
+            embed = attrs.get('data-embed')
+
+            if video_id and account_id and player_id and embed:
+                entries.append(
+                    'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+                    % (account_id, player_id, embed, video_id))
+
+        if len(entries) == 0:
+            return self.url_result(url, 'Generic')
+        elif len(entries) == 1:
+            return self.url_result(entries[0], 'BrightcoveNew')
+        else:
+            return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')

+ 9 - 4
youtube_dl/extractor/bpb.py

@@ -33,13 +33,18 @@ class BpbIE(InfoExtractor):
         title = self._html_search_regex(
             r'<h2 class="white">(.*?)</h2>', webpage, 'title')
         video_info_dicts = re.findall(
-            r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
+            r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
 
         formats = []
         for video_info in video_info_dicts:
-            video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
-            quality = video_info['quality']
-            video_url = video_info['src']
+            video_info = self._parse_json(
+                video_info, video_id, transform_source=js_to_json, fatal=False)
+            if not video_info:
+                continue
+            video_url = video_info.get('src')
+            if not video_url:
+                continue
+            quality = 'high' if '_high' in video_url else 'low'
             formats.append({
                 'url': video_url,
                 'preference': 10 if quality == 'high' else 0,

+ 144 - 4
youtube_dl/extractor/br.py

@@ -1,20 +1,23 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     ExtractorError,
     int_or_none,
     parse_duration,
+    parse_iso8601,
     xpath_element,
     xpath_text,
 )
 
 
 class BRIE(InfoExtractor):
-    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    IE_DESC = 'Bayerischer Rundfunk'
     _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
 
     _TESTS = [
@@ -77,7 +80,7 @@ class BRIE(InfoExtractor):
                 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
                 'duration': 893,
                 'uploader': 'Eva Maria Steimle',
-                'upload_date': '20140117',
+                'upload_date': '20170208',
             }
         },
     ]
@@ -123,10 +126,10 @@ class BRIE(InfoExtractor):
         for asset in assets.findall('asset'):
             format_url = xpath_text(asset, ['downloadUrl', 'url'])
             asset_type = asset.get('type')
-            if asset_type == 'HDS':
+            if asset_type.startswith('HDS'):
                 formats.extend(self._extract_f4m_formats(
                     format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
-            elif asset_type == 'HLS':
+            elif asset_type.startswith('HLS'):
                 formats.extend(self._extract_m3u8_formats(
                     format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
             else:
@@ -169,3 +172,140 @@ class BRIE(InfoExtractor):
         } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
         thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
         return thumbnails
+
+
+class BRMediathekIE(InfoExtractor):
+    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
+
+    _TESTS = [{
+        'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
+        'md5': 'fdc3d485835966d1622587d08ba632ec',
+        'info_dict': {
+            'id': 'av:5a1e6a6e8fce6d001871cc8e',
+            'ext': 'mp4',
+            'title': 'Die Sendung vom 28.11.2017',
+            'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
+            'timestamp': 1511942766,
+            'upload_date': '20171129',
+        }
+    }]
+
+    def _real_extract(self, url):
+        clip_id = self._match_id(url)
+
+        clip = self._download_json(
+            'https://proxy-base.master.mango.express/graphql',
+            clip_id, data=json.dumps({
+                "query": """{
+  viewer {
+    clip(id: "%s") {
+      title
+      description
+      duration
+      createdAt
+      ageRestriction
+      videoFiles {
+        edges {
+          node {
+            publicLocation
+            fileSize
+            videoProfile {
+              width
+              height
+              bitrate
+              encoding
+            }
+          }
+        }
+      }
+      captionFiles {
+        edges {
+          node {
+            publicLocation
+          }
+        }
+      }
+      teaserImages {
+        edges {
+          node {
+            imageFiles {
+              edges {
+                node {
+                  publicLocation
+                  width
+                  height
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}""" % clip_id}).encode(), headers={
+                'Content-Type': 'application/json',
+            })['data']['viewer']['clip']
+        title = clip['title']
+
+        formats = []
+        for edge in clip.get('videoFiles', {}).get('edges', []):
+            node = edge.get('node', {})
+            n_url = node.get('publicLocation')
+            if not n_url:
+                continue
+            ext = determine_ext(n_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    n_url, clip_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                video_profile = node.get('videoProfile', {})
+                tbr = int_or_none(video_profile.get('bitrate'))
+                format_id = 'http'
+                if tbr:
+                    format_id += '-%d' % tbr
+                formats.append({
+                    'format_id': format_id,
+                    'url': n_url,
+                    'width': int_or_none(video_profile.get('width')),
+                    'height': int_or_none(video_profile.get('height')),
+                    'tbr': tbr,
+                    'filesize': int_or_none(node.get('fileSize')),
+                })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for edge in clip.get('captionFiles', {}).get('edges', []):
+            node = edge.get('node', {})
+            n_url = node.get('publicLocation')
+            if not n_url:
+                continue
+            subtitles.setdefault('de', []).append({
+                'url': n_url,
+            })
+
+        thumbnails = []
+        for edge in clip.get('teaserImages', {}).get('edges', []):
+            for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
+                node = image_edge.get('node', {})
+                n_url = node.get('publicLocation')
+                if not n_url:
+                    continue
+                thumbnails.append({
+                    'url': n_url,
+                    'width': int_or_none(node.get('width')),
+                    'height': int_or_none(node.get('height')),
+                })
+
+        return {
+            'id': clip_id,
+            'title': title,
+            'description': clip.get('description'),
+            'duration': int_or_none(clip.get('duration')),
+            'timestamp': parse_iso8601(clip.get('createdAt')),
+            'age_limit': int_or_none(clip.get('ageRestriction')),
+            'formats': formats,
+            'subtitles': subtitles,
+            'thumbnails': thumbnails,
+        }

+ 138 - 62
youtube_dl/extractor/brightcove.py

@@ -5,6 +5,7 @@ import re
 import json
 
 from .common import InfoExtractor
+from .adobepass import AdobePassIE
 from ..compat import (
     compat_etree_fromstring,
     compat_parse_qs,
@@ -17,6 +18,7 @@ from ..compat import (
 from ..utils import (
     determine_ext,
     ExtractorError,
+    extract_attributes,
     find_xpath_attr,
     fix_xml_ampersands,
     float_or_none,
@@ -109,6 +111,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                 'upload_date': '20140827',
                 'uploader_id': '710858724001',
             },
+            'skip': 'Video gone',
         },
         {
             # playlist with 'videoList'
@@ -129,6 +132,12 @@ class BrightcoveLegacyIE(InfoExtractor):
             },
             'playlist_mincount': 10,
         },
+        {
+            # playerID inferred from bcpid
+            # from http://www.un.org/chinese/News/story.asp?NewsID=27724
+            'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
+            'only_matching': True,  # Tested in GenericIE
+        }
     ]
     FLV_VCODECS = {
         1: 'SORENSON',
@@ -179,7 +188,7 @@ class BrightcoveLegacyIE(InfoExtractor):
 
         params = {}
 
-        playerID = find_param('playerID')
+        playerID = find_param('playerID') or find_param('playerId')
         if playerID is None:
             raise ExtractorError('Cannot find player ID')
         params['playerID'] = playerID
@@ -191,6 +200,16 @@ class BrightcoveLegacyIE(InfoExtractor):
         # These fields hold the id of the video
         videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
         if videoPlayer is not None:
+            if isinstance(videoPlayer, list):
+                videoPlayer = videoPlayer[0]
+            videoPlayer = videoPlayer.strip()
+            # UUID is also possible for videoPlayer (e.g.
+            # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
+            # or http://www8.hp.com/cn/zh/home.html)
+            if not (re.match(
+                    r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
+                    videoPlayer) or videoPlayer.startswith('ref:')):
+                return None
             params['@videoPlayer'] = videoPlayer
         linkBase = find_param('linkBaseURL')
         if linkBase is not None:
@@ -204,7 +223,7 @@ class BrightcoveLegacyIE(InfoExtractor):
         #   // build Brightcove <object /> XML
         # }
         m = re.search(
-            r'''(?x)customBC.\createVideo\(
+            r'''(?x)customBC\.createVideo\(
                 .*?                                                  # skipping width and height
                 ["\'](?P<playerID>\d+)["\']\s*,\s*                   # playerID
                 ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s*  # playerKey begins with AQ and is 50 characters
@@ -254,9 +273,13 @@ class BrightcoveLegacyIE(InfoExtractor):
         if matches:
             return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
 
-        return list(filter(None, [
-            cls._build_brighcove_url_from_js(custom_bc)
-            for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
+        matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
+        if matches:
+            return list(filter(None, [
+                cls._build_brighcove_url_from_js(custom_bc)
+                for custom_bc in matches]))
+        return [src for _, src in re.findall(
+            r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
@@ -273,6 +296,10 @@ class BrightcoveLegacyIE(InfoExtractor):
         if videoPlayer:
             # We set the original url as the default 'Referer' header
             referer = smuggled_data.get('Referer', url)
+            if 'playerID' not in query:
+                mobj = re.search(r'/bcpid(\d+)', url)
+                if mobj is not None:
+                    query['playerID'] = [mobj.group(1)]
             return self._get_video_info(
                 videoPlayer[0], query, referer=referer)
         elif 'playerKey' in query:
@@ -422,7 +449,7 @@ class BrightcoveLegacyIE(InfoExtractor):
         return info
 
 
-class BrightcoveNewIE(InfoExtractor):
+class BrightcoveNewIE(AdobePassIE):
     IE_NAME = 'brightcove:new'
     _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
     _TESTS = [{
@@ -437,7 +464,7 @@ class BrightcoveNewIE(InfoExtractor):
             'timestamp': 1441391203,
             'upload_date': '20150904',
             'uploader_id': '929656772001',
-            'formats': 'mincount:22',
+            'formats': 'mincount:20',
         },
     }, {
         # with rtmp streams
@@ -451,7 +478,7 @@ class BrightcoveNewIE(InfoExtractor):
             'timestamp': 1433556729,
             'upload_date': '20150606',
             'uploader_id': '4036320279001',
-            'formats': 'mincount:41',
+            'formats': 'mincount:39',
         },
         'params': {
             # m3u8 download
@@ -472,17 +499,18 @@ class BrightcoveNewIE(InfoExtractor):
     }]
 
     @staticmethod
-    def _extract_url(webpage):
-        urls = BrightcoveNewIE._extract_urls(webpage)
+    def _extract_url(ie, webpage):
+        urls = BrightcoveNewIE._extract_urls(ie, webpage)
         return urls[0] if urls else None
 
     @staticmethod
-    def _extract_urls(webpage):
+    def _extract_urls(ie, webpage):
         # Reference:
         # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
-        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
-        # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
-        # 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
+        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
+        # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
+        # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
+        # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
 
         entries = []
 
@@ -491,59 +519,52 @@ class BrightcoveNewIE(InfoExtractor):
                 r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
             entries.append(url if url.startswith('http') else 'http:' + url)
 
-        # Look for embed_in_page embeds [2]
-        for video_id, account_id, player_id, embed in re.findall(
-                # According to examples from [3] it's unclear whether video id
-                # may be optional and what to do when it is
-                # According to [4] data-video-id may be prefixed with ref:
-                r'''(?sx)
-                    <video[^>]+
-                        data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
-                    </video>.*?
-                    <script[^>]+
-                        src=["\'](?:https?:)?//players\.brightcove\.net/
-                        (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
+        # Look for <video> tags [2] and embed_in_page embeds [3]
+        # [2] looks like:
+        for video, script_tag, account_id, player_id, embed in re.findall(
+                r'''(?isx)
+                    (<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
+                    (?:.*?
+                        (<script[^>]+
+                            src=["\'](?:https?:)?//players\.brightcove\.net/
+                            (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
+                        )
+                    )?
                 ''', webpage):
-            entries.append(
-                'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
-                % (account_id, player_id, embed, video_id))
-
-        return entries
+            attrs = extract_attributes(video)
 
-    def _real_extract(self, url):
-        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
-
-        webpage = self._download_webpage(
-            'http://players.brightcove.net/%s/%s_%s/index.min.js'
-            % (account_id, player_id, embed), video_id)
+            # According to examples from [4] it's unclear whether video id
+            # may be optional and what to do when it is
+            video_id = attrs.get('data-video-id')
+            if not video_id:
+                continue
 
-        policy_key = None
+            account_id = account_id or attrs.get('data-account')
+            if not account_id:
+                continue
 
-        catalog = self._search_regex(
-            r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
-        if catalog:
-            catalog = self._parse_json(
-                js_to_json(catalog), video_id, fatal=False)
-            if catalog:
-                policy_key = catalog.get('policyKey')
+            player_id = player_id or attrs.get('data-player') or 'default'
+            embed = embed or attrs.get('data-embed') or 'default'
+
+            bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
+                account_id, player_id, embed, video_id)
+
+            # Some brightcove videos may be embedded with video tag only and
+            # without script tag or any mentioning of brightcove at all. Such
+            # embeds are considered ambiguous since they are matched based only
+            # on data-video-id and data-account attributes and in the wild may
+            # not be brightcove embeds at all. Let's check reconstructed
+            # brightcove URLs in case of such embeds and only process valid
+            # ones. By this we ensure there is indeed a brightcove embed.
+            if not script_tag and not ie._is_valid_url(
+                    bc_url, video_id, 'possible brightcove video'):
+                continue
 
-        if not policy_key:
-            policy_key = self._search_regex(
-                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
-                webpage, 'policy key', group='pk')
+            entries.append(bc_url)
 
-        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
-        try:
-            json_data = self._download_json(api_url, video_id, headers={
-                'Accept': 'application/json;pk=%s' % policy_key
-            })
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
-                raise ExtractorError(
-                    json_data.get('message') or json_data['error_code'], expected=True)
-            raise
+        return entries
 
+    def _parse_brightcove_metadata(self, json_data, video_id):
         title = json_data['name'].strip()
 
         formats = []
@@ -626,7 +647,7 @@ class BrightcoveNewIE(InfoExtractor):
 
         is_live = False
         duration = float_or_none(json_data.get('duration'), 1000)
-        if duration and duration < 0:
+        if duration is not None and duration <= 0:
             is_live = True
 
         return {
@@ -636,9 +657,64 @@ class BrightcoveNewIE(InfoExtractor):
             'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
             'duration': duration,
             'timestamp': parse_iso8601(json_data.get('published_at')),
-            'uploader_id': account_id,
+            'uploader_id': json_data.get('account_id'),
             'formats': formats,
             'subtitles': subtitles,
             'tags': json_data.get('tags', []),
             'is_live': is_live,
         }
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
+        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
+
+        webpage = self._download_webpage(
+            'http://players.brightcove.net/%s/%s_%s/index.min.js'
+            % (account_id, player_id, embed), video_id)
+
+        policy_key = None
+
+        catalog = self._search_regex(
+            r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+        if catalog:
+            catalog = self._parse_json(
+                js_to_json(catalog), video_id, fatal=False)
+            if catalog:
+                policy_key = catalog.get('policyKey')
+
+        if not policy_key:
+            policy_key = self._search_regex(
+                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+                webpage, 'policy key', group='pk')
+
+        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
+        try:
+            json_data = self._download_json(api_url, video_id, headers={
+                'Accept': 'application/json;pk=%s' % policy_key
+            })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
+                message = json_data.get('message') or json_data['error_code']
+                if json_data.get('error_subcode') == 'CLIENT_GEO':
+                    self.raise_geo_restricted(msg=message)
+                raise ExtractorError(message, expected=True)
+            raise
+
+        errors = json_data.get('errors')
+        if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
+            custom_fields = json_data['custom_fields']
+            tve_token = self._extract_mvpd_auth(
+                smuggled_data['source_url'], video_id,
+                custom_fields['bcadobepassrequestorid'],
+                custom_fields['bcadobepassresourceid'])
+            json_data = self._download_json(
+                api_url, video_id, headers={
+                    'Accept': 'application/json;pk=%s' % policy_key
+                }, query={
+                    'tveToken': tve_token,
+                })
+
+        return self._parse_brightcove_metadata(json_data, video_id)

+ 4 - 3
youtube_dl/extractor/buzzfeed.py

@@ -84,9 +84,10 @@ class BuzzFeedIE(InfoExtractor):
                 continue
             entries.append(self.url_result(video['url']))
 
-        facebook_url = FacebookIE._extract_url(webpage)
-        if facebook_url:
-            entries.append(self.url_result(facebook_url))
+        facebook_urls = FacebookIE._extract_urls(webpage)
+        entries.extend([
+            self.url_result(facebook_url)
+            for facebook_url in facebook_urls])
 
         return {
             '_type': 'playlist',

+ 18 - 53
youtube_dl/extractor/byutv.py

@@ -3,20 +3,19 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
 
 
 class BYUtvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
+    _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
     _TESTS = [{
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
         'info_dict': {
-            'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+            'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
             'display_id': 'studio-c-season-5-episode-5',
             'ext': 'mp4',
             'title': 'Season 5 Episode 5',
-            'description': 'md5:e07269172baff037f8e8bf9956bc9747',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65',
+            'thumbnail': r're:^https?://.*',
             'duration': 1486.486,
         },
         'params': {
@@ -26,6 +25,9 @@ class BYUtvIE(InfoExtractor):
     }, {
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
         'only_matching': True,
+    }, {
+        'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -33,16 +35,16 @@ class BYUtvIE(InfoExtractor):
         video_id = mobj.group('id')
         display_id = mobj.group('display_id') or video_id
 
-        webpage = self._download_webpage(url, display_id)
-        episode_code = self._search_regex(
-            r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
-
-        ep = self._parse_json(
-            episode_code, display_id, transform_source=lambda s:
-            re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
-
-        if ep['providerType'] != 'Ooyala':
-            raise ExtractorError('Unsupported provider %s' % ep['provider'])
+        ep = self._download_json(
+            'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id,
+            query={
+                'contentid': video_id,
+                'channel': 'byutv',
+                'x-byutv-context': 'web$US',
+            }, headers={
+                'x-byutv-context': 'web$US',
+                'x-byutv-platformkey': 'xsaaw9c7y5',
+            })['ooyalaVOD']
 
         return {
             '_type': 'url_transparent',
@@ -50,44 +52,7 @@ class BYUtvIE(InfoExtractor):
             'url': 'ooyala:%s' % ep['providerId'],
             'id': video_id,
             'display_id': display_id,
-            'title': ep['title'],
+            'title': ep.get('title'),
             'description': ep.get('description'),
             'thumbnail': ep.get('imageThumbnail'),
         }
-
-
-class BYUtvEventIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
-    _TEST = {
-        'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
-        'info_dict': {
-            'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
-            'ext': 'mp4',
-            'title': 'Toledo vs. BYU (9/30/16)',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': ['Ooyala'],
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        ooyala_id = self._search_regex(
-            r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
-            webpage, 'ooyala id', group='id')
-
-        title = self._search_regex(
-            r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
-            'title').strip()
-
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'Ooyala',
-            'url': 'ooyala:%s' % ooyala_id,
-            'id': video_id,
-            'title': title,
-        }

+ 1 - 4
youtube_dl/extractor/canalc2.py

@@ -16,13 +16,10 @@ class Canalc2IE(InfoExtractor):
         'md5': '060158428b650f896c542dfbb3d6487f',
         'info_dict': {
             'id': '12163',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Terrasses du Numérique',
             'duration': 122,
         },
-        'params': {
-            'skip_download': True,  # Requires rtmpdump
-        }
     }, {
         'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
         'only_matching': True,

+ 40 - 20
youtube_dl/extractor/canalplus.py

@@ -7,8 +7,8 @@ from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     dict_get,
-    ExtractorError,
-    HEADRequest,
+    # ExtractorError,
+    # HEADRequest,
     int_or_none,
     qualities,
     remove_end,
@@ -27,6 +27,7 @@ class CanalplusIE(InfoExtractor):
                                     (?:www\.)?d8\.tv|
                                     (?:www\.)?c8\.fr|
                                     (?:www\.)?d17\.tv|
+                                    (?:(?:football|www)\.)?cstar\.fr|
                                     (?:www\.)?itele\.fr
                                 )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
                                 player\.canalplus\.fr/#/(?P<id>\d+)
@@ -40,9 +41,13 @@ class CanalplusIE(InfoExtractor):
         'd8': 'd8',
         'c8': 'd8',
         'd17': 'd17',
+        'cstar': 'd17',
         'itele': 'itele',
     }
 
+    # Only works for direct mp4 URLs
+    _GEO_COUNTRIES = ['FR']
+
     _TESTS = [{
         'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
         'info_dict': {
@@ -54,6 +59,7 @@ class CanalplusIE(InfoExtractor):
             'upload_date': '20160702',
         },
     }, {
+        # geo restricted, bypassed
         'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
         'info_dict': {
             'id': '1108190',
@@ -63,19 +69,20 @@ class CanalplusIE(InfoExtractor):
             'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
             'upload_date': '20140724',
         },
-        'skip': 'Only works from France',
+        'expected_warnings': ['HTTP Error 403: Forbidden'],
     }, {
-        'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
-        'md5': '4b47b12b4ee43002626b97fad8fb1de5',
+        # geo restricted, bypassed
+        'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
+        'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
         'info_dict': {
-            'id': '1420213',
+            'id': '1443684',
             'display_id': 'pid6318-videos-integrales',
             'ext': 'mp4',
-            'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
-            'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
-            'upload_date': '20161014',
+            'title': 'Guess my iep ! - TPMP - 07/04/2017',
+            'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
+            'upload_date': '20170407',
         },
-        'skip': 'Only works from France',
+        'expected_warnings': ['HTTP Error 403: Forbidden'],
     }, {
         'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
         'info_dict': {
@@ -86,6 +93,19 @@ class CanalplusIE(InfoExtractor):
             'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
             'upload_date': '20161014',
         },
+    }, {
+        'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
+        'info_dict': {
+            'id': '1416769',
+            'display_id': 'pid7566-feminines-videos',
+            'ext': 'mp4',
+            'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
+            'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
+            'upload_date': '20160921',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://m.canalplus.fr/?vid=1398231',
         'only_matching': True,
@@ -107,7 +127,7 @@ class CanalplusIE(InfoExtractor):
             [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
              r'id=["\']canal_video_player(?P<id>\d+)',
              r'data-video=["\'](?P<id>\d+)'],
-            webpage, 'video id', group='id')
+            webpage, 'video id', default=mobj.group('vid'), group='id')
 
         info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
         video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
@@ -119,15 +139,15 @@ class CanalplusIE(InfoExtractor):
 
         preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
 
-        fmt_url = next(iter(media.get('VIDEOS')))
-        if '/geo' in fmt_url.lower():
-            response = self._request_webpage(
-                HEADRequest(fmt_url), video_id,
-                'Checking if the video is georestricted')
-            if '/blocage' in response.geturl():
-                raise ExtractorError(
-                    'The video is not available in your country',
-                    expected=True)
+        # _, fmt_url = next(iter(media['VIDEOS'].items()))
+        # if '/geo' in fmt_url.lower():
+        #     response = self._request_webpage(
+        #         HEADRequest(fmt_url), video_id,
+        #         'Checking if the video is georestricted')
+        #     if '/blocage' in response.geturl():
+        #         raise ExtractorError(
+        #             'The video is not available in your country',
+        #             expected=True)
 
         formats = []
         for format_id, format_url in media['VIDEOS'].items():

Vissa filer visades inte eftersom för många filer har ändrats