Browse Source

Merge branch 'master' into openload-phantomjs-method

Tithen-Firion 8 years ago
parent
commit
c89267d31a
99 changed files with 2121 additions and 689 deletions
  1. +3 -3    .github/ISSUE_TEMPLATE.md
  2. +1 -1    .gitignore
  3. +1 -0    AUTHORS
  4. +126 -0  ChangeLog
  5. +1 -1    Makefile
  6. +3 -0    README.md
  7. +5 -2    docs/supportedsites.md
  8. +314 -1  test/test_InfoExtractor.py
  9. +3 -1    test/test_YoutubeDL.py
 10. +1 -1    test/test_download.py
 11. +54 -0   test/test_utils.py
 12. +14 -0   test/testdata/m3u8/pluzz_francetv_11507.m3u8
 13. +16 -0   test/testdata/m3u8/teamcoco_11995.m3u8
 14. +13 -0   test/testdata/m3u8/toggle_mobile_12211.m3u8
 15. +20 -0   test/testdata/m3u8/twitch_vod.m3u8
 16. +10 -0   test/testdata/m3u8/vidio.m3u8
 17. +10 -4   youtube_dl/YoutubeDL.py
 18. +1 -0    youtube_dl/__init__.py
 19. +19 -15  youtube_dl/downloader/common.py
 20. +11 -32  youtube_dl/downloader/dash.py
 21. +11 -1   youtube_dl/downloader/external.py
 22. +10 -23  youtube_dl/downloader/f4m.py
 23. +109 -13 youtube_dl/downloader/fragment.py
 24. +13 -21  youtube_dl/downloader/hls.py
 25. +9 -25   youtube_dl/downloader/ism.py
 26. +6 -0    youtube_dl/extractor/adobepass.py
 27. +9 -5    youtube_dl/extractor/aenetworks.py
 28. +1 -2    youtube_dl/extractor/afreecatv.py
 29. +19 -7   youtube_dl/extractor/amp.py
 30. +57 -9   youtube_dl/extractor/anvato.py
 31. +2 -2    youtube_dl/extractor/appleconnect.py
 32. +3 -2    youtube_dl/extractor/appletrailers.py
 33. +2 -2    youtube_dl/extractor/archiveorg.py
 34. +4 -1    youtube_dl/extractor/arte.py
 35. +1 -1    youtube_dl/extractor/atresplayer.py
 36. +1 -1    youtube_dl/extractor/audioboom.py
 37. +0 -140  youtube_dl/extractor/azubu.py
 38. +4 -4    youtube_dl/extractor/bandcamp.py
 39. +1 -1    youtube_dl/extractor/beeg.py
 40. +3 -7    youtube_dl/extractor/bleacherreport.py
 41. +1 -1    youtube_dl/extractor/br.py
 42. +20 -6   youtube_dl/extractor/brightcove.py
 43. +1 -4    youtube_dl/extractor/canalc2.py
 44. +3 -3    youtube_dl/extractor/cbc.py
 45. +2 -2    youtube_dl/extractor/cbslocal.py
 46. +49 -3   youtube_dl/extractor/cda.py
 47. +1 -1    youtube_dl/extractor/clipfish.py
 48. +2 -1    youtube_dl/extractor/collegerama.py
 49. +138 -75 youtube_dl/extractor/common.py
 50. +1 -4    youtube_dl/extractor/coub.py
 51. +2 -2    youtube_dl/extractor/crunchyroll.py
 52. +20 -1   youtube_dl/extractor/dailymotion.py
 53. +2 -1    youtube_dl/extractor/democracynow.py
 54. +1 -1    youtube_dl/extractor/dotsub.py
 55. +2 -2    youtube_dl/extractor/douyutv.py
 56. +8 -2    youtube_dl/extractor/extractors.py
 57. +5 -4    youtube_dl/extractor/foxsports.py
 58. +1 -2    youtube_dl/extractor/funnyordie.py
 59. +1 -2    youtube_dl/extractor/gamespot.py
 60. +85 -1   youtube_dl/extractor/generic.py
 61. +35 -14  youtube_dl/extractor/go.py
 62. +36 -2   youtube_dl/extractor/go90.py
 63. +2 -2    youtube_dl/extractor/infoq.py
 64. +6 -2    youtube_dl/extractor/instagram.py
 65. +17 -9   youtube_dl/extractor/iqiyi.py
 66. +22 -6   youtube_dl/extractor/itv.py
 67. +37 -74  youtube_dl/extractor/leeco.py
 68. +1 -1    youtube_dl/extractor/lego.py
 69. +37 -0   youtube_dl/extractor/limelight.py
 70. +97 -0   youtube_dl/extractor/noovo.py
 71. +1 -1    youtube_dl/extractor/nowness.py
 72. +26 -6   youtube_dl/extractor/odnoklassniki.py
 73. +18 -1   youtube_dl/extractor/pbs.py
 74. +6 -26   youtube_dl/extractor/porn91.py
 75. +1 -2    youtube_dl/extractor/r7.py
 76. +5 -1    youtube_dl/extractor/streamable.py
 77. +64 -0   youtube_dl/extractor/streamango.py
 78. +1 -1    youtube_dl/extractor/ted.py
 79. +1 -2    youtube_dl/extractor/tvp.py
 80. +23 -12  youtube_dl/extractor/tvplayer.py
 81. +11 -6   youtube_dl/extractor/vevo.py
 82. +3 -6    youtube_dl/extractor/videopress.py
 83. +5 -2    youtube_dl/extractor/vidio.py
 84. +6 -5    youtube_dl/extractor/vidzi.py
 85. +1 -2    youtube_dl/extractor/viewster.py
 86. +6 -0    youtube_dl/extractor/washingtonpost.py
 87. +40 -12  youtube_dl/extractor/wsj.py
 88. +14 -14  youtube_dl/extractor/xfileshare.py
 89. +21 -2   youtube_dl/extractor/xtube.py
 90. +10 -1   youtube_dl/extractor/xvideos.py
 91. +1 -1    youtube_dl/extractor/yahoo.py
 92. +2 -1    youtube_dl/extractor/yandexmusic.py
 93. +3 -2    youtube_dl/extractor/youtube.py
 94. +101 -0  youtube_dl/extractor/zaq1.py
 95. +4 -0    youtube_dl/options.py
 96. +28 -3   youtube_dl/postprocessor/ffmpeg.py
 97. +3 -2    youtube_dl/socks.py
 98. +189 -21 youtube_dl/utils.py
 99. +1 -1    youtube_dl/version.py

+ 3 - 3
.github/ISSUE_TEMPLATE.md

@@ -6,8 +6,8 @@
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.15**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.01**
 
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.04.15
+[debug] youtube-dl version 2017.05.01
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

+ 1 - 1
.gitignore

@@ -35,8 +35,8 @@ updates_key.pem
 *.mkv
 *.swf
 *.part
+*.ytdl
 *.swp
-test/testdata
 test/local_parameters.json
 .tox
 youtube-dl.zsh

+ 1 - 0
AUTHORS

@@ -211,3 +211,4 @@ Juanjo Benages
 Xiao Di Guan
 Thomas Winant
 Daniel Twardowski
+Jeremie Jarosh

+ 126 - 0
ChangeLog

@@ -1,3 +1,129 @@
+version <unreleased>
+
+Extractors
++ [cda] Support birthday verification (#12789)
+* [leeco] Fix extraction (#12974)
+
+
+version 2017.05.01
+
+Core
++ [extractor/common] Extract view count from JSON-LD
+* [utils] Improve unified_timestamp
++ [utils] Add video/mp2t to mimetype2ext
+* [downloader/external] Properly handle live stream downloading cancellation
+  (#8932)
++ [utils] Add support for unicode whitespace in clean_html on python 2 (#12906)
+
+Extractors
+* [infoq] Make audio format extraction non fatal (#12938)
+* [brightcove] Allow whitespace around attribute names in embedded code
++ [zaq1] Add support for zaq1.pl (#12693)
++ [xvideos] Extract duration (#12828)
+* [vevo] Fix extraction (#12879)
++ [noovo] Add support for noovo.ca (#12792)
++ [washingtonpost] Add support for embeds (#12699)
+* [yandexmusic:playlist] Fix extraction for python 3 (#12888)
+* [anvato] Improve extraction (#12913)
+    * Promote to regular shortcut based extractor
+    * Add mcp to access key mapping table
+    * Add support for embeds extraction
+    * Add support for anvato embeds in generic extractor
+* [xtube] Fix extraction for older FLV videos (#12734)
+* [tvplayer] Fix extraction (#12908)
+
+
+version 2017.04.28
+
+Core
++ [adobepass] Use geo verification headers for all requests
+- [downloader/fragment] Remove assert for resume_len when no fragments
+  downloaded
++ [extractor/common] Add manifest_url for explicit group rendition formats
+* [extractor/common] Fix manifest_url for m3u8 formats
+- [extractor/common] Don't list master m3u8 playlists in format list (#12832)
+
+Extractor
+* [aenetworks] Fix extraction for shows with single season
++ [go] Add support for Disney, DisneyJunior and DisneyXD show pages
+* [youtube] Recognize new locale-based player URLs (#12885)
++ [streamable] Add support for new embedded URL schema (#12844)
+* [arte:+7] Relax URL regular expression (#12837)
+
+
+version 2017.04.26
+
+Core
+* Introduce --keep-fragments for keeping fragments of fragmented download
+  on disk after download is finished
+* [YoutubeDL] Fix output template for missing timestamp (#12796)
+* [socks] Handle cases where credentials are required but missing
+* [extractor/common] Improve HLS extraction (#12211)
+    * Extract m3u8 parsing to separate method
+    * Improve rendition groups extraction
+    * Build stream name according stream GROUP-ID
+    * Ignore reference to AUDIO group without URI when stream has no CODECS
+    * Use float for scaled tbr in _parse_m3u8_formats
+* [utils] Add support for TTML styles in dfxp2srt
+* [downloader/hls] No need to download keys for fragments that have been
+  already downloaded
+* [downloader/fragment] Improve fragment downloading
+    * Resume immediately
+    * Don't concatenate fragments and decrypt them on every resume
+    * Optimize disk storage usage, don't store intermediate fragments on disk
+    * Store bookkeeping download state file
++ [extractor/common] Add support for multiple getters in try_get
++ [extractor/common] Add support for video of WebPage context in _json_ld
+  (#12778)
++ [extractor/common] Relax JWPlayer regular expression and remove
+  duplicate URLs (#12768)
+
+Extractors
+* [iqiyi] Fix extraction of Yule videos
+* [vidio] Improve extraction and sort formats
++ [brightcove] Match only video elements with data-video-id attribute
+* [iqiyi] Fix playlist detection (#12504)
+- [azubu] Remove extractor (#12813)
+* [porn91] Fix extraction (#12814)
+* [vidzi] Fix extraction (#12793)
++ [amp] Extract error message (#12795)
++ [xfileshare] Add support for gorillavid.com and daclips.com (#12776)
+* [instagram] Fix extraction (#12777)
++ [generic] Support Brightcove videos in <iframe> (#12482)
++ [brightcove] Support URLs with bcpid instead of playerID (#12482)
+* [brightcove] Fix _extract_url (#12782)
++ [odnoklassniki] Extract HLS formats
+
+
+version 2017.04.17
+
+Extractors
+* [limelight] Improve extraction LimelightEmbeddedPlayerFlash media embeds and
+  add support for channel and channelList embeds
+* [generic] Extract multiple Limelight embeds (#12761)
++ [itv] Extract series metadata
+* [itv] Fix RTMP formats downloading (#12759)
+* [itv] Use native HLS downloader by default
++ [go90] Extract subtitles (#12752)
++ [go90] Extract series metadata (#12752)
+
+
+version 2017.04.16
+
+Core
+* [YoutubeDL] Apply expand_path after output template substitution
++ [YoutubeDL] Propagate overridden meta fields to extraction results of type
+  url (#11163)
+
+Extractors
++ [generic] Extract RSS entries as url_transparent (#11163)
++ [streamango] Add support for streamango.com (#12643)
++ [wsj:article] Add support for articles (#12558)
+* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds'
+  URLs (#9163, #12005, #12178, #12480)
++ [udemy] Add support for react rendition (#12744)
+
+
 version 2017.04.15
 
 Extractors

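The try_get entry under 2017.04.26 above ("Add support for multiple getters") lets one call probe several alternative paths through a parsed JSON object; the first getter that neither raises nor returns a value of the wrong type wins. A minimal sketch of the pattern as extractors typically use it (the `data` dict and both getter paths here are invented for illustration):

    from youtube_dl.compat import compat_str
    from youtube_dl.utils import try_get

    data = {'player': {'args': {'title': 'some title'}}}  # invented input
    # Probe two possible page layouts in a single call; KeyError/TypeError
    # raised by a getter is swallowed and the next getter is tried.
    title = try_get(
        data,
        (lambda x: x['player']['args']['title'],
         lambda x: x['playerResponse']['videoDetails']['title']),
        compat_str)  # -> 'some title'
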
+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
 	find . -name "*.pyc" -delete
 	find . -name "*.class" -delete
 

+ 3 - 0
README.md

@@ -187,6 +187,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      and ISM)
     --abort-on-unavailable-fragment  Abort downloading when some fragment is not
                                      available
+    --keep-fragments                 Keep downloaded fragments on disk after
+                                     downloading is finished; fragments are
+                                     erased by default
     --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K)
                                      (default is 1024)
     --no-resize-buffer               Do not automatically adjust the buffer

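For embedders, the new flag surfaces as a `keep_fragments` boolean in the options dict (see the `youtube_dl/__init__.py` hunk further down). A minimal sketch, assuming an installed youtube-dl and using the project's usual test video:

    import youtube_dl

    ydl_opts = {
        # Keep downloaded fragments on disk after the download finishes
        # (equivalent to passing --keep-fragments on the command line).
        'keep_fragments': True,
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
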
+ 5 - 2
docs/supportedsites.md

@@ -45,6 +45,7 @@
  - **anderetijden**: npo.nl and ntr.nl
  - **AnimeOnDemand**
  - **anitube.se**
+ - **Anvato**
  - **AnySex**
  - **Aparat**
  - **AppleConnect**
@@ -81,8 +82,6 @@
  - **AZMedien**: AZ Medien videos
  - **AZMedienPlaylist**: AZ Medien playlists
  - **AZMedienShowPlaylist**: AZ Medien show playlists
- - **Azubu**
- - **AzubuLive**
  - **BaiduVideo**: 百度视频
  - **bambuser**
  - **bambuser:channel**
@@ -531,6 +530,7 @@
  - **NJPWWorld**: 新日本プロレスワールド
  - **NobelPrize**
  - **Noco**
+ - **Noovo**
  - **Normalboots**
  - **NosVideo**
  - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
@@ -745,6 +745,7 @@
  - **Steam**
  - **Stitcher**
  - **Streamable**
+ - **Streamango**
  - **streamcloud.eu**
  - **StreamCZ**
  - **StreetVoice**
@@ -966,6 +967,7 @@
  - **wrzuta.pl**
  - **wrzuta.pl:playlist**
  - **WSJ**: Wall Street Journal
+ - **WSJArticle**
  - **XBef**
  - **XboxClips**
  - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
@@ -1013,6 +1015,7 @@
  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
  - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
  - **Zapiks**
+ - **Zaq1**
  - **ZDF**
  - **ZDFChannel**
  - **zingmp3**: mp3.zing.vn

+ 314 - 1
test/test_InfoExtractor.py

@@ -3,12 +3,13 @@
 from __future__ import unicode_literals
 
 # Allow direct execution
+import io
 import os
 import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL, expect_dict
+from test.helper import FakeYDL, expect_dict, expect_value
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -175,6 +176,318 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 }]
             })
 
+    def test_parse_m3u8_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/11507
+                # http://pluzz.francetv.fr/videos/le_ministere.html
+                'pluzz_francetv_11507',
+                'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                [{
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'format_id': '180',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.66.30',
+                    'tbr': 180,
+                    'width': 256,
+                    'height': 144,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'format_id': '303',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.66.30',
+                    'tbr': 303,
+                    'width': 320,
+                    'height': 180,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'format_id': '575',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.66.30',
+                    'tbr': 575,
+                    'width': 512,
+                    'height': 288,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'format_id': '831',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.77.30',
+                    'tbr': 831,
+                    'width': 704,
+                    'height': 396,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'protocol': 'm3u8',
+                    'format_id': '1467',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.77.30',
+                    'tbr': 1467,
+                    'width': 1024,
+                    'height': 576,
+                }]
+            ),
+            (
+                # https://github.com/rg3/youtube-dl/issues/11995
+                # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
+                'teamcoco_11995',
+                'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                [{
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-0-Default',
+                    'protocol': 'm3u8',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-1-Default',
+                    'protocol': 'm3u8',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '71',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.5',
+                    'vcodec': 'none',
+                    'tbr': 71,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '413',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001e',
+                    'tbr': 413,
+                    'width': 400,
+                    'height': 224,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '522',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001e',
+                    'tbr': 522,
+                    'width': 400,
+                    'height': 224,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '1205',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001e',
+                    'tbr': 1205,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '2374',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 2374,
+                    'width': 1024,
+                    'height': 576,
+                }]
+            ),
+            (
+                # https://github.com/rg3/youtube-dl/issues/12211
+                # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
+                'toggle_mobile_12211',
+                'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                [{
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-English',
+                    'protocol': 'm3u8',
+                    'language': 'eng',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-Undefined',
+                    'protocol': 'm3u8',
+                    'language': 'und',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '155',
+                    'protocol': 'm3u8',
+                    'tbr': 155.648,
+                    'width': 320,
+                    'height': 180,
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '502',
+                    'protocol': 'm3u8',
+                    'tbr': 502.784,
+                    'width': 480,
+                    'height': 270,
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '827',
+                    'protocol': 'm3u8',
+                    'tbr': 827.392,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '1396',
+                    'protocol': 'm3u8',
+                    'tbr': 1396.736,
+                    'width': 854,
+                    'height': 480,
+                }]
+            ),
+            (
+                # http://www.twitch.tv/riotgames/v/6528877
+                'twitch_vod',
+                'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                [{
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Audio Only',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 182.725,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Mobile',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C00D',
+                    'tbr': 280.474,
+                    'width': 400,
+                    'height': 226,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Low',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C01E',
+                    'tbr': 628.347,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Medium',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C01E',
+                    'tbr': 893.387,
+                    'width': 852,
+                    'height': 480,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'High',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C01F',
+                    'tbr': 1603.789,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'Source',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.100.31',
+                    'tbr': 3214.134,
+                    'width': 1280,
+                    'height': 720,
+                }]
+            ),
+            (
+                # http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
+                # EXT-X-STREAM-INF tag with NAME attribute that is not defined
+                # in HLS specification
+                'vidio',
+                'https://www.vidio.com/videos/165683/playlist.m3u8',
+                [{
+                    'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
+                    'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '270p 3G',
+                    'protocol': 'm3u8',
+                    'tbr': 300,
+                    'width': 480,
+                    'height': 270,
+                }, {
+                    'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8',
+                    'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '360p SD',
+                    'protocol': 'm3u8',
+                    'tbr': 600,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8',
+                    'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '720p HD',
+                    'protocol': 'm3u8',
+                    'tbr': 1200,
+                    'width': 1280,
+                    'height': 720,
+                }]
+            )
+        ]
+
+        for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_m3u8_formats(
+                    f.read(), m3u8_url, ext='mp4')
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
+
 
 if __name__ == '__main__':
     unittest.main()

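The test above drives `_parse_m3u8_formats` directly against the fixture files added further down. Inside an extractor the same machinery is normally reached through `_extract_m3u8_formats`; a sketch of the usual call site (`ExampleIE` and its URLs are hypothetical, not part of this commit):

    from youtube_dl.extractor.common import InfoExtractor

    class ExampleIE(InfoExtractor):  # hypothetical extractor for illustration
        _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            # Each format dict now also carries 'manifest_url' (see the
            # 2017.04.28 ChangeLog entries above).
            formats = self._extract_m3u8_formats(
                'https://example.com/video/%s/master.m3u8' % video_id,
                video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False)
            self._sort_formats(formats)
            return {
                'id': video_id,
                'title': 'Example video',
                'formats': formats,
            }
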
+ 3 - 1
test/test_YoutubeDL.py

@@ -755,6 +755,7 @@ class TestYoutubeDL(unittest.TestCase):
                     '_type': 'url_transparent',
                     'url': 'foo2:',
                     'ie_key': 'Foo2',
+                    'title': 'foo1 title'
                 }
 
         class Foo2IE(InfoExtractor):
@@ -771,7 +772,7 @@ class TestYoutubeDL(unittest.TestCase):
             _VALID_URL = r'foo3:'
 
             def _real_extract(self, url):
-                return _make_result([{'url': TEST_URL}])
+                return _make_result([{'url': TEST_URL}], title='foo3 title')
 
         ydl.add_info_extractor(Foo1IE(ydl))
         ydl.add_info_extractor(Foo2IE(ydl))
@@ -779,6 +780,7 @@ class TestYoutubeDL(unittest.TestCase):
         ydl.extract_info('foo1:')
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['url'], TEST_URL)
+        self.assertEqual(downloaded['title'], 'foo1 title')
 
 
 if __name__ == '__main__':

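What the extended test asserts: metadata forced on a url_transparent result by the outer extractor must survive even when the inner extractor itself resolves to yet another URL. Shaped after the test's own fixtures (`foo1:`, `foo2:` and the titles are the test's dummy values):

    # Foo1IE returns this url_transparent result with a forced title; the
    # chain Foo2IE -> Foo3IE eventually yields a result whose own title is
    # 'foo3 title'. After this change the final info dict still carries the
    # outer 'foo1 title', which is what the new assertEqual checks.
    outer_result = {
        '_type': 'url_transparent',
        'url': 'foo2:',
        'ie_key': 'Foo2',
        'title': 'foo1 title',
    }
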
+ 1 - 1
test/test_download.py

@@ -225,7 +225,7 @@ def generator(test_case, tname):
                                 format_bytes(got_fsize)))
                     if 'md5' in tc:
                         md5_for_file = _file_md5(tc_filename)
-                        self.assertEqual(md5_for_file, tc['md5'])
+                        self.assertEqual(tc['md5'], md5_for_file)
                 # Finally, check test cases' data again but this time against
                 # extracted data from info JSON file written during processing
                 info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'

+ 54 - 0
test/test_utils.py

@@ -44,6 +44,7 @@ from youtube_dl.utils import (
     limit_length,
     mimetype2ext,
     month_by_name,
+    multipart_encode,
     ohdave_rsa_encrypt,
     OnDemandPagedList,
     orderedSet,
@@ -338,6 +339,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
         self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
         self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
+        self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -619,6 +621,16 @@ class TestUtil(unittest.TestCase):
             'http://example.com/path', {'test': '第二行тест'})),
             query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
 
+    def test_multipart_encode(self):
+        self.assertEqual(
+            multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
+            b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
+        self.assertEqual(
+            multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
+            b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
+        self.assertRaises(
+            ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
+
     def test_dict_get(self):
         FALSE_VALUES = {
             'none': None,
@@ -899,6 +911,7 @@ class TestUtil(unittest.TestCase):
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
+        self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
 
     def test_intlist_to_bytes(self):
         self.assertEqual(
@@ -1069,6 +1082,47 @@ The first line
 '''
         self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
 
+        dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
+<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
+  <head>
+    <styling>
+      <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" />
+      <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" />
+      <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" />
+      <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" />
+    </styling>
+  </head>
+  <body tts:textAlign="center" style="s0">
+    <div>
+      <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p>
+      <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p>
+      <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p>
+      <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
+    </div>
+  </body>
+</tt>'''
+        srt_data = '''1
+00:00:02,080 --> 00:00:05,839
+<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
+
+2
+00:00:02,080 --> 00:00:05,839
+<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
+</font>part 2</font></b>
+
+3
+00:00:05,839 --> 00:00:09,560
+<u><font color="lime">line 3
+part 3</font></u>
+
+4
+00:00:09,560 --> 00:00:12,359
+<i><u><font color="yellow"><font color="lime">inner
+ </font>style</font></u></i>
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
+
     def test_cli_option(self):
         self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
         self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])

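multipart_encode (exercised above) returns a 2-tuple of the encoded body and a matching Content-Type value; when no boundary is passed, a random one is chosen. A usage sketch (the URL and form fields are placeholders; the cda birthday-verification change mentioned in the ChangeLog is the in-tree user):

    from youtube_dl.utils import multipart_encode, sanitized_Request

    body, content_type = multipart_encode({
        b'day': b'1',
        b'month': b'1',
        b'year': b'1990',
    })
    req = sanitized_Request('https://example.com/verify-age', body)
    req.add_header('Content-Type', content_type)
    # req can then be fetched with self._download_webpage(req, video_id)
    # from within an extractor.
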
+ 14 - 0
test/testdata/m3u8/pluzz_francetv_11507.m3u8

@@ -0,0 +1,14 @@
+#EXTM3U
+    #EXT-X-VERSION:5
+    #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra"
+    #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0

+ 16 - 0
test/testdata/m3u8/teamcoco_11995.m3u8

@@ -0,0 +1,16 @@
+#EXTM3U
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8"
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-2m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-1m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8

+ 13 - 0
test/testdata/m3u8/toggle_mobile_12211.m3u8

@@ -0,0 +1,13 @@
+#EXTM3U
+#EXT-X-VERSION:4
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8"
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8

+ 20 - 0
test/testdata/m3u8/twitch_vod.m3u8

@@ -0,0 +1,20 @@
+#EXTM3U
+#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81"
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8

+ 10 - 0
test/testdata/m3u8/vidio.m3u8

@@ -0,0 +1,10 @@
+#EXTM3U
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,RESOLUTION=480x270,NAME="270p 3G"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,RESOLUTION=640x360,NAME="360p SD"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1200000,RESOLUTION=1280x720,NAME="720p HD"
+https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8

+ 10 - 4
youtube_dl/YoutubeDL.py

@@ -640,7 +640,7 @@ class YoutubeDL(object):
 
             NUMERIC_FIELDS = set((
                 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
-                'upload_year', 'upload_month', 'upload_day',
+                'timestamp', 'upload_year', 'upload_month', 'upload_day',
                 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
                 'average_rating', 'comment_count', 'age_limit',
                 'start_time', 'end_time',
@@ -672,8 +672,7 @@ class YoutubeDL(object):
                         FORMAT_RE.format(numeric_field),
                         r'%({0})s'.format(numeric_field), outtmpl)
 
-            tmpl = expand_path(outtmpl)
-            filename = tmpl % template_dict
+            filename = expand_path(outtmpl % template_dict)
             # Temporary fix for #4787
             # 'Treat' all problem characters by passing filename through preferredencoding
             # to workaround encoding issues with subprocess on python2 @ Windows
@@ -851,7 +850,14 @@ class YoutubeDL(object):
             new_result = info.copy()
             new_result.update(force_properties)
 
-            assert new_result.get('_type') != 'url_transparent'
+            # Extracted info may not be a video result (i.e.
+            # info.get('_type', 'video') != video) but rather an url or
+            # url_transparent. In such cases outer metadata (from ie_result)
+            # should be propagated to inner one (info). For this to happen
+            # _type of info should be overridden with url_transparent. This
+            # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
+            if new_result.get('_type') == 'url':
+                new_result['_type'] = 'url_transparent'
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)

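The effect of the url_transparent override above can be illustrated in isolation. A minimal, self-contained sketch (the dicts and the force_properties filter are simplified stand-ins for what process_ie_result actually builds): outer metadata wins over the inner extractor's fields, and the inner result keeps being treated as transparent.

    # Hypothetical illustration, not the exact YoutubeDL internals:
    outer = {'_type': 'url_transparent', 'url': 'inner:url', 'ie_key': 'Inner', 'title': 'Outer title'}
    inner = {'_type': 'url', 'url': 'https://example.com/video', 'title': 'Inner title'}

    # Outer metadata (except routing fields) is forced onto the inner result
    force_properties = dict(
        (k, v) for k, v in outer.items()
        if v is not None and k not in ('_type', 'url', 'ie_key'))

    new_result = inner.copy()
    new_result.update(force_properties)
    if new_result.get('_type') == 'url':
        new_result['_type'] = 'url_transparent'

    print(new_result['title'])   # -> 'Outer title'
    print(new_result['_type'])   # -> 'url_transparent'
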
+ 1 - 0
youtube_dl/__init__.py

@@ -343,6 +343,7 @@ def _real_main(argv=None):
         'retries': opts.retries,
         'fragment_retries': opts.fragment_retries,
         'skip_unavailable_fragments': opts.skip_unavailable_fragments,
+        'keep_fragments': opts.keep_fragments,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'continuedl': opts.continue_dl,

+ 19 - 15
youtube_dl/downloader/common.py

@@ -187,6 +187,9 @@ class FileDownloader(object):
             return filename[:-len('.part')]
         return filename
 
+    def ytdl_filename(self, filename):
+        return filename + '.ytdl'
+
     def try_rename(self, old_filename, new_filename):
         try:
             if old_filename == new_filename:
@@ -327,21 +330,22 @@ class FileDownloader(object):
             os.path.exists(encodeFilename(filename))
         )
 
-        continuedl_and_exists = (
-            self.params.get('continuedl', True) and
-            os.path.isfile(encodeFilename(filename)) and
-            not self.params.get('nopart', False)
-        )
-
-        # Check file already present
-        if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
-            self.report_file_already_downloaded(filename)
-            self._hook_progress({
-                'filename': filename,
-                'status': 'finished',
-                'total_bytes': os.path.getsize(encodeFilename(filename)),
-            })
-            return True
+        if not hasattr(filename, 'write'):
+            continuedl_and_exists = (
+                self.params.get('continuedl', True) and
+                os.path.isfile(encodeFilename(filename)) and
+                not self.params.get('nopart', False)
+            )
+
+            # Check file already present
+            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
+                self.report_file_already_downloaded(filename)
+                self._hook_progress({
+                    'filename': filename,
+                    'status': 'finished',
+                    'total_bytes': os.path.getsize(encodeFilename(filename)),
+                })
+                return True
 
         min_sleep_interval = self.params.get('sleep_interval')
         if min_sleep_interval:

+ 11 - 32
youtube_dl/downloader/dash.py

@@ -1,13 +1,7 @@
 from __future__ import unicode_literals
 
-import os
-
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
-from ..utils import (
-    sanitize_open,
-    encodeFilename,
-)
 
 
 class DashSegmentsFD(FragmentFD):
@@ -28,31 +22,24 @@ class DashSegmentsFD(FragmentFD):
 
         self._prepare_and_start_frag_download(ctx)
 
-        segments_filenames = []
-
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
-        def process_segment(segment, tmp_filename, num):
-            segment_url = segment['url']
-            segment_name = 'Frag%d' % num
-            target_filename = '%s-%s' % (tmp_filename, segment_name)
+        frag_index = 0
+        for i, segment in enumerate(segments):
+            frag_index += 1
+            if frag_index <= ctx['fragment_index']:
+                continue
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
-            fatal = num == 0 or not skip_unavailable_fragments
+            fatal = i == 0 or not skip_unavailable_fragments
             count = 0
             while count <= fragment_retries:
                 try:
-                    success = ctx['dl'].download(target_filename, {
-                        'url': segment_url,
-                        'http_headers': info_dict.get('http_headers'),
-                    })
+                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
                     if not success:
                         return False
-                    down, target_sanitized = sanitize_open(target_filename, 'rb')
-                    ctx['dest_stream'].write(down.read())
-                    down.close()
-                    segments_filenames.append(target_sanitized)
+                    self._append_fragment(ctx, frag_content)
                     break
                 except compat_urllib_error.HTTPError as err:
                     # YouTube may often return 404 HTTP error for a fragment causing the
@@ -63,22 +50,14 @@ class DashSegmentsFD(FragmentFD):
                     # HTTP error.
                     count += 1
                     if count <= fragment_retries:
-                        self.report_retry_fragment(err, segment_name, count, fragment_retries)
+                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
             if count > fragment_retries:
                 if not fatal:
-                    self.report_skip_fragment(segment_name)
-                    return True
+                    self.report_skip_fragment(frag_index)
+                    continue
                 self.report_error('giving up after %s fragment retries' % fragment_retries)
                 return False
-            return True
-
-        for i, segment in enumerate(segments):
-            if not process_segment(segment, ctx['tmpfilename'], i):
-                return False
 
         self._finish_frag_download(ctx)
 
-        for segment_file in segments_filenames:
-            os.remove(encodeFilename(segment_file))
-
         return True

+ 11 - 1
youtube_dl/downloader/external.py

@@ -29,7 +29,17 @@ class ExternalFD(FileDownloader):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
-        retval = self._call_downloader(tmpfilename, info_dict)
+        try:
+            retval = self._call_downloader(tmpfilename, info_dict)
+        except KeyboardInterrupt:
+            if not info_dict.get('is_live'):
+                raise
+            # Live stream downloading cancellation should be considered as
+            # correct and expected termination thus all postprocessing
+            # should take place
+            retval = 0
+            self.to_screen('[%s] Interrupted by user' % self.get_basename())
+
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))

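The control flow added above can be shown on its own: for a live stream, Ctrl+C is the normal way to stop recording, so the interrupt is converted into a success status and postprocessing still runs. A minimal sketch under that assumption (run_downloader and fake_call are illustrative names, not youtube-dl API):

    def run_downloader(call, is_live):
        try:
            return call()
        except KeyboardInterrupt:
            if not is_live:
                raise  # a regular download really was aborted
            # Expected termination for a live stream: report success
            return 0

    def fake_call():
        raise KeyboardInterrupt

    assert run_downloader(fake_call, is_live=True) == 0
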
+ 10 - 23
youtube_dl/downloader/f4m.py

@@ -3,7 +3,6 @@ from __future__ import division, unicode_literals
 import base64
 import io
 import itertools
-import os
 import time
 
 from .fragment import FragmentFD
@@ -16,9 +15,7 @@ from ..compat import (
     compat_struct_unpack,
 )
 from ..utils import (
-    encodeFilename,
     fix_xml_ampersands,
-    sanitize_open,
     xpath_text,
 )
 
@@ -366,17 +363,21 @@ class F4mFD(FragmentFD):
 
         dest_stream = ctx['dest_stream']
 
-        write_flv_header(dest_stream)
-        if not live:
-            write_metadata_tag(dest_stream, metadata)
+        if ctx['complete_frags_downloaded_bytes'] == 0:
+            write_flv_header(dest_stream)
+            if not live:
+                write_metadata_tag(dest_stream, metadata)
 
         base_url_parsed = compat_urllib_parse_urlparse(base_url)
 
         self._start_frag_download(ctx)
 
-        frags_filenames = []
+        frag_index = 0
         while fragments_list:
             seg_i, frag_i = fragments_list.pop(0)
+            frag_index += 1
+            if frag_index <= ctx['fragment_index']:
+                continue
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             query = []
             if base_url_parsed.query:
@@ -386,17 +387,10 @@ class F4mFD(FragmentFD):
             if info_dict.get('extra_param_to_segment_url'):
                 query.append(info_dict['extra_param_to_segment_url'])
             url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
-            frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
             try:
-                success = ctx['dl'].download(frag_filename, {
-                    'url': url_parsed.geturl(),
-                    'http_headers': info_dict.get('http_headers'),
-                })
+                success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
                 if not success:
                     return False
-                (down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
-                down_data = down.read()
-                down.close()
                 reader = FlvReader(down_data)
                 while True:
                     try:
@@ -411,12 +405,8 @@ class F4mFD(FragmentFD):
                             break
                         raise
                     if box_type == b'mdat':
-                        dest_stream.write(box_data)
+                        self._append_fragment(ctx, box_data)
                         break
-                if live:
-                    os.remove(encodeFilename(frag_sanitized))
-                else:
-                    frags_filenames.append(frag_sanitized)
             except (compat_urllib_error.HTTPError, ) as err:
                 if live and (err.code == 404 or err.code == 410):
                     # We didn't keep up with the live window. Continue
@@ -436,7 +426,4 @@ class F4mFD(FragmentFD):
 
         self._finish_frag_download(ctx)
 
-        for frag_file in frags_filenames:
-            os.remove(encodeFilename(frag_file))
-
         return True

+ 109 - 13
youtube_dl/downloader/fragment.py

@@ -2,6 +2,7 @@ from __future__ import division, unicode_literals
 
 import os
 import time
+import json
 
 from .common import FileDownloader
 from .http import HttpFD
@@ -28,15 +29,37 @@ class FragmentFD(FileDownloader):
                         and hlsnative only)
     skip_unavailable_fragments:
                         Skip unavailable fragments (DASH and hlsnative only)
+    keep_fragments:     Keep downloaded fragments on disk after downloading is
+                        finished
+
+    For each incomplete fragment download youtube-dl keeps on disk a special
+    bookkeeping file with download state and metadata (in future such files will
+    be used for any incomplete download handled by youtube-dl). This file is
+    used to properly handle resuming, check download file consistency and detect
+    potential errors. The file has a .ytdl extension and represents a standard
+    JSON file of the following format:
+
+    extractor:
+        Dictionary of extractor related data. TBD.
+
+    downloader:
+        Dictionary of downloader related data. May contain following data:
+            current_fragment:
+                Dictionary with current (being downloaded) fragment data:
+                index:  0-based index of current fragment among all fragments
+            fragment_count:
+                Total count of fragments
+
+    This feature is experimental and file format may change in future.
     """
 
-    def report_retry_fragment(self, err, fragment_name, count, retries):
+    def report_retry_fragment(self, err, frag_index, count, retries):
         self.to_screen(
-            '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
-            % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
+            '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
+            % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
 
-    def report_skip_fragment(self, fragment_name):
-        self.to_screen('[download] Skipping fragment %s...' % fragment_name)
+    def report_skip_fragment(self, frag_index):
+        self.to_screen('[download] Skipping fragment %d...' % frag_index)
 
     def _prepare_url(self, info_dict, url):
         headers = info_dict.get('http_headers')
@@ -46,6 +69,51 @@ class FragmentFD(FileDownloader):
         self._prepare_frag_download(ctx)
         self._start_frag_download(ctx)
 
+    @staticmethod
+    def __do_ytdl_file(ctx):
+        return not ctx['live'] and not ctx['tmpfilename'] == '-'
+
+    def _read_ytdl_file(self, ctx):
+        stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
+        ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+        stream.close()
+
+    def _write_ytdl_file(self, ctx):
+        frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
+        downloader = {
+            'current_fragment': {
+                'index': ctx['fragment_index'],
+            },
+        }
+        if ctx.get('fragment_count') is not None:
+            downloader['fragment_count'] = ctx['fragment_count']
+        frag_index_stream.write(json.dumps({'downloader': downloader}))
+        frag_index_stream.close()
+
+    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
+        success = ctx['dl'].download(fragment_filename, {
+            'url': frag_url,
+            'http_headers': headers or info_dict.get('http_headers'),
+        })
+        if not success:
+            return False, None
+        down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+        ctx['fragment_filename_sanitized'] = frag_sanitized
+        frag_content = down.read()
+        down.close()
+        return True, frag_content
+
+    def _append_fragment(self, ctx, frag_content):
+        try:
+            ctx['dest_stream'].write(frag_content)
+        finally:
+            if self.__do_ytdl_file(ctx):
+                self._write_ytdl_file(ctx)
+            if not self.params.get('keep_fragments', False):
+                os.remove(ctx['fragment_filename_sanitized'])
+            del ctx['fragment_filename_sanitized']
+
     def _prepare_frag_download(self, ctx):
         if 'live' not in ctx:
             ctx['live'] = False
@@ -66,11 +134,36 @@ class FragmentFD(FileDownloader):
             }
         )
         tmpfilename = self.temp_name(ctx['filename'])
-        dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
+        open_mode = 'wb'
+        resume_len = 0
+
+        # Establish possible resume length
+        if os.path.isfile(encodeFilename(tmpfilename)):
+            open_mode = 'ab'
+            resume_len = os.path.getsize(encodeFilename(tmpfilename))
+
+        # Should be initialized before ytdl file check
+        ctx.update({
+            'tmpfilename': tmpfilename,
+            'fragment_index': 0,
+        })
+
+        if self.__do_ytdl_file(ctx):
+            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+                self._read_ytdl_file(ctx)
+            else:
+                self._write_ytdl_file(ctx)
+            if ctx['fragment_index'] > 0:
+                assert resume_len > 0
+
+        dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
+
         ctx.update({
             'dl': dl,
             'dest_stream': dest_stream,
             'tmpfilename': tmpfilename,
+            # Total complete fragments downloaded so far in bytes
+            'complete_frags_downloaded_bytes': resume_len,
         })
 
     def _start_frag_download(self, ctx):
@@ -79,9 +172,9 @@ class FragmentFD(FileDownloader):
         # hook
         state = {
             'status': 'downloading',
-            'downloaded_bytes': 0,
-            'frag_index': 0,
-            'frag_count': total_frags,
+            'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
+            'fragment_index': ctx['fragment_index'],
+            'fragment_count': total_frags,
             'filename': ctx['filename'],
             'tmpfilename': ctx['tmpfilename'],
         }
@@ -89,8 +182,6 @@ class FragmentFD(FileDownloader):
         start = time.time()
         ctx.update({
             'started': start,
-            # Total complete fragments downloaded so far in bytes
-            'complete_frags_downloaded_bytes': 0,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
@@ -106,11 +197,12 @@ class FragmentFD(FileDownloader):
             if not ctx['live']:
                 estimated_size = (
                     (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
-                    (state['frag_index'] + 1) * total_frags)
+                    (state['fragment_index'] + 1) * total_frags)
                 state['total_bytes_estimate'] = estimated_size
 
             if s['status'] == 'finished':
-                state['frag_index'] += 1
+                state['fragment_index'] += 1
+                ctx['fragment_index'] = state['fragment_index']
                 state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                 ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                 ctx['prev_frag_downloaded_bytes'] = 0
@@ -132,6 +224,10 @@ class FragmentFD(FileDownloader):
 
     def _finish_frag_download(self, ctx):
         ctx['dest_stream'].close()
+        if self.__do_ytdl_file(ctx):
+            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
+            if os.path.isfile(ytdl_filename):
+                os.remove(ytdl_filename)
         elapsed = time.time() - ctx['started']
         self.try_rename(ctx['tmpfilename'], ctx['filename'])
         fsize = os.path.getsize(encodeFilename(ctx['filename']))

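Concretely, the .ytdl bookkeeping file described in the docstring above is just a small JSON blob next to the .part file. A sketch of what _write_ytdl_file serializes and _read_ytdl_file recovers on resume (the filename and counts are assumed example values; the docstring marks the format as experimental):

    import json

    # State for a download that has fully appended 42 of 120 fragments:
    state = {
        'downloader': {
            'current_fragment': {'index': 42},
            'fragment_count': 120,
        },
    }
    with open('video.mp4.ytdl', 'w') as f:
        f.write(json.dumps(state))

    # On the next run, the downloader reads the index back and skips all
    # fragments with frag_index <= 42, appending to the existing .part file:
    with open('video.mp4.ytdl') as f:
        fragment_index = json.loads(f.read())['downloader']['current_fragment']['index']
    assert fragment_index == 42

This is why _prepare_frag_download asserts resume_len > 0 whenever the stored fragment_index is positive: a non-empty .ytdl file without a matching partially written output would indicate an inconsistent state.
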
+ 13 - 21
youtube_dl/downloader/hls.py

@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import os.path
 import re
 import binascii
 try:
@@ -18,8 +17,6 @@ from ..compat import (
     compat_struct_pack,
 )
 from ..utils import (
-    encodeFilename,
-    sanitize_open,
     parse_m3u8_attributes,
     update_url_query,
 )
@@ -103,17 +100,18 @@ class HlsFD(FragmentFD):
         media_sequence = 0
         decrypt_info = {'METHOD': 'NONE'}
         byte_range = {}
-        frags_filenames = []
+        frag_index = 0
         for line in s.splitlines():
             line = line.strip()
             if line:
                 if not line.startswith('#'):
+                    frag_index += 1
+                    if frag_index <= ctx['fragment_index']:
+                        continue
                     frag_url = (
                         line
                         if re.match(r'^https?://', line)
                         else compat_urlparse.urljoin(man_url, line))
-                    frag_name = 'Frag%d' % i
-                    frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
                     if extra_query:
                         frag_url = update_url_query(frag_url, extra_query)
                     count = 0
@@ -122,15 +120,10 @@ class HlsFD(FragmentFD):
                         headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
                     while count <= fragment_retries:
                         try:
-                            success = ctx['dl'].download(frag_filename, {
-                                'url': frag_url,
-                                'http_headers': headers,
-                            })
+                            success, frag_content = self._download_fragment(
+                                ctx, frag_url, info_dict, headers)
                             if not success:
                                 return False
-                            down, frag_sanitized = sanitize_open(frag_filename, 'rb')
-                            frag_content = down.read()
-                            down.close()
                             break
                         except compat_urllib_error.HTTPError as err:
                             # Unavailable (possibly temporary) fragments may be served.
@@ -139,28 +132,29 @@ class HlsFD(FragmentFD):
                             # https://github.com/rg3/youtube-dl/issues/10448).
                             count += 1
                             if count <= fragment_retries:
-                                self.report_retry_fragment(err, frag_name, count, fragment_retries)
+                                self.report_retry_fragment(err, frag_index, count, fragment_retries)
                     if count > fragment_retries:
                         if skip_unavailable_fragments:
                             i += 1
                             media_sequence += 1
-                            self.report_skip_fragment(frag_name)
+                            self.report_skip_fragment(frag_index)
                             continue
                         self.report_error(
                             'giving up after %s fragment retries' % fragment_retries)
                         return False
                     if decrypt_info['METHOD'] == 'AES-128':
                         iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
+                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read()
                         frag_content = AES.new(
                             decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
-                    ctx['dest_stream'].write(frag_content)
-                    frags_filenames.append(frag_sanitized)
+                    self._append_fragment(ctx, frag_content)
                     # We only download the first fragment during the test
                     if test:
                         break
                     i += 1
                     media_sequence += 1
                 elif line.startswith('#EXT-X-KEY'):
+                    decrypt_url = decrypt_info.get('URI')
                     decrypt_info = parse_m3u8_attributes(line[11:])
                     if decrypt_info['METHOD'] == 'AES-128':
                         if 'IV' in decrypt_info:
@@ -170,7 +164,8 @@ class HlsFD(FragmentFD):
                                 man_url, decrypt_info['URI'])
                         if extra_query:
                             decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
-                        decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
+                        if decrypt_url != decrypt_info['URI']:
+                            decrypt_info['KEY'] = None
                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                     media_sequence = int(line[22:])
                 elif line.startswith('#EXT-X-BYTERANGE'):
@@ -183,7 +178,4 @@ class HlsFD(FragmentFD):
 
         self._finish_frag_download(ctx)
 
-        for frag_file in frags_filenames:
-            os.remove(encodeFilename(frag_file))
-
         return True

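Two details in the hls.py change are worth spelling out. First, the key is now fetched lazily and cached: it is only downloaded when an encrypted fragment is actually processed, and reset to None on #EXT-X-KEY only when the key URI really changed, so resumed downloads that skip every fragment never touch the key server. Second, the IV fallback packs the media sequence number into the low bytes of a 16-byte block, which is the default IV defined by the HLS spec. A standalone sketch of that derivation (dummy key and ciphertext; pycrypto's AES, the same dependency the downloader imports):

    import struct
    from Crypto.Cipher import AES

    media_sequence = 7
    # '>8xq' = 8 zero pad bytes, then the sequence number as a big-endian
    # signed 64-bit integer -> the default HLS AES-128 IV
    iv = struct.pack('>8xq', media_sequence)
    assert iv == b'\x00' * 15 + b'\x07'

    key = b'0123456789abcdef'   # dummy 16-byte key for illustration
    ciphertext = b'\x00' * 16   # dummy one-block "fragment"
    plaintext = AES.new(key, AES.MODE_CBC, iv).decrypt(ciphertext)
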
+ 9 - 25
youtube_dl/downloader/ism.py

@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import os
 import time
 import struct
 import binascii
@@ -8,10 +7,6 @@ import io
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
-from ..utils import (
-    sanitize_open,
-    encodeFilename,
-)
 
 
 u8 = struct.Struct(b'>B')
@@ -225,50 +220,39 @@ class IsmFD(FragmentFD):
 
         self._prepare_and_start_frag_download(ctx)
 
-        segments_filenames = []
-
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
         track_written = False
+        frag_index = 0
         for i, segment in enumerate(segments):
-            segment_url = segment['url']
-            segment_name = 'Frag%d' % i
-            target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
+            frag_index += 1
+            if frag_index <= ctx['fragment_index']:
+                continue
             count = 0
             while count <= fragment_retries:
                 try:
-                    success = ctx['dl'].download(target_filename, {
-                        'url': segment_url,
-                        'http_headers': info_dict.get('http_headers'),
-                    })
+                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
                     if not success:
                         return False
-                    down, target_sanitized = sanitize_open(target_filename, 'rb')
-                    down_data = down.read()
                     if not track_written:
-                        tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
+                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
                         info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
                         write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
                         track_written = True
-                    ctx['dest_stream'].write(down_data)
-                    down.close()
-                    segments_filenames.append(target_sanitized)
+                    self._append_fragment(ctx, frag_content)
                     break
                 except compat_urllib_error.HTTPError as err:
                     count += 1
                     if count <= fragment_retries:
-                        self.report_retry_fragment(err, segment_name, count, fragment_retries)
+                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
             if count > fragment_retries:
                 if skip_unavailable_fragments:
-                    self.report_skip_fragment(segment_name)
+                    self.report_skip_fragment(frag_index)
                     continue
                 self.report_error('giving up after %s fragment retries' % fragment_retries)
                 return False
 
         self._finish_frag_download(ctx)
 
-        for segment_file in segments_filenames:
-            os.remove(encodeFilename(segment_file))
-
         return True

+ 6 - 0
youtube_dl/extractor/adobepass.py

@@ -1308,6 +1308,12 @@ class AdobePassIE(InfoExtractor):
     _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
     _MVPD_CACHE = 'ap-mvpd'
 
+    def _download_webpage_handle(self, *args, **kwargs):
+        headers = kwargs.get('headers', {})
+        headers.update(self.geo_verification_headers())
+        kwargs['headers'] = headers
+        return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
+
     @staticmethod
     def _get_mvpd_resource(provider_id, title, guid, rating):
         channel = etree.Element('channel')

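The override above is a general wrapper pattern: intercept the keyword arguments, merge extra headers in (here the geo-verification headers win over caller-supplied ones, matching the update order in the diff), and delegate to the parent. The same idea in isolation, with generic names standing in for the extractor API:

    class Base(object):
        def _download_webpage_handle(self, url, headers=None):
            # stand-in for the real network call; echoes its inputs
            return url, headers

    class GeoBase(Base):
        def geo_verification_headers(self):
            return {'X-Forwarded-For': '198.51.100.1'}  # assumed example value

        def _download_webpage_handle(self, *args, **kwargs):
            headers = kwargs.get('headers', {})
            headers.update(self.geo_verification_headers())
            kwargs['headers'] = headers
            return super(GeoBase, self)._download_webpage_handle(*args, **kwargs)

    print(GeoBase()._download_webpage_handle('https://example.com'))
    # -> ('https://example.com', {'X-Forwarded-For': '198.51.100.1'})
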
+ 9 - 5
youtube_dl/extractor/aenetworks.py

@@ -101,10 +101,14 @@ class AENetworksIE(AENetworksBaseIE):
                 for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
                     entries.append(self.url_result(
                         compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
-                return self.playlist_result(
-                    entries, self._html_search_meta('aetn:SeriesId', webpage),
-                    self._html_search_meta('aetn:SeriesTitle', webpage))
-            elif url_parts_len == 2:
+                if entries:
+                    return self.playlist_result(
+                        entries, self._html_search_meta('aetn:SeriesId', webpage),
+                        self._html_search_meta('aetn:SeriesTitle', webpage))
+                else:
+                    # single season
+                    url_parts_len = 2
+            if url_parts_len == 2:
                 entries = []
                 for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
                     episode_attributes = extract_attributes(episode_item)
@@ -112,7 +116,7 @@ class AENetworksIE(AENetworksBaseIE):
                         url, episode_attributes['data-canonical'])
                     entries.append(self.url_result(
                         episode_url, 'AENetworks',
-                        episode_attributes['data-videoid']))
+                        episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
                 return self.playlist_result(
                     entries, self._html_search_meta('aetn:SeasonId', webpage))
 

+ 1 - 2
youtube_dl/extractor/afreecatv.py

@@ -207,11 +207,10 @@ class AfreecaTVIE(InfoExtractor):
                     file_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id='hls',
                     note='Downloading part %d m3u8 information' % file_num)
-                title = title if one else '%s (part %d)' % (title, file_num)
                 file_info = common_entry.copy()
                 file_info.update({
                     'id': format_id,
-                    'title': title,
+                    'title': title if one else '%s (part %d)' % (title, file_num),
                     'upload_date': upload_date,
                     'duration': file_duration,
                     'formats': formats,

+ 19 - 7
youtube_dl/extractor/amp.py

@@ -7,15 +7,19 @@ from ..utils import (
     parse_iso8601,
     mimetype2ext,
     determine_ext,
+    ExtractorError,
 )
 
 
 class AMPIE(InfoExtractor):
     # parse Akamai Adaptive Media Player feed
     def _extract_feed_info(self, url):
-        item = self._download_json(
+        feed = self._download_json(
             url, None, 'Downloading Akamai AMP feed',
-            'Unable to download Akamai AMP feed')['channel']['item']
+            'Unable to download Akamai AMP feed')
+        item = feed.get('channel', {}).get('item')
+        if not item:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
 
         video_id = item['guid']
 
@@ -30,9 +34,12 @@ class AMPIE(InfoExtractor):
             if isinstance(media_thumbnail, dict):
                 media_thumbnail = [media_thumbnail]
             for thumbnail_data in media_thumbnail:
-                thumbnail = thumbnail_data['@attributes']
+                thumbnail = thumbnail_data.get('@attributes', {})
+                thumbnail_url = thumbnail.get('url')
+                if not thumbnail_url:
+                    continue
                 thumbnails.append({
-                    'url': self._proto_relative_url(thumbnail['url'], 'http:'),
+                    'url': self._proto_relative_url(thumbnail_url, 'http:'),
                     'width': int_or_none(thumbnail.get('width')),
                     'height': int_or_none(thumbnail.get('height')),
                 })
@@ -43,9 +50,14 @@ class AMPIE(InfoExtractor):
             if isinstance(media_subtitle, dict):
                 media_subtitle = [media_subtitle]
             for subtitle_data in media_subtitle:
-                subtitle = subtitle_data['@attributes']
-                lang = subtitle.get('lang') or 'en'
-                subtitles[lang] = [{'url': subtitle['href']}]
+                subtitle = subtitle_data.get('@attributes', {})
+                subtitle_href = subtitle.get('href')
+                if not subtitle_href:
+                    continue
+                subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
+                    'url': subtitle_href,
+                    'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
+                })
 
         formats = []
         media_content = get_media_node('content')

+ 57 - 9
youtube_dl/extractor/anvato.py

@@ -5,6 +5,7 @@ import base64
 import hashlib
 import json
 import random
+import re
 import time
 
 from .common import InfoExtractor
@@ -16,6 +17,7 @@ from ..utils import (
     intlist_to_bytes,
     int_or_none,
     strip_jsonp,
+    unescapeHTML,
 )
 
 
@@ -26,6 +28,8 @@ def md5_text(s):
 
 
 class AnvatoIE(InfoExtractor):
+    _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
+
     # Copied from anvplayer.min.js
     _ANVACK_TABLE = {
         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
@@ -114,6 +118,22 @@ class AnvatoIE(InfoExtractor):
         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
     }
 
+    _MCP_TO_ACCESS_KEY_TABLE = {
+        'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
+        'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
+        'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+        'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+        'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
+        'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+        'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+        'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
+        'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
+        'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
+        'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
+        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
+    }
+
+    _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
     _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
 
     def __init__(self, *args, **kwargs):
@@ -178,12 +198,7 @@ class AnvatoIE(InfoExtractor):
             }
 
             if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
-                # Not using _extract_m3u8_formats here as individual media
-                # playlists are also included in published_urls.
-                if tbr is None:
-                    formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
-                    continue
-                else:
+                if tbr is not None:
                     a_format.update({
                         'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
                         'ext': 'mp4',
@@ -222,9 +237,42 @@ class AnvatoIE(InfoExtractor):
             'subtitles': subtitles,
         }
 
+    @staticmethod
+    def _extract_urls(ie, webpage, video_id):
+        entries = []
+        for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
+            anvplayer_data = ie._parse_json(
+                mobj.group('anvp'), video_id, transform_source=unescapeHTML,
+                fatal=False)
+            if not anvplayer_data:
+                continue
+            video = anvplayer_data.get('video')
+            if not isinstance(video, compat_str) or not video.isdigit():
+                continue
+            access_key = anvplayer_data.get('accessKey')
+            if not access_key:
+                mcp = anvplayer_data.get('mcp')
+                if mcp:
+                    access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
+                        mcp.lower())
+            if not access_key:
+                continue
+            entries.append(ie.url_result(
+                'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
+                video_id=video))
+        return entries
+
     def _extract_anvato_videos(self, webpage, video_id):
-        anvplayer_data = self._parse_json(self._html_search_regex(
-            r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
-            'Anvato player data'), video_id)
+        anvplayer_data = self._parse_json(
+            self._html_search_regex(
+                self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
+            video_id)
         return self._get_anvato_videos(
             anvplayer_data['accessKey'], anvplayer_data['video'])
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        access_key, video_id = mobj.group('access_key_or_mcp', 'id')
+        if access_key not in self._ANVACK_TABLE:
+            access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
+        return self._get_anvato_videos(access_key, video_id)

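With the _VALID_URL and tables added above, an embedded Anvato player resolves to an internal "anvato:<access_key>:<video_id>" URL. A hedged sketch of the core of _extract_urls on a made-up page snippet (the HTML, the single-entry table copy, and the printed result are illustrative only):

    import json
    import re

    # Made-up page snippet carrying the data-anvp attribute _ANVP_RE targets:
    webpage = '<script data-anvp=\'{"mcp": "LIN", "video": "1234567"}\'></script>'

    # One entry copied from _MCP_TO_ACCESS_KEY_TABLE above, to resolve the
    # short MCP id into a full access key:
    mcp_to_key = {'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749'}

    m = re.search(r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1', webpage)
    data = json.loads(m.group('anvp'))
    access_key = mcp_to_key[data['mcp'].lower()]
    print('anvato:%s:%s' % (access_key, data['video']))
    # -> anvato:anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749:1234567
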
+ 2 - 2
youtube_dl/extractor/appleconnect.py

@@ -12,13 +12,13 @@ class AppleConnectIE(InfoExtractor):
     _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
     _TEST = {
         'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
-        'md5': '10d0f2799111df4cb1c924520ca78f98',
+        'md5': 'e7c38568a01ea45402570e6029206723',
         'info_dict': {
             'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
             'ext': 'm4v',
             'title': 'Energy',
             'uploader': 'Drake',
-            'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'upload_date': '20150710',
             'timestamp': 1436545535,
         },

+ 3 - 2
youtube_dl/extractor/appletrailers.py

@@ -70,7 +70,8 @@ class AppleTrailersIE(InfoExtractor):
     }, {
         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
         'info_dict': {
-            'id': 'blackthorn',
+            'id': '4489',
+            'title': 'Blackthorn',
         },
         'playlist_mincount': 2,
         'expected_warnings': ['Unable to download JSON metadata'],
@@ -261,7 +262,7 @@ class AppleTrailersSectionIE(InfoExtractor):
             'title': 'Most Popular',
             'id': 'mostpopular',
         },
-        'playlist_mincount': 80,
+        'playlist_mincount': 30,
     }, {
         'url': 'http://trailers.apple.com/#section=moviestudios',
         'info_dict': {

+ 2 - 2
youtube_dl/extractor/archiveorg.py

@@ -24,12 +24,12 @@ class ArchiveOrgIE(InfoExtractor):
         }
     }, {
         'url': 'https://archive.org/details/Cops1922',
-        'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
+        'md5': '0869000b4ce265e8ca62738b336b268a',
         'info_dict': {
             'id': 'Cops1922',
             'ext': 'mp4',
             'title': 'Buster Keaton\'s "Cops" (1922)',
-            'description': 'md5:b4544662605877edd99df22f9620d858',
+            'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
         }
     }, {
         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',

+ 4 - 1
youtube_dl/extractor/arte.py

@@ -180,7 +180,7 @@ class ArteTVBaseIE(InfoExtractor):
 
 class ArteTVPlus7IE(ArteTVBaseIE):
     IE_NAME = 'arte.tv:+7'
-    _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?P<lang>fr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P<id>[^/?#&]+)'
 
     _TESTS = [{
         'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
@@ -188,6 +188,9 @@ class ArteTVPlus7IE(ArteTVBaseIE):
     }, {
         'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
         'only_matching': True,
+    }, {
+        'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn',
+        'only_matching': True,
     }]
 
     @classmethod

+ 1 - 1
youtube_dl/extractor/atresplayer.py

@@ -36,7 +36,7 @@ class AtresPlayerIE(InfoExtractor):
         },
         {
             'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
-            'md5': '0d0e918533bbd4b263f2de4d197d4aac',
+            'md5': '6e52cbb513c405e403dbacb7aacf8747',
             'info_dict': {
                 'id': 'capitulo-112-david-bustamante',
                 'ext': 'flv',

+ 1 - 1
youtube_dl/extractor/audioboom.py

@@ -16,7 +16,7 @@ class AudioBoomIE(InfoExtractor):
             'title': '3/09/2016 Czaban Hour 3',
             'title': '3/09/2016 Czaban Hour 3',
             'description': 'Guest:   Nate Davis - NFL free agency,   Guest:   Stan Gans',
             'description': 'Guest:   Nate Davis - NFL free agency,   Guest:   Stan Gans',
             'duration': 2245.72,
             'duration': 2245.72,
-            'uploader': 'Steve Czaban',
+            'uploader': 'SB Nation A.M.',
             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
         }
         }
     }, {

+ 0 - 140
youtube_dl/extractor/azubu.py

@@ -1,140 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    float_or_none,
-    sanitized_Request,
-)
-
-
-class AzubuIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
-    _TESTS = [
-        {
-            'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
-            'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
-            'info_dict': {
-                'id': '15575',
-                'ext': 'mp4',
-                'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
-                'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
-                'thumbnail': r're:^https?://.*\.jpe?g',
-                'timestamp': 1417523507.334,
-                'upload_date': '20141202',
-                'duration': 9988.7,
-                'uploader': 'GSL',
-                'uploader_id': 414310,
-                'view_count': int,
-            },
-        },
-        {
-            'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
-            'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
-            'info_dict': {
-                'id': '9344',
-                'ext': 'mp4',
-                'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
-                'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
-                'thumbnail': r're:^https?://.*\.jpe?g',
-                'timestamp': 1410530893.320,
-                'upload_date': '20140912',
-                'duration': 172.385,
-                'uploader': 'FnaticTV',
-                'uploader_id': 272749,
-                'view_count': int,
-            },
-            'skip': 'Channel offline',
-        },
-    ]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        data = self._download_json(
-            'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
-
-        title = data['title'].strip()
-        description = data.get('description')
-        thumbnail = data.get('thumbnail')
-        view_count = data.get('view_count')
-        user = data.get('user', {})
-        uploader = user.get('username')
-        uploader_id = user.get('id')
-
-        stream_params = json.loads(data['stream_params'])
-
-        timestamp = float_or_none(stream_params.get('creationDate'), 1000)
-        duration = float_or_none(stream_params.get('length'), 1000)
-
-        renditions = stream_params.get('renditions') or []
-        video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
-        if video:
-            renditions.append(video)
-
-        if not renditions and not user.get('channel', {}).get('is_live', True):
-            raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True)
-
-        formats = [{
-            'url': fmt['url'],
-            'width': fmt['frameWidth'],
-            'height': fmt['frameHeight'],
-            'vbr': float_or_none(fmt['encodingRate'], 1000),
-            'filesize': fmt['size'],
-            'vcodec': fmt['videoCodec'],
-            'container': fmt['videoContainer'],
-        } for fmt in renditions if fmt['url']]
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'duration': duration,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'view_count': view_count,
-            'formats': formats,
-        }
-
-
-class AzubuLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
-
-    _TESTS = [{
-        'url': 'http://www.azubu.tv/MarsTVMDLen',
-        'only_matching': True,
-    }, {
-        'url': 'http://azubu.uol.com.br/adolfz',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        user = self._match_id(url)
-
-        info = self._download_json(
-            'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
-            user)['data']
-        if info['type'] != 'STREAM':
-            raise ExtractorError('{0} is not streaming live'.format(user), expected=True)
-
-        req = sanitized_Request(
-            'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
-        req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
-        bc_info = self._download_json(req, user)
-        m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
-        formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
-        self._sort_formats(formats)
-
-        return {
-            'id': info['id'],
-            'title': self._live_title(info['title']),
-            'uploader_id': user,
-            'formats': formats,
-            'is_live': True,
-            'thumbnail': bc_info['poster'],
-        }

+ 4 - 4
youtube_dl/extractor/bandcamp.py

@@ -34,12 +34,12 @@ class BandcampIE(InfoExtractor):
         '_skip': 'There is a limit of 200 free downloads / month for the test song'
     }, {
         'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
-        'md5': '73d0b3171568232574e45652f8720b5c',
+        'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
         'info_dict': {
             'id': '2650410135',
-            'ext': 'mp3',
-            'title': 'Lanius (Battle)',
-            'uploader': 'Ben Prunty Music',
+            'ext': 'aiff',
+            'title': 'Ben Prunty - Lanius (Battle)',
+            'uploader': 'Ben Prunty',
         },
     }]
 
 

+ 1 - 1
youtube_dl/extractor/beeg.py

@@ -16,7 +16,7 @@ class BeegIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
     _TEST = {
         'url': 'http://beeg.com/5416503',
-        'md5': '46c384def73b33dbc581262e5ee67cef',
+        'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
         'info_dict': {
             'id': '5416503',
             'ext': 'mp4',

+ 3 - 7
youtube_dl/extractor/bleacherreport.py

@@ -35,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
             'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
             'timestamp': 1446839961,
             'uploader': 'Sean Fay',
-            'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
+            'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
             'uploader_id': 6466954,
             'upload_date': '20151011',
         },
@@ -90,17 +90,13 @@ class BleacherReportCMSIE(AMPIE):
     _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
     _TESTS = [{
         'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-        'md5': '8c2c12e3af7805152675446c905d159b',
+        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
         'info_dict': {
             'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
             'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
     }]
 
 
     def _real_extract(self, url):

+ 1 - 1
youtube_dl/extractor/br.py

@@ -77,7 +77,7 @@ class BRIE(InfoExtractor):
                 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
                 'duration': 893,
                 'uploader': 'Eva Maria Steimle',
-                'upload_date': '20140117',
+                'upload_date': '20170208',
             }
         },
     ]

+ 20 - 6
youtube_dl/extractor/brightcove.py

@@ -131,6 +131,12 @@ class BrightcoveLegacyIE(InfoExtractor):
             },
             'playlist_mincount': 10,
         },
+        {
+            # playerID inferred from bcpid
+            # from http://www.un.org/chinese/News/story.asp?NewsID=27724
+            'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
+            'only_matching': True,  # Tested in GenericIE
+        }
     ]
     FLV_VCODECS = {
         1: 'SORENSON',
@@ -266,9 +272,13 @@ class BrightcoveLegacyIE(InfoExtractor):
         if matches:
             return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
 
 
-        return list(filter(None, [
-            cls._build_brighcove_url_from_js(custom_bc)
-            for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
+        matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
+        if matches:
+            return list(filter(None, [
+                cls._build_brighcove_url_from_js(custom_bc)
+                for custom_bc in matches]))
+        return [src for _, src in re.findall(
+            r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
 
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
@@ -285,6 +295,10 @@ class BrightcoveLegacyIE(InfoExtractor):
         if videoPlayer:
             # We set the original url as the default 'Referer' header
             referer = smuggled_data.get('Referer', url)
+            if 'playerID' not in query:
+                mobj = re.search(r'/bcpid(\d+)', url)
+                if mobj is not None:
+                    query['playerID'] = [mobj.group(1)]
             return self._get_video_info(
                 videoPlayer[0], query, referer=referer)
         elif 'playerKey' in query:
@@ -484,8 +498,8 @@ class BrightcoveNewIE(InfoExtractor):
     }]
 
 
     @staticmethod
-    def _extract_url(webpage):
-        urls = BrightcoveNewIE._extract_urls(webpage)
+    def _extract_url(ie, webpage):
+        urls = BrightcoveNewIE._extract_urls(ie, webpage)
         return urls[0] if urls else None
 
 
     @staticmethod
@@ -508,7 +522,7 @@ class BrightcoveNewIE(InfoExtractor):
         # [2] looks like:
         for video, script_tag, account_id, player_id, embed in re.findall(
                 r'''(?isx)
-                    (<video\s+[^>]+>)
+                    (<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
                     (?:.*?
                         (<script[^>]+
                             src=["\'](?:https?:)?//players\.brightcove\.net/

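The playerID fallback added to _real_extract covers legacy link.brightcove.com embeds that only carry the player ID in the URL path. A minimal stdlib-only sketch of that inference, using the test URL from the hunk:

import re
try:
    from urllib.parse import urlparse, parse_qs  # Python 3
except ImportError:
    from urlparse import urlparse, parse_qs  # Python 2

url = ('https://link.brightcove.com/services/player/bcpid1722935254001/'
       '?bctid=5360463607001&autoStart=false&secureConnections=true')
query = parse_qs(urlparse(url).query)
if 'playerID' not in query:
    mobj = re.search(r'/bcpid(\d+)', url)
    if mobj is not None:
        query['playerID'] = [mobj.group(1)]
print(query['playerID'])  # ['1722935254001']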
+ 1 - 4
youtube_dl/extractor/canalc2.py

@@ -16,13 +16,10 @@ class Canalc2IE(InfoExtractor):
         'md5': '060158428b650f896c542dfbb3d6487f',
         'info_dict': {
             'id': '12163',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Terrasses du Numérique',
             'duration': 122,
         },
-        'params': {
-            'skip_download': True,  # Requires rtmpdump
-        }
     }, {
         'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
         'only_matching': True,

+ 3 - 3
youtube_dl/extractor/cbc.py

@@ -96,6 +96,7 @@ class CBCIE(InfoExtractor):
         'info_dict': {
             'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
             'id': 'dog-indoor-exercise-winter-1.3928238',
+            'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
         },
         'playlist_mincount': 6,
     }]
@@ -165,12 +166,11 @@ class CBCPlayerIE(InfoExtractor):
             'uploader': 'CBCC-NEW',
         },
     }, {
-        # available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
         'url': 'http://www.cbc.ca/player/play/2164402062',
-        'md5': '17a61eb813539abea40618d6323a7f82',
+        'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
         'info_dict': {
             'id': '2164402062',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Cancer survivor four times over',
             'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
             'timestamp': 1320410746,

+ 2 - 2
youtube_dl/extractor/cbslocal.py

@@ -60,8 +60,8 @@ class CBSLocalIE(AnvatoIE):
             'title': 'A Very Blue Anniversary',
             'description': 'CBS2’s Cindy Hsu has more.',
             'thumbnail': 're:^https?://.*',
-            'timestamp': 1479962220,
-            'upload_date': '20161124',
+            'timestamp': int,
+            'upload_date': r're:^\d{8}$',
             'uploader': 'CBS',
             'subtitles': {
                 'en': 'mincount:5',

+ 49 - 3
youtube_dl/extractor/cda.py

@@ -9,7 +9,10 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    multipart_encode,
     parse_duration,
+    random_birthday,
+    urljoin,
 )
 
 
 
 
@@ -27,7 +30,8 @@ class CDAIE(InfoExtractor):
             'description': 'md5:269ccd135d550da90d1662651fcb9772',
             'thumbnail': r're:^https?://.*\.jpg$',
             'average_rating': float,
-            'duration': 39
+            'duration': 39,
+            'age_limit': 0,
         }
     }, {
         'url': 'http://www.cda.pl/video/57413289',
@@ -41,13 +45,41 @@ class CDAIE(InfoExtractor):
             'uploader': 'crash404',
             'view_count': int,
             'average_rating': float,
-            'duration': 137
+            'duration': 137,
+            'age_limit': 0,
         }
+    }, {
+        # Age-restricted
+        'url': 'http://www.cda.pl/video/1273454c4',
+        'info_dict': {
+            'id': '1273454c4',
+            'ext': 'mp4',
+            'title': 'Bronson (2008) napisy HD 1080p',
+            'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
+            'height': 1080,
+            'uploader': 'boniek61',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 5554,
+            'age_limit': 18,
+            'view_count': int,
+            'average_rating': float,
+        },
     }, {
         'url': 'http://ebd.cda.pl/0x0/5749950c',
         'only_matching': True,
     }]
 
 
+    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
+        form_data = random_birthday('rok', 'miesiac', 'dzien')
+        form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
+        data, content_type = multipart_encode(form_data)
+        return self._download_webpage(
+            urljoin(url, '/a/validatebirth'), video_id, *args,
+            data=data, headers={
+                'Referer': url,
+                'Content-Type': content_type,
+            }, **kwargs)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         self._set_cookie('cda.pl', 'cda.player', 'html5')
@@ -57,6 +89,13 @@ class CDAIE(InfoExtractor):
         if 'Ten film jest dostępny dla użytkowników premium' in webpage:
             raise ExtractorError('This video is only available for premium users.', expected=True)
 
 
+        need_confirm_age = False
+        if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
+                                   webpage, 'birthday validate form', default=None):
+            webpage = self._download_age_confirm_page(
+                url, video_id, note='Confirming age')
+            need_confirm_age = True
+
         formats = []
 
 
         uploader = self._search_regex(r'''(?x)
@@ -81,6 +120,7 @@ class CDAIE(InfoExtractor):
             'thumbnail': self._og_search_thumbnail(webpage),
             'formats': formats,
             'duration': None,
+            'age_limit': 18 if need_confirm_age else 0,
         }
 
 
         def extract_format(page, version):
@@ -121,7 +161,12 @@ class CDAIE(InfoExtractor):
         for href, resolution in re.findall(
                 r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
                 webpage):
-            webpage = self._download_webpage(
+            if need_confirm_age:
+                handler = self._download_age_confirm_page
+            else:
+                handler = self._download_webpage
+
+            webpage = handler(
                 self._BASE_URL + href, video_id,
                 'Downloading %s version information' % resolution, fatal=False)
             if not webpage:
@@ -129,6 +174,7 @@ class CDAIE(InfoExtractor):
                 # invalid version is requested.
                 self.report_warning('Unable to download %s version information' % resolution)
                 continue
+
             extract_format(webpage, resolution)
 
 
         self._sort_formats(formats)

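For reference, the age gate that _download_age_confirm_page clears is a plain multipart POST; random_birthday and multipart_encode are the youtube_dl.utils helpers added to the imports above. A rough sketch of the request it builds, assuming the helper signatures used in the hunk (the field values are generated at random; the URL is the age-restricted test above):

from youtube_dl.utils import multipart_encode, random_birthday, urljoin

url = 'http://www.cda.pl/video/1273454c4'
video_id = '1273454c4'

# rok/miesiac/dzien (year/month/day) receive a random date of birth
form_data = random_birthday('rok', 'miesiac', 'dzien')
form_data.update({'return': url, 'module': 'video', 'module_id': video_id})

# multipart_encode() returns the encoded body together with a matching
# multipart/form-data Content-Type header (including the boundary)
data, content_type = multipart_encode(form_data)
print(urljoin(url, '/a/validatebirth'))  # the POST target
print(content_type)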
+ 1 - 1
youtube_dl/extractor/clipfish.py

@@ -12,7 +12,7 @@ class ClipfishIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
-        'md5': '720563e467b86374c194bdead08d207d',
+        'md5': 'b9a5dc46294154c1193e2d10e0c95693',
         'info_dict': {
             'id': '4343170',
             'ext': 'mp4',

+ 2 - 1
youtube_dl/extractor/collegerama.py

@@ -21,7 +21,7 @@ class CollegeRamaIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
                 'description': '',
-                'thumbnail': r're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
                 'duration': 7713.088,
                 'timestamp': 1413309600,
                 'upload_date': '20141014',
@@ -35,6 +35,7 @@ class CollegeRamaIE(InfoExtractor):
                 'ext': 'wmv',
                 'title': '64ste Vakantiecursus: Afvalwater',
                 'description': 'md5:7fd774865cc69d972f542b157c328305',
+                'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
                 'duration': 10853,
                 'timestamp': 1326446400,
                 'upload_date': '20120113',

+ 138 - 75
youtube_dl/extractor/common.py

@@ -245,6 +245,10 @@ class InfoExtractor(object):
                     specified in the URL.
     end_time:       Time in seconds where the reproduction should end, as
                     specified in the URL.
+    chapters:       A list of dictionaries, with the following entries:
+                        * "start_time" - The start time of the chapter in seconds
+                        * "end_time" - The end time of the chapter in seconds
+                        * "title" (optional, string)
 
 
     The following fields should only be used when the video belongs to some logical
     chapter or section:
@@ -976,6 +980,23 @@ class InfoExtractor(object):
             return info
         if isinstance(json_ld, dict):
             json_ld = [json_ld]
+
+        def extract_video_object(e):
+            assert e['@type'] == 'VideoObject'
+            info.update({
+                'url': e.get('contentUrl'),
+                'title': unescapeHTML(e.get('name')),
+                'description': unescapeHTML(e.get('description')),
+                'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
+                'duration': parse_duration(e.get('duration')),
+                'timestamp': unified_timestamp(e.get('uploadDate')),
+                'filesize': float_or_none(e.get('contentSize')),
+                'tbr': int_or_none(e.get('bitrate')),
+                'width': int_or_none(e.get('width')),
+                'height': int_or_none(e.get('height')),
+                'view_count': int_or_none(e.get('interactionCount')),
+            })
+
         for e in json_ld:
             if e.get('@context') == 'http://schema.org':
                 item_type = e.get('@type')
@@ -1000,18 +1021,11 @@ class InfoExtractor(object):
                         'description': unescapeHTML(e.get('articleBody')),
                     })
                 elif item_type == 'VideoObject':
-                    info.update({
-                        'url': e.get('contentUrl'),
-                        'title': unescapeHTML(e.get('name')),
-                        'description': unescapeHTML(e.get('description')),
-                        'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
-                        'duration': parse_duration(e.get('duration')),
-                        'timestamp': unified_timestamp(e.get('uploadDate')),
-                        'filesize': float_or_none(e.get('contentSize')),
-                        'tbr': int_or_none(e.get('bitrate')),
-                        'width': int_or_none(e.get('width')),
-                        'height': int_or_none(e.get('height')),
-                    })
+                    extract_video_object(e)
+                elif item_type == 'WebPage':
+                    video = e.get('video')
+                    if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+                        extract_video_object(video)
                 break
         return dict((k, v) for k, v in info.items() if v is not None)
 
 
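The new WebPage branch unwraps pages that nest their VideoObject one level deep before handing it to extract_video_object. A small sketch of the shape it now handles (the values are invented for illustration):

json_ld = {
    '@context': 'http://schema.org',
    '@type': 'WebPage',
    'video': {
        '@type': 'VideoObject',
        'name': 'Sample clip',
        'contentUrl': 'https://example.com/clip.mp4',
        'duration': 'PT3M7S',        # parse_duration() -> 187.0
        'uploadDate': '2017-05-01',  # unified_timestamp() -> 1493596800
    },
}

e = json_ld
if e.get('@type') == 'WebPage':
    video = e.get('video')
    if isinstance(video, dict) and video.get('@type') == 'VideoObject':
        e = video  # extract_video_object() then maps the fields as above
print(e['contentUrl'])  # https://example.com/clip.mp4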
@@ -1303,40 +1317,50 @@ class InfoExtractor(object):
                               entry_protocol='m3u8', preference=None,
                               m3u8_id=None, note=None, errnote=None,
                               fatal=True, live=False):
-
         res = self._download_webpage_handle(
             m3u8_url, video_id,
             note=note or 'Downloading m3u8 information',
             errnote=errnote or 'Failed to download m3u8 information',
             fatal=fatal)
+
         if res is False:
             return []
+
         m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
 
 
+        return self._parse_m3u8_formats(
+            m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
+            preference=preference, m3u8_id=m3u8_id, live=live)
+
+    def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
+                            entry_protocol='m3u8', preference=None,
+                            m3u8_id=None, live=False):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return []
 
 
-        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
+        formats = []
 
 
         format_url = lambda u: (
             u
             if re.match(r'^https?://', u)
             else compat_urlparse.urljoin(m3u8_url, u))
 
 
-        # We should try extracting formats only from master playlists [1], i.e.
-        # playlists that describe available qualities. On the other hand media
-        # playlists [2] should be returned as is since they contain just the media
-        # without qualities renditions.
+        # References:
+        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
+        # 2. https://github.com/rg3/youtube-dl/issues/12211
+
+        # We should try extracting formats only from master playlists [1, 4.3.4],
+        # i.e. playlists that describe available qualities. On the other hand
+        # media playlists [1, 4.3.3] should be returned as is since they contain
+        # just the media without qualities renditions.
         # Fortunately, master playlist can be easily distinguished from media
-        # playlist based on particular tags availability. As of [1, 2] master
-        # playlist tags MUST NOT appear in a media playist and vice versa.
-        # As of [3] #EXT-X-TARGETDURATION tag is REQUIRED for every media playlist
-        # and MUST NOT appear in master playlist thus we can clearly detect media
-        # playlist with this criterion.
-        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4
-        # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
-        # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
+        # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
+        # master playlist tags MUST NOT appear in a media playlist and vice versa.
+        # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
+        # media playlist and MUST NOT appear in master playlist thus we can
+        # clearly detect media playlist with this criterion.
+
         if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is
             return [{
                 'url': m3u8_url,
@@ -1345,52 +1369,72 @@ class InfoExtractor(object):
                 'protocol': entry_protocol,
                 'preference': preference,
             }]
-        audio_in_video_stream = {}
-        last_info = {}
-        last_media = {}
+
+        groups = {}
+        last_stream_inf = {}
+
+        def extract_media(x_media_line):
+            media = parse_m3u8_attributes(x_media_line)
+            # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
+            media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
+            if not (media_type and group_id and name):
+                return
+            groups.setdefault(group_id, []).append(media)
+            if media_type not in ('VIDEO', 'AUDIO'):
+                return
+            media_url = media.get('URI')
+            if media_url:
+                format_id = []
+                for v in (group_id, name):
+                    if v:
+                        format_id.append(v)
+                f = {
+                    'format_id': '-'.join(format_id),
+                    'url': format_url(media_url),
+                    'manifest_url': m3u8_url,
+                    'language': media.get('LANGUAGE'),
+                    'ext': ext,
+                    'protocol': entry_protocol,
+                    'preference': preference,
+                }
+                if media_type == 'AUDIO':
+                    f['vcodec'] = 'none'
+                formats.append(f)
+
+        def build_stream_name():
+            # Although the specification does not mention the NAME attribute
+            # for the EXT-X-STREAM-INF tag, it may still be present (see [1]
+            # or vidio test in TestInfoExtractor.test_parse_m3u8_formats)
+            # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
+            stream_name = last_stream_inf.get('NAME')
+            if stream_name:
+                return stream_name
+            # If there is no NAME in EXT-X-STREAM-INF it will be obtained
+            # from corresponding rendition group
+            stream_group_id = last_stream_inf.get('VIDEO')
+            if not stream_group_id:
+                return
+            stream_group = groups.get(stream_group_id)
+            if not stream_group:
+                return stream_group_id
+            rendition = stream_group[0]
+            return rendition.get('NAME') or stream_group_id
+
         for line in m3u8_doc.splitlines():
             if line.startswith('#EXT-X-STREAM-INF:'):
-                last_info = parse_m3u8_attributes(line)
+                last_stream_inf = parse_m3u8_attributes(line)
             elif line.startswith('#EXT-X-MEDIA:'):
-                media = parse_m3u8_attributes(line)
-                media_type = media.get('TYPE')
-                if media_type in ('VIDEO', 'AUDIO'):
-                    group_id = media.get('GROUP-ID')
-                    media_url = media.get('URI')
-                    if media_url:
-                        format_id = []
-                        for v in (group_id, media.get('NAME')):
-                            if v:
-                                format_id.append(v)
-                        f = {
-                            'format_id': '-'.join(format_id),
-                            'url': format_url(media_url),
-                            'language': media.get('LANGUAGE'),
-                            'ext': ext,
-                            'protocol': entry_protocol,
-                            'preference': preference,
-                        }
-                        if media_type == 'AUDIO':
-                            f['vcodec'] = 'none'
-                            if group_id and not audio_in_video_stream.get(group_id):
-                                audio_in_video_stream[group_id] = False
-                        formats.append(f)
-                    else:
-                        # When there is no URI in EXT-X-MEDIA let this tag's
-                        # data be used by regular URI lines below
-                        last_media = media
-                        if media_type == 'AUDIO' and group_id:
-                            audio_in_video_stream[group_id] = True
+                extract_media(line)
             elif line.startswith('#') or not line.strip():
                 continue
             else:
-                tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
+                tbr = float_or_none(
+                    last_stream_inf.get('AVERAGE-BANDWIDTH') or
+                    last_stream_inf.get('BANDWIDTH'), scale=1000)
                 format_id = []
                 if m3u8_id:
                     format_id.append(m3u8_id)
-                # Despite specification does not mention NAME attribute for
-                # EXT-X-STREAM-INF it still sometimes may be present
-                stream_name = last_info.get('NAME') or last_media.get('NAME')
+                stream_name = build_stream_name()
                 # Bandwidth of live streams may differ over time thus making
                 # format_id unpredictable. So it's better to keep provided
                 # format_id intact.
@@ -1400,14 +1444,14 @@ class InfoExtractor(object):
                 f = {
                     'format_id': '-'.join(format_id),
                     'url': manifest_url,
-                    'manifest_url': manifest_url,
+                    'manifest_url': m3u8_url,
                     'tbr': tbr,
                     'ext': ext,
-                    'fps': float_or_none(last_info.get('FRAME-RATE')),
+                    'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
                     'protocol': entry_protocol,
                     'preference': preference,
                 }
-                resolution = last_info.get('RESOLUTION')
+                resolution = last_stream_inf.get('RESOLUTION')
                 if resolution:
                     mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
                     if mobj:
@@ -1423,13 +1467,26 @@ class InfoExtractor(object):
                         'vbr': vbr,
                         'abr': abr,
                     })
-                f.update(parse_codecs(last_info.get('CODECS')))
-                if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
-                    # TODO: update acodec for audio only formats with the same GROUP-ID
-                    f['acodec'] = 'none'
+                codecs = parse_codecs(last_stream_inf.get('CODECS'))
+                f.update(codecs)
+                audio_group_id = last_stream_inf.get('AUDIO')
+                # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
+                # references a rendition group MUST have a CODECS attribute.
+                # However, this is not always respected, for example, [2]
+                # contains EXT-X-STREAM-INF tag which references AUDIO
+                # rendition group but does not have CODECS and despite
+                # referencing an audio group it represents
+                # a complete (with audio and video) format. So, for such cases
+                # we will ignore references to rendition groups and treat them
+                # as complete formats.
+                if audio_group_id and codecs and f.get('vcodec') != 'none':
+                    audio_group = groups.get(audio_group_id)
+                    if audio_group and audio_group[0].get('URI'):
+                        # TODO: update acodec for audio only formats with
+                        # the same GROUP-ID
+                        f['acodec'] = 'none'
                 formats.append(f)
-                last_info = {}
-                last_media = {}
+                last_stream_inf = {}
         return formats
 
 
     @staticmethod
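The master-vs-media decision that _parse_m3u8_formats makes up front reduces to the EXT-X-TARGETDURATION rule cited in the comments above. A toy illustration with two hand-written playlists:

MASTER = '\n'.join((
    '#EXTM3U',
    '#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud",NAME="English",URI="audio/en.m3u8"',
    '#EXT-X-STREAM-INF:BANDWIDTH=1280000,AUDIO="aud"',
    'video/720p.m3u8',
))
MEDIA = '\n'.join((
    '#EXTM3U',
    '#EXT-X-TARGETDURATION:10',
    '#EXTINF:9.9,',
    'segment0.ts',
))

for doc in (MASTER, MEDIA):
    # EXT-X-TARGETDURATION is REQUIRED in a media playlist and MUST NOT
    # appear in a master playlist, so its presence settles the question
    print('media' if '#EXT-X-TARGETDURATION' in doc else 'master')
# master
# media

In the master playlist above, extract_media() would record the EXT-X-MEDIA rendition in groups['aud'], and because that rendition carries a URI, the EXT-X-STREAM-INF format referencing AUDIO="aud" would get 'acodec': 'none' per the hunk above.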
@@ -1803,7 +1860,7 @@ class InfoExtractor(object):
                             'ext': mimetype2ext(mime_type),
                             'width': int_or_none(representation_attrib.get('width')),
                             'height': int_or_none(representation_attrib.get('height')),
-                            'tbr': int_or_none(bandwidth, 1000),
+                            'tbr': float_or_none(bandwidth, 1000),
                             'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
                             'fps': int_or_none(representation_attrib.get('frameRate')),
                             'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
@@ -2182,7 +2239,7 @@ class InfoExtractor(object):
 
 
     def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
         mobj = re.search(
-            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
+            r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
             webpage)
         if mobj:
             try:
@@ -2258,11 +2315,17 @@ class InfoExtractor(object):
 
 
     def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
                                 m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+        urls = []
         formats = []
         for source in jwplayer_sources_data:
-            source_url = self._proto_relative_url(source['file'])
+            source_url = self._proto_relative_url(source.get('file'))
+            if not source_url:
+                continue
             if base_url:
                 source_url = compat_urlparse.urljoin(base_url, source_url)
+            if source_url in urls:
+                continue
+            urls.append(source_url)
             source_type = source.get('type') or ''
             ext = mimetype2ext(source_type) or determine_ext(source_url)
             if source_type == 'hls' or ext == 'm3u8':

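The (?s) flag and lazy .*? added to _find_jwplayer_data let the .setup() call sit on a different line from the jwplayer(...) call, which the old single-line pattern missed. A minimal sketch against invented markup:

import re

JWPLAYER_SETUP_RE = (
    r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>)'
    r'.*?\.setup\s*\((?P<options>[^)]+)\)'
)

webpage = '<script>jwplayer("vid1")\n    .setup({file: "video.m3u8"})</script>'
mobj = re.search(JWPLAYER_SETUP_RE, webpage)
print(mobj.group('options'))  # {file: "video.m3u8"}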
+ 1 - 4
youtube_dl/extractor/coub.py

@@ -24,12 +24,11 @@ class CoubIE(InfoExtractor):
             'duration': 4.6,
             'timestamp': 1428527772,
             'upload_date': '20150408',
-            'uploader': 'Артём Лоскутников',
+            'uploader': 'Artyom Loskutnikov',
             'uploader_id': 'artyom.loskutnikov',
             'view_count': int,
             'like_count': int,
             'repost_count': int,
-            'comment_count': int,
             'age_limit': 0,
         },
     }, {
@@ -118,7 +117,6 @@ class CoubIE(InfoExtractor):
         view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
         like_count = int_or_none(coub.get('likes_count'))
         repost_count = int_or_none(coub.get('recoubs_count'))
-        comment_count = int_or_none(coub.get('comments_count'))
 
 
         age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
         if age_restricted is not None:
@@ -137,7 +135,6 @@ class CoubIE(InfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
             'repost_count': repost_count,
-            'comment_count': comment_count,
             'age_limit': age_limit,
             'formats': formats,
         }

+ 2 - 2
youtube_dl/extractor/crunchyroll.py

@@ -171,7 +171,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         'info_dict': {
             'id': '727589',
             'ext': 'mp4',
-            'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!",
+            'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
             'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'Kadokawa Pictures Inc.',
@@ -179,7 +179,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             'series': "KONOSUBA -God's blessing on this wonderful world!",
             'season': "KONOSUBA -God's blessing on this wonderful world! 2",
             'season_number': 2,
-            'episode': 'Give Me Deliverance from this Judicial Injustice!',
+            'episode': 'Give Me Deliverance From This Judicial Injustice!',
             'episode_number': 1,
         },
         'params': {

+ 20 - 1
youtube_dl/extractor/dailymotion.py

@@ -50,6 +50,24 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
     ]
 
 
     _TESTS = [
+        {
+            'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
+            'md5': '074b95bdee76b9e3654137aee9c79dfe',
+            'info_dict': {
+                'id': 'x5kesuj',
+                'ext': 'mp4',
+                'title': 'Office Christmas Party Review –  Jason Bateman, Olivia Munn, T.J. Miller',
+                'description': 'Office Christmas Party Review -  Jason Bateman, Olivia Munn, T.J. Miller',
+                'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
+                'duration': 187,
+                'timestamp': 1493651285,
+                'upload_date': '20170501',
+                'uploader': 'Deadline',
+                'uploader_id': 'x1xm8ri',
+                'age_limit': 0,
+                'view_count': int,
+            },
+        },
         {
             'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
             'md5': '2137c41a8e78554bb09225b8eb322406',
@@ -66,7 +84,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                 'uploader_id': 'xijv66',
                 'age_limit': 0,
                 'view_count': int,
-            }
+            },
+            'skip': 'video gone',
         },
         # Vevo video
         {

+ 2 - 1
youtube_dl/extractor/democracynow.py

@@ -21,7 +21,8 @@ class DemocracynowIE(InfoExtractor):
         'info_dict': {
             'id': '2015-0703-001',
             'ext': 'mp4',
-            'title': 'Daily Show',
+            'title': 'Daily Show for July 03, 2015',
+            'description': 'md5:80eb927244d6749900de6072c7cc2c86',
         },
     }, {
         'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',

+ 1 - 1
youtube_dl/extractor/dotsub.py

@@ -35,7 +35,7 @@ class DotsubIE(InfoExtractor):
             'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
             'duration': 290,
             'timestamp': 1476767794.2809999,
-            'upload_date': '20160525',
+            'upload_date': '20161018',
             'uploader': 'parthivi001',
             'uploader_id': 'user52596202',
             'view_count': int,

+ 2 - 2
youtube_dl/extractor/douyutv.py

@@ -20,7 +20,7 @@ class DouyuTVIE(InfoExtractor):
             'id': '17732',
             'display_id': 'iseven',
             'ext': 'flv',
-            'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'description': r're:.*m7show@163\.com.*',
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': '7师傅',
@@ -51,7 +51,7 @@ class DouyuTVIE(InfoExtractor):
             'id': '17732',
             'display_id': '17732',
             'ext': 'flv',
-            'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'description': r're:.*m7show@163\.com.*',
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': '7师傅',

+ 8 - 2
youtube_dl/extractor/extractors.py

@@ -41,6 +41,7 @@ from .alphaporno import AlphaPornoIE
 from .amcnetworks import AMCNetworksIE
 from .animeondemand import AnimeOnDemandIE
 from .anitube import AnitubeIE
+from .anvato import AnvatoIE
 from .anysex import AnySexIE
 from .aol import AolIE
 from .allocine import AllocineIE
@@ -87,7 +88,6 @@ from .azmedien import (
     AZMedienPlaylistIE,
     AZMedienShowPlaylistIE,
 )
-from .azubu import AzubuIE, AzubuLiveIE
 from .baidu import BaiduVideoIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
@@ -663,6 +663,7 @@ from .nintendo import NintendoIE
 from .njpwworld import NJPWWorldIE
 from .nobelprize import NobelPrizeIE
 from .noco import NocoIE
+from .noovo import NoovoIE
 from .normalboots import NormalbootsIE
 from .nosvideo import NosVideoIE
 from .nova import NovaIE
@@ -939,6 +940,7 @@ from .srmediathek import SRMediathekIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamable import StreamableIE
+from .streamango import StreamangoIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streetvoice import StreetVoiceIE
@@ -1233,7 +1235,10 @@ from .wrzuta import (
     WrzutaIE,
     WrzutaPlaylistIE,
 )
-from .wsj import WSJIE
+from .wsj import (
+    WSJIE,
+    WSJArticleIE,
+)
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
 from .xfileshare import XFileShareIE
@@ -1295,5 +1300,6 @@ from .youtube import (
     YoutubeWatchLaterIE,
 )
 from .zapiks import ZapiksIE
+from .zaq1 import Zaq1IE
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import ZingMp3IE

+ 5 - 4
youtube_dl/extractor/foxsports.py

@@ -11,10 +11,10 @@ class FoxSportsIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
 
 
     _TEST = {
-        'url': 'http://www.foxsports.com/video?vid=432609859715',
+        'url': 'http://www.foxsports.com/tennessee/video/432609859715',
         'md5': 'b49050e955bebe32c301972e4012ac17',
         'info_dict': {
-            'id': 'i0qKWsk3qJaM',
+            'id': 'bwduI3X_TgUB',
             'ext': 'mp4',
             'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
             'description': 'Courtney Lee talks about Memphis being focused.',
@@ -31,8 +31,9 @@ class FoxSportsIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         config = self._parse_json(
-            self._search_regex(
-                r"data-player-config='([^']+)'", webpage, 'data player config'),
+            self._html_search_regex(
+                r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
+                webpage, 'data player config'),
             video_id)
 
         return self.url_result(smuggle_url(update_url_query(

+ 1 - 2
youtube_dl/extractor/funnyordie.py

@@ -58,8 +58,7 @@ class FunnyOrDieIE(InfoExtractor):
             m3u8_url, video_id, 'mp4', 'm3u8_native',
             m3u8_id='hls', fatal=False)
         source_formats = list(filter(
-            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-            m3u8_formats))
+            lambda f: f.get('vcodec') != 'none', m3u8_formats))
 
         bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
         bitrates.sort()

+ 1 - 2
youtube_dl/extractor/gamespot.py

@@ -78,8 +78,7 @@ class GameSpotIE(OnceIE):
                     if m3u8_formats:
                         self._sort_formats(m3u8_formats)
                         m3u8_formats = list(filter(
-                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-                            m3u8_formats))
+                            lambda f: f.get('vcodec') != 'none', m3u8_formats))
                     if len(qualities) == len(m3u8_formats):
                         for q, m3u8_format in zip(qualities, m3u8_formats):
                             f = m3u8_format.copy()

+ 85 - 1
youtube_dl/extractor/generic.py

@@ -85,6 +85,9 @@ from .ustream import UstreamIE
 from .openload import OpenloadIE
 from .videopress import VideoPressIE
 from .rutube import RutubeIE
+from .limelight import LimelightBaseIE
+from .anvato import AnvatoIE
+from .washingtonpost import WashingtonPostIE
 
 
 class GenericIE(InfoExtractor):
@@ -429,6 +432,22 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,  # m3u8 download
             },
         },
+        {
+            # Brightcove video in <iframe>
+            'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
+            'md5': '36d74ef5e37c8b4a2ce92880d208b968',
+            'info_dict': {
+                'id': '5360463607001',
+                'ext': 'mp4',
+                'title': '叙利亚失明儿童在废墟上演唱《心跳》  呼吁获得正常童年生活',
+                'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
+                'uploader': 'United Nations',
+                'uploader_id': '1362235914001',
+                'timestamp': 1489593889,
+                'upload_date': '20170315',
+            },
+            'add_ie': ['BrightcoveLegacy'],
+        },
         {
             # Brightcove with alternative playerID key
             'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
@@ -1410,6 +1429,22 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            # Brightcove embed with whitespace around attribute names
+            'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
+            'info_dict': {
+                'id': '3167554373001',
+                'ext': 'mp4',
+                'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
+                'description': 'md5:57bacb0e0f29349de4972bfda3191713',
+                'uploader_id': '1079349493',
+                'upload_date': '20140207',
+                'timestamp': 1391810548,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         # Another form of arte.tv embed
         {
             'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
@@ -1651,6 +1686,38 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': [SenateISVPIE.ie_key()],
         },
+        {
+            # Limelight embeds (1 channel embed + 4 media embeds)
+            'url': 'http://www.sedona.com/FacilitatorTraining2017',
+            'info_dict': {
+                'id': 'FacilitatorTraining2017',
+                'title': 'Facilitator Training 2017',
+            },
+            'playlist_mincount': 5,
+        },
+        {
+            'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
+            'info_dict': {
+                'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
+                'title': 'Standoff with Walnut Creek murder suspect ends',
+                'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
+            },
+            'playlist_mincount': 4,
+        },
+        {
+            # WashingtonPost embed
+            'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
+            'info_dict': {
+                'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
+                'ext': 'mp4',
+                'title': "No one has seen the drama series based on Trump's life \u2014 until now",
+                'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
+                'timestamp': 1455216756,
+                'uploader': 'The Washington Post',
+                'upload_date': '20160211',
+            },
+            'add_ie': [WashingtonPostIE.ie_key()],
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -1693,7 +1760,7 @@ class GenericIE(InfoExtractor):
                 continue
 
             entries.append({
-                '_type': 'url',
+                '_type': 'url_transparent',
                 'url': next_url,
                 'title': it.find('title').text,
             })
@@ -2483,6 +2550,11 @@ class GenericIE(InfoExtractor):
             return self.url_result(piksel_url, PikselIE.ie_key())
 
         # Look for Limelight embeds
+        limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
+        if limelight_urls:
+            return self.playlist_result(
+                limelight_urls, video_id, video_title, video_description)
+
         mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
         if mobj:
             lm = {
@@ -2506,6 +2578,12 @@ class GenericIE(InfoExtractor):
                 'limelight:media:%s' % mobj.group('id'),
                 {'source_url': url}), 'LimelightMedia', mobj.group('id'))
 
+        # Look for Anvato embeds
+        anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
+        if anvato_urls:
+            return self.playlist_result(
+                anvato_urls, video_id, video_title, video_description)
+
         # Look for AdobeTVVideo embeds
         mobj = re.search(
             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
@@ -2623,6 +2701,12 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 rutube_urls, ie=RutubeIE.ie_key())
 
+        # Look for WashingtonPost embeds
+        wapo_urls = WashingtonPostIE._extract_urls(webpage)
+        if wapo_urls:
+            return self.playlist_from_matches(
+                wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')

+ 35 - 14
youtube_dl/extractor/go.py

@@ -36,22 +36,26 @@ class GoIE(AdobePassIE):
             'requestor_id': 'DisneyXD',
         }
     }
-    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
+    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
     _TESTS = [{
-        'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
+        'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
         'info_dict': {
-            'id': '0_g86w5onx',
+            'id': 'VDKA3807643',
             'ext': 'mp4',
-            'title': 'Sneak Peek: Language Arts',
-            'description': 'md5:7dcdab3b2d17e5217c953256af964e9c',
+            'title': 'The Traitor in the White House',
+            'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
         },
         'params': {
             # m3u8 download
             'skip_download': True,
         },
     }, {
-        'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
-        'only_matching': True,
+        'url': 'http://watchdisneyxd.go.com/doraemon',
+        'info_dict': {
+            'title': 'Doraemon',
+            'id': 'SH55574025',
+        },
+        'playlist_mincount': 51,
     }, {
         'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
         'only_matching': True,
@@ -60,19 +64,36 @@ class GoIE(AdobePassIE):
         'only_matching': True,
     }]
 
+    def _extract_videos(self, brand, video_id='-1', show_id='-1'):
+        display_id = video_id if video_id != '-1' else show_id
+        return self._download_json(
+            'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
+            display_id)['video']
+
     def _real_extract(self, url):
         sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
+        site_info = self._SITE_INFO[sub_domain]
+        brand = site_info['brand']
         if not video_id:
             webpage = self._download_webpage(url, display_id)
             video_id = self._search_regex(
                 # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
                 # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
-                r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
-        site_info = self._SITE_INFO[sub_domain]
-        brand = site_info['brand']
-        video_data = self._download_json(
-            'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
-            video_id)['video'][0]
+                r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
+            if not video_id:
+                # show extraction works for Disney, DisneyJunior and DisneyXD
+                # ABC and Freeform has different layout
+                show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
+                videos = self._extract_videos(brand, show_id=show_id)
+                show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
+                entries = []
+                for video in videos:
+                    entries.append(self.url_result(
+                        video['url'], 'Go', video.get('id'), video.get('title')))
+                entries.reverse()
+                return self.playlist_result(entries, show_id, show_title)
+        video_data = self._extract_videos(brand, video_id)[0]
+        video_id = video_data['id']
         title = video_data['title']
 
         formats = []
@@ -105,7 +126,7 @@ class GoIE(AdobePassIE):
                     self._initialize_geo_bypass(['US'])
                 entitlement = self._download_json(
                     'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
-                    video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
+                    video_id, data=urlencode_postdata(data))
                 errors = entitlement.get('errors', {}).get('errors', [])
                 if errors:
                     for error in errors:

+ 36 - 2
youtube_dl/extractor/go90.py

@@ -5,6 +5,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
     int_or_none,
     parse_iso8601,
 )
@@ -18,7 +19,7 @@ class Go90IE(InfoExtractor):
         'info_dict': {
             'id': '84BUqjLpf9D',
             'ext': 'mp4',
-            'title': 'Inside The Utah Coalition Against Pornography Convention',
+            'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention',
             'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
             'timestamp': 1491868800,
             'upload_date': '20170411',
@@ -32,11 +33,28 @@ class Go90IE(InfoExtractor):
             video_id, headers={
                 'Content-Type': 'application/json; charset=utf-8',
             }, data=b'{"client":"web","device_type":"pc"}')
-        title = video_data['title']
         main_video_asset = video_data['main_video_asset']
 
+        episode_number = int_or_none(video_data.get('episode_number'))
+        series = None
+        season = None
+        season_id = None
+        season_number = None
+        for metadata in video_data.get('__children', {}).get('Item', {}).values():
+            if metadata.get('type') == 'show':
+                series = metadata.get('title')
+            elif metadata.get('type') == 'season':
+                season = metadata.get('title')
+                season_id = metadata.get('id')
+                season_number = int_or_none(metadata.get('season_number'))
+
+        title = episode = video_data.get('title') or series
+        if series and series != title:
+            title = '%s - %s' % (series, title)
+
         thumbnails = []
         formats = []
+        subtitles = {}
         for asset in video_data.get('assets'):
             if asset.get('id') == main_video_asset:
                 for source in asset.get('sources', []):
@@ -70,6 +88,15 @@ class Go90IE(InfoExtractor):
                             'height': int_or_none(source.get('height')),
                             'tbr': int_or_none(source.get('bitrate')),
                         })
+
+                for caption in asset.get('caption_metadata', []):
+                    caption_url = caption.get('source_url')
+                    if not caption_url:
+                        continue
+                    subtitles.setdefault(caption.get('language', 'en'), []).append({
+                        'url': caption_url,
+                        'ext': determine_ext(caption_url, 'vtt'),
+                    })
             elif asset.get('type') == 'image':
                 asset_location = asset.get('location')
                 if not asset_location:
@@ -89,4 +116,11 @@ class Go90IE(InfoExtractor):
             'description': video_data.get('short_description'),
             'like_count': int_or_none(video_data.get('like_count')),
             'timestamp': parse_iso8601(video_data.get('released_at')),
+            'series': series,
+            'episode': episode,
+            'season': season,
+            'season_id': season_id,
+            'season_number': season_number,
+            'episode_number': episode_number,
+            'subtitles': subtitles,
         }

+ 2 - 2
youtube_dl/extractor/infoq.py

@@ -87,8 +87,8 @@ class InfoQIE(BokeCCBaseIE):
 
     def _extract_http_audio(self, webpage, video_id):
         fields = self._hidden_inputs(webpage)
-        http_audio_url = fields['filename']
-        if http_audio_url is None:
+        http_audio_url = fields.get('filename')
+        if not http_audio_url:
             return []
 
         cookies_header = {'Cookie': self._extract_cookies(webpage)}

+ 6 - 2
youtube_dl/extractor/instagram.py

@@ -112,7 +112,8 @@ class InstagramIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         (video_url, description, thumbnail, timestamp, uploader,
-         uploader_id, like_count, comment_count, height, width) = [None] * 10
+         uploader_id, like_count, comment_count, comments, height,
+         width) = [None] * 11
 
         shared_data = self._parse_json(
             self._search_regex(
@@ -121,7 +122,10 @@ class InstagramIE(InfoExtractor):
             video_id, fatal=False)
         if shared_data:
             media = try_get(
-                shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
+                shared_data,
+                (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
+                 lambda x: x['entry_data']['PostPage'][0]['media']),
+                dict)
             if media:
                 video_url = media.get('video_url')
                 height = int_or_none(media.get('dimensions', {}).get('height'))

+ 17 - 9
youtube_dl/extractor/iqiyi.py

@@ -189,7 +189,11 @@ class IqiyiIE(InfoExtractor):
         'only_matching': True,
     }, {
         'url': 'http://yule.iqiyi.com/pcb.html',
-        'only_matching': True,
+        'info_dict': {
+            'id': '4a0af228fddb55ec96398a364248ed7f',
+            'ext': 'mp4',
+            'title': '第2017-04-21期 女艺人频遭极端粉丝骚扰',
+        },
     }, {
         # VIP-only video. The first 2 parts (6 minutes) are available without login
         # MD5 sums omitted as values are different on Travis CI and my machine
@@ -337,15 +341,18 @@ class IqiyiIE(InfoExtractor):
             url, 'temp_id', note='download video page')
 
         # There's no simple way to determine whether an URL is a playlist or not
-        # So detect it
-        playlist_result = self._extract_playlist(webpage)
-        if playlist_result:
-            return playlist_result
-
+        # Sometimes there are playlist links in individual videos, so treat it
+        # as a single video first
         tvid = self._search_regex(
-            r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
+            r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None)
+        if tvid is None:
+            playlist_result = self._extract_playlist(webpage)
+            if playlist_result:
+                return playlist_result
+            raise ExtractorError('Can\'t find any video')
+
         video_id = self._search_regex(
-            r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
+            r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
 
         formats = []
         for _ in range(5):
@@ -377,7 +384,8 @@
 
         self._sort_formats(formats)
         title = (get_element_by_id('widget-videotitle', webpage) or
-                 clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)))
+                 clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or
+                 self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
 
         return {
             'id': video_id,

+ 22 - 6
youtube_dl/extractor/itv.py

@@ -116,13 +116,25 @@ class ITVIE(InfoExtractor):
             if not play_path:
                 continue
             tbr = int_or_none(media_file.get('bitrate'), 1000)
-            formats.append({
+            f = {
                 'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
-                'url': rtmp_url,
                 'play_path': play_path,
+                # Providing this swfVfy allows to avoid truncated downloads
+                'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
+                'page_url': url,
                 'tbr': tbr,
                 'ext': 'flv',
-            })
+            }
+            app = self._search_regex(
+                'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
+            if app:
+                f.update({
+                    'url': rtmp_url.split('?', 1)[0],
+                    'app': app,
+                })
+            else:
+                f['url'] = rtmp_url
+            formats.append(f)
 
         ios_playlist_url = params.get('data-video-playlist')
         hmac = params.get('data-video-hmac')
@@ -172,7 +184,9 @@ class ITVIE(InfoExtractor):
                         href = ios_base_url + href
                     ext = determine_ext(href)
                     if ext == 'm3u8':
-                        formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                        formats.extend(self._extract_m3u8_formats(
+                            href, video_id, 'mp4', entry_protocol='m3u8_native',
+                            m3u8_id='hls', fatal=False))
                     else:
                         formats.append({
                             'url': href,
@@ -189,7 +203,8 @@ class ITVIE(InfoExtractor):
                 'ext': 'ttml' if ext == 'xml' else ext,
             })
 
-        return {
+        info = self._search_json_ld(webpage, video_id, default={})
+        info.update({
             'id': video_id,
             'title': title,
             'formats': formats,
@@ -198,4 +213,5 @@ class ITVIE(InfoExtractor):
             'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
             'series': xpath_text(playlist, 'ProgrammeTitle'),
             'duration': parse_duration(xpath_text(playlist, 'Duration')),
-        }
+        })
+        return info

+ 37 - 74
youtube_dl/extractor/leeco.py

@@ -23,7 +23,6 @@ from ..utils import (
     str_or_none,
     url_basename,
     urshift,
-    update_url_query,
 )
 
 
@@ -51,7 +50,7 @@ class LeIE(InfoExtractor):
             'id': '1415246',
             'ext': 'mp4',
             'title': '美人天下01',
-            'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
+            'description': 'md5:28942e650e82ed4fcc8e4de919ee854d',
         },
         'params': {
             'hls_prefer_native': True,
@@ -69,7 +68,6 @@ class LeIE(InfoExtractor):
         'params': {
             'hls_prefer_native': True,
         },
-        'skip': 'Only available in China',
     }, {
         'url': 'http://sports.le.com/video/25737697.html',
         'only_matching': True,
@@ -81,7 +79,7 @@ class LeIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
+    # ror() and calc_time_key() are reversed from a embedded swf file in LetvPlayer.swf
     def ror(self, param1, param2):
         _loc3_ = 0
         while _loc3_ < param2:
@@ -90,15 +88,8 @@ class LeIE(InfoExtractor):
         return param1
 
     def calc_time_key(self, param1):
-        _loc2_ = 773625421
-        _loc3_ = self.ror(param1, _loc2_ % 13)
-        _loc3_ = _loc3_ ^ _loc2_
-        _loc3_ = self.ror(_loc3_, _loc2_ % 17)
-        return _loc3_
-
-    # reversed from http://jstatic.letvcdn.com/sdk/player.js
-    def get_mms_key(self, time):
-        return self.ror(time, 8) ^ 185025305
+        _loc2_ = 185025305
+        return self.ror(param1, _loc2_ % 17) ^ _loc2_
 
     # see M3U8Encryption class in KLetvPlayer.swf
     @staticmethod
@@ -122,7 +113,7 @@
 
     def _check_errors(self, play_json):
         # Check for errors
-        playstatus = play_json['playstatus']
+        playstatus = play_json['msgs']['playstatus']
         if playstatus['status'] == 0:
             flag = playstatus['flag']
             if flag == 1:
@@ -134,58 +125,31 @@ class LeIE(InfoExtractor):
         media_id = self._match_id(url)
         page = self._download_webpage(url, media_id)
 
-        play_json_h5 = self._download_json(
-            'http://api.le.com/mms/out/video/playJsonH5',
-            media_id, 'Downloading html5 playJson data', query={
-                'id': media_id,
-                'platid': 3,
-                'splatid': 304,
-                'format': 1,
-                'tkey': self.get_mms_key(int(time.time())),
-                'domain': 'www.le.com',
-                'tss': 'no',
-            },
-            headers=self.geo_verification_headers())
-        self._check_errors(play_json_h5)
-
         play_json_flash = self._download_json(
-            'http://api.le.com/mms/out/video/playJson',
+            'http://player-pc.le.com/mms/out/video/playJson',
             media_id, 'Downloading flash playJson data', query={
                 'id': media_id,
                 'platid': 1,
                 'splatid': 101,
                 'format': 1,
+                'source': 1000,
                 'tkey': self.calc_time_key(int(time.time())),
                 'domain': 'www.le.com',
+                'region': 'cn',
             },
             headers=self.geo_verification_headers())
         self._check_errors(play_json_flash)
 
-        def get_h5_urls(media_url, format_id):
-            location = self._download_json(
-                media_url, media_id,
-                'Download JSON metadata for format %s' % format_id, query={
-                    'format': 1,
-                    'expect': 3,
-                    'tss': 'no',
-                })['location']
-
-            return {
-                'http': update_url_query(location, {'tss': 'no'}),
-                'hls': update_url_query(location, {'tss': 'ios'}),
-            }
-
         def get_flash_urls(media_url, format_id):
-            media_url += '&' + compat_urllib_parse_urlencode({
-                'm3v': 1,
-                'format': 1,
-                'expect': 3,
-                'rateid': format_id,
-            })
-
             nodes_data = self._download_json(
                 media_url, media_id,
-                'Download JSON metadata for format %s' % format_id)
+                'Download JSON metadata for format %s' % format_id,
+                query={
+                    'm3v': 1,
+                    'format': 1,
+                    'expect': 3,
+                    'tss': 'ios',
+                })
 
             req = self._request_webpage(
                 nodes_data['nodelist'][0]['location'], media_id,
@@ -199,29 +163,28 @@
 
         extracted_formats = []
         formats = []
-        for play_json, get_urls in ((play_json_h5, get_h5_urls), (play_json_flash, get_flash_urls)):
-            playurl = play_json['playurl']
-            play_domain = playurl['domain'][0]
-
-            for format_id, format_data in playurl.get('dispatch', []).items():
-                if format_id in extracted_formats:
-                    continue
-                extracted_formats.append(format_id)
-
-                media_url = play_domain + format_data[0]
-                for protocol, format_url in get_urls(media_url, format_id).items():
-                    f = {
-                        'url': format_url,
-                        'ext': determine_ext(format_data[1]),
-                        'format_id': '%s-%s' % (protocol, format_id),
-                        'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
-                        'quality': int_or_none(format_id),
-                    }
-
-                    if format_id[-1:] == 'p':
-                        f['height'] = int_or_none(format_id[:-1])
-
-                    formats.append(f)
+        playurl = play_json_flash['msgs']['playurl']
+        play_domain = playurl['domain'][0]
+
+        for format_id, format_data in playurl.get('dispatch', []).items():
+            if format_id in extracted_formats:
+                continue
+            extracted_formats.append(format_id)
+
+            media_url = play_domain + format_data[0]
+            for protocol, format_url in get_flash_urls(media_url, format_id).items():
+                f = {
+                    'url': format_url,
+                    'ext': determine_ext(format_data[1]),
+                    'format_id': '%s-%s' % (protocol, format_id),
+                    'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
+                    'quality': int_or_none(format_id),
+                }
+
+                if format_id[-1:] == 'p':
+                    f['height'] = int_or_none(format_id[:-1])
+
+                formats.append(f)
         self._sort_formats(formats, ('height', 'quality', 'format_id'))
 
         publish_time = parse_iso8601(self._html_search_regex(

+ 1 - 1
youtube_dl/extractor/lego.py

@@ -86,7 +86,7 @@ class LEGOIE(InfoExtractor):
         formats = self._extract_akamai_formats(
             '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
         m3u8_formats = list(filter(
-            lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none',
             formats))
         if len(m3u8_formats) == len(self._BITRATES):
             self._sort_formats(m3u8_formats)

+ 37 - 0
youtube_dl/extractor/limelight.py

@@ -9,6 +9,7 @@ from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
+    smuggle_url,
     unsmuggle_url,
     ExtractorError,
 )
@@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
     _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
     _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
 
+    @classmethod
+    def _extract_urls(cls, webpage, source_url):
+        lm = {
+            'Media': 'media',
+            'Channel': 'channel',
+            'ChannelList': 'channel_list',
+        }
+        entries = []
+        for kind, video_id in re.findall(
+                r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
+                webpage):
+            entries.append(cls.url_result(
+                smuggle_url(
+                    'limelight:%s:%s' % (lm[kind], video_id),
+                    {'source_url': source_url}),
+                'Limelight%s' % kind, video_id))
+        for mobj in re.finditer(
+                # As per [1] class attribute should be exactly equal to
+                # LimelightEmbeddedPlayerFlash but numerous examples seen
+                # that don't exactly match it (e.g. [2]).
+                # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
+                # 2. http://www.sedona.com/FacilitatorTraining2017
+                r'''(?sx)
+                    <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
+                        <param[^>]+
+                            name=(["\'])flashVars\2[^>]+
+                            value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
+                ''', webpage):
+            kind, video_id = mobj.group('kind'), mobj.group('id')
+            entries.append(cls.url_result(
+                smuggle_url(
+                    'limelight:%s:%s' % (kind, video_id),
+                    {'source_url': source_url}),
+                'Limelight%s' % kind.capitalize(), video_id))
+        return entries
+
     def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
         headers = {}
         if referer:

+ 97 - 0
youtube_dl/extractor/noovo.py

@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    smuggle_url,
+    try_get,
+)
+
+
+class NoovoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P<id>[^/]+/[^/?#&]+)'
+    _TESTS = [{
+        # clip
+        'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial',
+        'info_dict': {
+            'id': '5386045029001',
+            'ext': 'mp4',
+            'title': 'Chrysler Imperial',
+            'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056',
+            'timestamp': 1491399228,
+            'upload_date': '20170405',
+            'uploader_id': '618566855001',
+            'creator': 'vtele',
+            'view_count': int,
+            'series': 'RPM+',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # episode
+        'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8',
+        'info_dict': {
+            'id': '5395865725001',
+            'title': 'Épisode 13 : Les retrouvailles',
+            'description': 'md5:336d5ebc5436534e61d16e63ddfca327',
+            'ext': 'mp4',
+            'timestamp': 1492019320,
+            'upload_date': '20170412',
+            'uploader_id': '618566855001',
+            'creator': 'vtele',
+            'view_count': int,
+            'series': "L'amour est dans le pré",
+            'season_number': 5,
+            'episode': 'Épisode 13',
+            'episode_number': 13,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        data = self._download_json(
+            'http://api.noovo.ca/api/v1/pages/single-episode/%s' % video_id,
+            video_id)['data']
+
+        content = try_get(data, lambda x: x['contents'][0])
+
+        brightcove_id = data.get('brightcoveId') or content['brightcoveId']
+
+        series = try_get(
+            data, (
+                lambda x: x['show']['title'],
+                lambda x: x['season']['show']['title']),
+            compat_str)
+
+        episode = None
+        og = data.get('og')
+        if isinstance(og, dict) and og.get('type') == 'video.episode':
+            episode = og.get('title')
+
+        video = content or data
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': BrightcoveNewIE.ie_key(),
+            'url': smuggle_url(
+                self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+                {'geo_countries': ['CA']}),
+            'id': brightcove_id,
+            'title': video.get('title'),
+            'creator': video.get('source'),
+            'view_count': int_or_none(video.get('viewsCount')),
+            'series': series,
+            'season_number': int_or_none(try_get(
+                data, lambda x: x['season']['seasonNumber'])),
+            'episode': episode,
+            'episode_number': int_or_none(data.get('episodeNumber')),
+        }

+ 1 - 1
youtube_dl/extractor/nowness.py

@@ -28,7 +28,7 @@ class NownessBaseIE(InfoExtractor):
                         bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
                         if bc_url:
                             return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
-                        bc_url = BrightcoveNewIE._extract_url(player_code)
+                        bc_url = BrightcoveNewIE._extract_url(self, player_code)
                         if bc_url:
                             return self.url_result(bc_url, BrightcoveNewIE.ie_key())
                         raise ExtractorError('Could not find player definition')

+ 26 - 6
youtube_dl/extractor/odnoklassniki.py

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
     compat_parse_qs,
     compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
@@ -37,7 +38,7 @@ class OdnoklassnikiIE(InfoExtractor):
     }, {
         # metadataUrl
         'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
-        'md5': '9676cf86eff5391d35dea675d224e131',
+        'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
         'info_dict': {
             'id': '63567059965189-0',
             'ext': 'mp4',
@@ -53,7 +54,7 @@ class OdnoklassnikiIE(InfoExtractor):
     }, {
         # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
         'url': 'http://ok.ru/video/64211978996595-1',
-        'md5': '5d7475d428845cd2e13bae6f1a992278',
+        'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
         'info_dict': {
             'id': '64211978996595-1',
             'ext': 'mp4',
@@ -61,8 +62,8 @@ class OdnoklassnikiIE(InfoExtractor):
             'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
             'duration': 440,
             'upload_date': '20150826',
-            'uploader_id': '750099571',
-            'uploader': 'Алина П',
+            'uploader_id': 'tvroscosmos',
+            'uploader': 'Телестудия Роскосмоса',
             'age_limit': 0,
         },
     }, {
@@ -81,6 +82,7 @@ class OdnoklassnikiIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+        'skip': 'Video has not been found',
     }, {
         'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
         'only_matching': True,
@@ -176,14 +178,32 @@ class OdnoklassnikiIE(InfoExtractor):
             })
             return info
 
-        quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd', 'full'))
+        quality = qualities(('4', '0', '1', '2', '3', '5'))
 
         formats = [{
             'url': f['url'],
             'ext': 'mp4',
             'format_id': f['name'],
-            'quality': quality(f['name']),
         } for f in metadata['videos']]
+
+        m3u8_url = metadata.get('hlsManifestUrl')
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
+
+        dash_manifest = metadata.get('metadataEmbedded')
+        if dash_manifest:
+            formats.extend(self._parse_mpd_formats(
+                compat_etree_fromstring(dash_manifest), 'mpd'))
+
+        for fmt in formats:
+            fmt_type = self._search_regex(
+                r'\btype[/=](\d)', fmt['url'],
+                'format type', default=None)
+            if fmt_type:
+                fmt['quality'] = quality(fmt_type)
+
         self._sort_formats(formats)
 
         info['formats'] = formats

+ 18 - 1
youtube_dl/extractor/pbs.py

@@ -8,6 +8,7 @@ from ..utils import (
     ExtractorError,
     determine_ext,
     int_or_none,
+    float_or_none,
     js_to_json,
     strip_jsonp,
     strip_or_none,
@@ -464,6 +465,7 @@ class PBSIE(InfoExtractor):
                     redirects.append(redirect)
                     redirect_urls.add(redirect_url)
 
+        chapters = []
         # Player pages may also serve different qualities
         for page in ('widget/partnerplayer', 'portalplayer'):
             player = self._download_webpage(
@@ -479,6 +481,20 @@ class PBSIE(InfoExtractor):
                     extract_redirect_urls(video_info)
                     if not info:
                         info = video_info
+                if not chapters:
+                    for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
+                        chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
+                        if not chapter:
+                            continue
+                        start_time = float_or_none(chapter.get('start_time'), 1000)
+                        duration = float_or_none(chapter.get('duration'), 1000)
+                        if start_time is None or duration is None:
+                            continue
+                        chapters.append({
+                            'start_time': start_time,
+                            'end_time': start_time + duration,
+                            'title': chapter.get('title'),
+                        })
 
 
         formats = []
         formats = []
         http_url = None
         http_url = None
@@ -515,7 +531,7 @@ class PBSIE(InfoExtractor):
                     http_url = format_url
                     http_url = format_url
         self._remove_duplicate_formats(formats)
         m3u8_formats = list(filter(
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
             formats))
             formats))
         if http_url:
             for m3u8_format in m3u8_formats:
             'upload_date': upload_date,
             'upload_date': upload_date,
             'formats': formats,
             'subtitles': subtitles,
         }
         }
+ 6 - 26
youtube_dl/extractor/porn91.py

@@ -1,10 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from ..compat import (
-    compat_urllib_parse_unquote,
-    compat_urllib_parse_urlencode,
-)
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
@@ -19,7 +15,7 @@ class Porn91IE(InfoExtractor):
 
     _TEST = {
         'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
-        'md5': '6df8f6d028bc8b14f5dbd73af742fb20',
+        'md5': '7fcdb5349354f40d41689bd0fa8db05a',
         'info_dict': {
             'id': '7e42283b4f5ab36da134',
             'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
@@ -43,24 +39,7 @@ class Porn91IE(InfoExtractor):
             r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
         title = title.replace('\n', '')
 
-        # get real url
-        file_id = self._search_regex(
-            r'so.addVariable\(\'file\',\'(\d+)\'', webpage, 'file id')
-        sec_code = self._search_regex(
-            r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code')
-        max_vid = self._search_regex(
-            r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid')
-        url_params = compat_urllib_parse_urlencode({
-            'VID': file_id,
-            'mp4': '1',
-            'seccode': sec_code,
-            'max_vid': max_vid,
-        })
-        info_cn = self._download_webpage(
-            'http://91porn.com/getfile.php?' + url_params, video_id,
-            'Downloading real video url')
-        video_url = compat_urllib_parse_unquote(self._search_regex(
-            r'file=([^&]+)&', info_cn, 'url'))
+        info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
 
         duration = parse_duration(self._search_regex(
             r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
@@ -68,11 +47,12 @@ class Porn91IE(InfoExtractor):
         comment_count = int_or_none(self._search_regex(
         comment_count = int_or_none(self._search_regex(
             r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False))
             r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False))
 
 
-        return {
+        info_dict.update({
             'id': video_id,
             'id': video_id,
             'title': title,
             'title': title,
-            'url': video_url,
             'duration': duration,
             'duration': duration,
             'comment_count': comment_count,
             'comment_count': comment_count,
             'age_limit': self._rta_search(webpage),
             'age_limit': self._rta_search(webpage),
-        }
+        })
+
+        return info_dict
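
Note: _parse_html5_media_entries() (from extractor/common.py) now does the work of the removed getfile.php round-trip: it scans the page for HTML5 <video> and <source> tags and builds the format list from their src attributes. A rough standalone sketch of the idea, not the real implementation:

    import re

    def parse_html5_sources(webpage):
        # simplified: collect the src attribute of every <video>/<source> tag
        return [{'url': m.group(1)} for m in re.finditer(
            r'<(?:video|source)\b[^>]*\bsrc=["\']([^"\']+)', webpage)]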

+ 1 - 2
youtube_dl/extractor/r7.py

@@ -62,8 +62,7 @@ class R7IE(InfoExtractor):
             # m3u8 format always matches the http format, let's copy metadata from
             # one to another
             m3u8_formats = list(filter(
-                lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-                formats))
+                lambda f: f.get('vcodec') != 'none', formats))
             if len(m3u8_formats) == 1:
                 f_copy = m3u8_formats[0].copy()
                 f_copy.update(f)

+ 5 - 1
youtube_dl/extractor/streamable.py

@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class StreamableIE(InfoExtractor):
-    _VALID_URL = r'https?://streamable\.com/(?:e/)?(?P<id>\w+)'
+    _VALID_URL = r'https?://streamable\.com/(?:[es]/)?(?P<id>\w+)'
     _TESTS = [
         {
             'url': 'https://streamable.com/dnd1',
@@ -47,6 +47,10 @@ class StreamableIE(InfoExtractor):
         {
             'url': 'https://streamable.com/e/dnd1',
             'only_matching': True,
+        },
+        {
+            'url': 'https://streamable.com/s/okkqk/drxjds',
+            'only_matching': True,
         }
     ]
 

+ 64 - 0
youtube_dl/extractor/streamango.py

@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    js_to_json,
+)
+
+
+class StreamangoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
+        'md5': 'e992787515a182f55e38fc97588d802a',
+        'info_dict': {
+            'id': 'clapasobsptpkdfe',
+            'ext': 'mp4',
+            'title': '20170315_150006.mp4',
+        }
+    }, {
+        'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+
+        formats = []
+        for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
+            video = self._parse_json(
+                format_, video_id, transform_source=js_to_json, fatal=False)
+            if not video:
+                continue
+            src = video.get('src')
+            if not src:
+                continue
+            ext = determine_ext(src, default_ext=None)
+            if video.get('type') == 'application/dash+xml' or ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    src, video_id, mpd_id='dash', fatal=False))
+            else:
+                formats.append({
+                    'url': src,
+                    'ext': ext or 'mp4',
+                    'width': int_or_none(video.get('width')),
+                    'height': int_or_none(video.get('height')),
+                    'tbr': int_or_none(video.get('bitrate')),
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'url': url,
+            'title': title,
+            'formats': formats,
+        }
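
Note: the src dicts scraped above are JavaScript object literals (single quotes, unquoted keys), hence the js_to_json transform before JSON parsing. A quick illustration with a hypothetical entry:

    import json
    from youtube_dl.utils import js_to_json

    js_obj = "{type: 'video/mp4', src: '/video.mp4', height: 720, bitrate: 1500}"
    json.loads(js_to_json(js_obj))
    # {'type': 'video/mp4', 'src': '/video.mp4', 'height': 720, 'bitrate': 1500}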

+ 1 - 1
youtube_dl/extractor/ted.py

@@ -210,7 +210,7 @@ class TEDIE(InfoExtractor):
                     resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
 
         m3u8_formats = list(filter(
-            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
             formats))
         if http_url:
             for m3u8_format in m3u8_formats:

+ 1 - 2
youtube_dl/extractor/tvp.py

@@ -150,8 +150,7 @@ class TVPEmbedIE(InfoExtractor):
                 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
             self._sort_formats(m3u8_formats)
             m3u8_formats = list(filter(
-                lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-                m3u8_formats))
+                lambda f: f.get('vcodec') != 'none', m3u8_formats))
             formats.extend(m3u8_formats)
             for i, m3u8_format in enumerate(m3u8_formats, 2):
                 http_url = '%s-%d.mp4' % (video_url_base, i)

+ 23 - 12
youtube_dl/extractor/tvplayer.py

@@ -2,9 +2,13 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
 from ..utils import (
     extract_attributes,
+    try_get,
     urlencode_postdata,
     ExtractorError,
 )
@@ -34,25 +38,32 @@ class TVPlayerIE(InfoExtractor):
             webpage, 'channel element'))
         title = current_channel['data-name']
 
-        resource_id = self._search_regex(
-            r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
-        platform = self._search_regex(
-            r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
+        resource_id = current_channel['data-id']
+
         token = self._search_regex(
-            r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
-        validate = self._search_regex(
-            r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')
+            r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage,
+            'token', group='token')
+
+        context = self._download_json(
+            'https://tvplayer.com/watch/context', display_id,
+            'Downloading JSON context', query={
+                'resource': resource_id,
+                'nonce': token,
+            })
+
+        validate = context['validate']
+        platform = try_get(
+            context, lambda x: x['platform']['key'], compat_str) or 'firefox'
 
         try:
             response = self._download_json(
                 'http://api.tvplayer.com/api/v2/stream/live',
-                resource_id, headers={
+                display_id, 'Downloading JSON stream', headers={
                     'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                 }, data=urlencode_postdata({
+                    'id': resource_id,
                     'service': 1,
                     'platform': platform,
-                    'id': resource_id,
-                    'token': token,
                     'validate': validate,
                 }))['tvplayer']['response']
         except ExtractorError as e:
@@ -63,7 +74,7 @@ class TVPlayerIE(InfoExtractor):
                     '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
             raise
 
-        formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
+        formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')
         self._sort_formats(formats)
 
         return {
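
Note: the rewritten extractor performs a three-step handshake: read the data-token nonce and data-id from the channel element, exchange them at /watch/context for a validate value (and platform key), then POST those to the stream API. A rough Python 3 sketch of the same flow outside youtube-dl, using only the endpoints and field names visible in the diff:

    import json
    import urllib.parse
    import urllib.request

    def tvplayer_stream(resource_id, token):
        # step 2: exchange the scraped nonce for a validate value
        ctx = json.load(urllib.request.urlopen(
            'https://tvplayer.com/watch/context?' + urllib.parse.urlencode(
                {'resource': resource_id, 'nonce': token})))
        # step 3: POST the context fields to the stream API
        data = urllib.parse.urlencode({
            'id': resource_id,
            'service': 1,
            'platform': (ctx.get('platform') or {}).get('key') or 'firefox',
            'validate': ctx['validate'],
        }).encode('utf-8')
        req = urllib.request.Request(
            'http://api.tvplayer.com/api/v2/stream/live', data=data)
        return json.load(urllib.request.urlopen(req))['tvplayer']['response']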

+ 11 - 6
youtube_dl/extractor/vevo.py

@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
 from ..compat import (
@@ -11,7 +12,6 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     int_or_none,
-    sanitized_Request,
     parse_iso8601,
 )
 
@@ -154,19 +154,24 @@ class VevoIE(VevoBaseIE):
     }
 
     def _initialize_api(self, video_id):
-        req = sanitized_Request(
-            'http://www.vevo.com/auth', data=b'')
         webpage = self._download_webpage(
-            req, None,
+            'https://accounts.vevo.com/token', None,
             note='Retrieving oauth token',
-            errnote='Unable to retrieve oauth token')
+            errnote='Unable to retrieve oauth token',
+            data=json.dumps({
+                'client_id': 'SPupX1tvqFEopQ1YS6SS',
+                'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
+            }).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/json',
+            })
 
         if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
             self.raise_geo_restricted(
                 '%s said: This page is currently unavailable in your region' % self.IE_NAME)
 
         auth_info = self._parse_json(webpage, video_id)
-        self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
+        self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']
 
     def _call_api(self, path, *args, **kwargs):
         try:
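
Note: authentication moves from an empty POST against www.vevo.com/auth to an anonymous OAuth grant against accounts.vevo.com, and the API token field changes from access_token to legacy_token. The same token request as a standalone Python 3 sketch:

    import json
    from urllib.request import Request, urlopen

    req = Request(
        'https://accounts.vevo.com/token',
        data=json.dumps({
            'client_id': 'SPupX1tvqFEopQ1YS6SS',
            'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
        }).encode('utf-8'),
        headers={'Content-Type': 'application/json'})
    auth_info = json.loads(urlopen(req).read().decode('utf-8'))
    token = auth_info['legacy_token']  # the API URL template now uses this field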

+ 3 - 6
youtube_dl/extractor/videopress.py

@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import random
 import re
 
 from .common import InfoExtractor
@@ -11,6 +10,7 @@ from ..utils import (
     float_or_none,
     parse_age_limit,
     qualities,
+    random_birthday,
     try_get,
     unified_timestamp,
     urljoin,
@@ -47,13 +47,10 @@ class VideoPressIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        query = random_birthday('birth_year', 'birth_month', 'birth_day')
         video = self._download_json(
             'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
-            video_id, query={
-                'birth_month': random.randint(1, 12),
-                'birth_day': random.randint(1, 31),
-                'birth_year': random.randint(1950, 1995),
-            })
+            video_id, query=query)
 
         title = video['title']
 

+ 5 - 2
youtube_dl/extractor/vidio.py

@@ -49,8 +49,11 @@ class VidioIE(InfoExtractor):
             thumbnail = clip.get('image')
 
         m3u8_url = m3u8_url or self._search_regex(
-            r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>.+?)\1', webpage, 'hls url')
-        formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
+            r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?!\1).+)\1',
+            webpage, 'hls url')
+        formats = self._extract_m3u8_formats(
+            m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
 
         duration = int_or_none(duration or self._search_regex(
             r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration'))

+ 6 - 5
youtube_dl/extractor/vidzi.py

@@ -42,14 +42,15 @@ class VidziIE(InfoExtractor):
         title = self._html_search_regex(
             r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
 
-        packed_codes = [mobj.group(0) for mobj in re.finditer(
-            PACKED_CODES_RE, webpage)]
-        for num, pc in enumerate(packed_codes, 1):
-            code = decode_packed_codes(pc).replace('\\\'', '\'')
+        codes = [webpage]
+        codes.extend([
+            decode_packed_codes(mobj.group(0)).replace('\\\'', '\'')
+            for mobj in re.finditer(PACKED_CODES_RE, webpage)])
+        for num, code in enumerate(codes, 1):
             jwplayer_data = self._parse_json(
                 self._search_regex(
                     r'setup\(([^)]+)\)', code, 'jwplayer data',
-                    default=NO_DEFAULT if num == len(packed_codes) else '{}'),
+                    default=NO_DEFAULT if num == len(codes) else '{}'),
                 video_id, transform_source=js_to_json)
             if jwplayer_data:
                 break

+ 1 - 2
youtube_dl/extractor/viewster.py

@@ -176,8 +176,7 @@ class ViewsterIE(InfoExtractor):
                     if m3u8_formats:
                         self._sort_formats(m3u8_formats)
                         m3u8_formats = list(filter(
-                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-                            m3u8_formats))
+                            lambda f: f.get('vcodec') != 'none', m3u8_formats))
                     if len(qualities) == len(m3u8_formats):
                         for q, m3u8_format in zip(qualities, m3u8_formats):
                             f = m3u8_format.copy()

+ 6 - 0
youtube_dl/extractor/washingtonpost.py

@@ -13,6 +13,7 @@ from ..utils import (
 class WashingtonPostIE(InfoExtractor):
     IE_NAME = 'washingtonpost'
     _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
     _TEST = {
         'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
         'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
@@ -27,6 +28,11 @@ class WashingtonPostIE(InfoExtractor):
         },
     }
 
+    @classmethod
+    def _extract_urls(cls, webpage):
+        return re.findall(
+            r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_data = self._download_json(
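
Note: _extract_urls() is the hook the generic extractor calls to discover embedded players on arbitrary pages. With a hypothetical embed iframe built from the test GUID above it behaves like this:

    webpage = ('<iframe src="https://www.washingtonpost.com/video/c/embed/'
               '480ba4ee-1ec7-11e6-82c2-a7dcb313287d"></iframe>')
    WashingtonPostIE._extract_urls(webpage)
    # ['https://www.washingtonpost.com/video/c/embed/480ba4ee-1ec7-11e6-82c2-a7dcb313287d']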

+ 40 - 12
youtube_dl/extractor/wsj.py

@@ -10,12 +10,14 @@ from ..utils import (
 
 
 class WSJIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://
-        (?:
-            video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
-            (?:www\.)?wsj\.com/video/[^/]+/
-        )
-        (?P<id>[a-zA-Z0-9-]+)'''
+    _VALID_URL = r'''(?x)
+                        (?:
+                            https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
+                            https?://(?:www\.)?wsj\.com/video/[^/]+/|
+                            wsj:
+                        )
+                        (?P<id>[a-fA-F0-9-]{36})
+                    '''
     IE_DESC = 'Wall Street Journal'
     _TESTS = [{
         'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
@@ -38,12 +40,17 @@ class WSJIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        api_url = (
-            'http://video-api.wsj.com/api-video/find_all_videos.asp?'
-            'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
-            'thumbnailList,author,description,name,duration,videoURL,'
-            'titletag,formattedCreationDate,keywords,editor' % video_id)
-        info = self._download_json(api_url, video_id)['items'][0]
+        info = self._download_json(
+            'http://video-api.wsj.com/api-video/find_all_videos.asp', video_id,
+            query={
+                'type': 'guid',
+                'count': 1,
+                'query': video_id,
+                'fields': ','.join((
+                    'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
+                    'description', 'name', 'duration', 'videoURL', 'titletag',
+                    'formattedCreationDate', 'keywords', 'editor')),
+            })['items'][0]
         title = info.get('name', info.get('titletag'))
 
         formats = []
@@ -87,3 +94,24 @@ class WSJIE(InfoExtractor):
             'title': title,
             'categories': info.get('keywords'),
         }
+
+
+class WSJArticleIE(InfoExtractor):
+    _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
+        'info_dict': {
+            'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
+            'ext': 'mp4',
+            'upload_date': '20170221',
+            'uploader_id': 'ralcaraz',
+            'title': 'Bao Bao the Panda Leaves for China',
+        }
+    }
+
+    def _real_extract(self, url):
+        article_id = self._match_id(url)
+        webpage = self._download_webpage(url, article_id)
+        video_id = self._search_regex(
+            r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id')
+        return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)

+ 14 - 14
youtube_dl/extractor/xfileshare.py

@@ -17,24 +17,24 @@ from ..utils import (
 
 class XFileShareIE(InfoExtractor):
     _SITES = (
-        ('daclips.in', 'DaClips'),
-        ('filehoot.com', 'FileHoot'),
-        ('gorillavid.in', 'GorillaVid'),
-        ('movpod.in', 'MovPod'),
-        ('powerwatch.pw', 'PowerWatch'),
-        ('rapidvideo.ws', 'Rapidvideo.ws'),
-        ('thevideobee.to', 'TheVideoBee'),
-        ('vidto.me', 'Vidto'),
-        ('streamin.to', 'Streamin.To'),
-        ('xvidstage.com', 'XVIDSTAGE'),
-        ('vidabc.com', 'Vid ABC'),
-        ('vidbom.com', 'VidBom'),
-        ('vidlo.us', 'vidlo'),
+        (r'daclips\.(?:in|com)', 'DaClips'),
+        (r'filehoot\.com', 'FileHoot'),
+        (r'gorillavid\.(?:in|com)', 'GorillaVid'),
+        (r'movpod\.in', 'MovPod'),
+        (r'powerwatch\.pw', 'PowerWatch'),
+        (r'rapidvideo\.ws', 'Rapidvideo.ws'),
+        (r'thevideobee\.to', 'TheVideoBee'),
+        (r'vidto\.me', 'Vidto'),
+        (r'streamin\.to', 'Streamin.To'),
+        (r'xvidstage\.com', 'XVIDSTAGE'),
+        (r'vidabc\.com', 'Vid ABC'),
+        (r'vidbom\.com', 'VidBom'),
+        (r'vidlo\.us', 'vidlo'),
     )
 
     IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
     _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
-                  % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0]))
+                  % '|'.join(site for site in list(zip(*_SITES))[0]))
 
     _FILE_NOT_FOUND_REGEXES = (
         r'>(?:404 - )?File Not Found<',

+ 21 - 2
youtube_dl/extractor/xtube.py

@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
+    js_to_json,
     orderedSet,
     parse_duration,
     sanitized_Request,
@@ -37,6 +38,22 @@ class XTubeIE(InfoExtractor):
             'comment_count': int,
             'age_limit': 18,
         }
+    }, {
+        # FLV videos with duplicated formats
+        'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
+        'md5': 'a406963eb349dd43692ec54631efd88b',
+        'info_dict': {
+            'id': '9299752',
+            'display_id': 'A-Super-Run-Part-1-YT',
+            'ext': 'flv',
+            'title': 'A Super Run - Part 1 (YT)',
+            'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
+            'uploader': 'tshirtguy59',
+            'duration': 579,
+            'view_count': int,
+            'comment_count': int,
+            'age_limit': 18,
+        },
     }, {
         # new URL schema
         'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
@@ -68,8 +85,9 @@ class XTubeIE(InfoExtractor):
             })
 
         sources = self._parse_json(self._search_regex(
-            r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),',
-            webpage, 'sources', group='sources'), video_id)
+            r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
+            webpage, 'sources', group='sources'), video_id,
+            transform_source=js_to_json)
 
         formats = []
         for format_id, format_url in sources.items():
@@ -78,6 +96,7 @@ class XTubeIE(InfoExtractor):
                 'format_id': format_id,
                 'height': int_or_none(format_id),
             })
+        self._remove_duplicate_formats(formats)
         self._sort_formats(formats)
 
         title = self._search_regex(

+ 10 - 1
youtube_dl/extractor/xvideos.py

@@ -6,8 +6,10 @@ from .common import InfoExtractor
 from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     clean_html,
-    ExtractorError,
     determine_ext,
+    ExtractorError,
+    int_or_none,
+    parse_duration,
 )
 
 
@@ -20,6 +22,7 @@ class XVideosIE(InfoExtractor):
             'id': '4588838',
             'ext': 'mp4',
             'title': 'Biker Takes his Girl',
+            'duration': 108,
             'age_limit': 18,
         }
     }
@@ -36,6 +39,11 @@ class XVideosIE(InfoExtractor):
             r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
         video_thumbnail = self._search_regex(
             r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
+        video_duration = int_or_none(self._og_search_property(
+            'duration', webpage, default=None)) or parse_duration(
+            self._search_regex(
+                r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)',
+                webpage, 'duration', fatal=False))
 
         formats = []
 
@@ -67,6 +75,7 @@ class XVideosIE(InfoExtractor):
             'id': video_id,
             'formats': formats,
             'title': video_title,
+            'duration': video_duration,
             'thumbnail': video_thumbnail,
             'age_limit': 18,
         }

+ 1 - 1
youtube_dl/extractor/yahoo.py

@@ -258,7 +258,7 @@ class YahooIE(InfoExtractor):
             return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
 
         # Look for Brightcove New Studio embeds
-        bc_url = BrightcoveNewIE._extract_url(webpage)
+        bc_url = BrightcoveNewIE._extract_url(self, webpage)
         if bc_url:
             return self.url_result(bc_url, BrightcoveNewIE.ie_key())
 

+ 2 - 1
youtube_dl/extractor/yandexmusic.py

@@ -234,7 +234,8 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
                 'overembed': 'false',
             })['playlist']
 
-        tracks, track_ids = playlist['tracks'], map(compat_str, playlist['trackIds'])
+        tracks = playlist['tracks']
+        track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]
 
         # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
         # missing tracks should be retrieved manually.
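
Note: this change is not just cosmetic. On Python 3, map() returns a one-shot iterator, so any second pass over track_ids would silently see nothing; the list comprehension materializes the ids once:

    track_ids = map(str, [42, 43])
    list(track_ids)  # ['42', '43']
    list(track_ids)  # [] -- the iterator is already exhausted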

+ 3 - 2
youtube_dl/extractor/youtube.py

@@ -963,7 +963,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
-            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
+            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
             player_url)
         if not id_m:
             raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1629,7 +1629,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 player_desc = 'flash player %s' % player_version
                             else:
                                 player_version = self._search_regex(
-                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
+                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+                                     r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
                                     player_url,
                                     'html5 player', fatal=False)
                                 player_desc = 'html5 player %s' % player_version
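
Note: both patterns gain an optional locale segment so that player URLs of the form .../player-<id>/<ll>_<CC>/base.js are recognized. A quick check against a hypothetical locale-scoped path:

    import re

    m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        '/yts/jsbin/player-vflXGBaUN/en_US/base.js')
    m.group('id'), m.group('ext')  # ('vflXGBaUN', 'js')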

+ 101 - 0
youtube_dl/extractor/zaq1.py

@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unified_timestamp,
+)
+
+
+class Zaq1IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?zaq1\.pl/video/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://zaq1.pl/video/xev0e',
+        'md5': '24a5eb3f052e604ae597c4d0d19b351e',
+        'info_dict': {
+            'id': 'xev0e',
+            'title': 'DJ NA WESELE. TANIEC Z FIGURAMI.węgrów/sokołów podlaski/siedlce/mińsk mazowiecki/warszawa',
+            'description': 'www.facebook.com/weseledjKontakt: 728 448 199 / 505 419 147',
+            'ext': 'mp4',
+            'duration': 511,
+            'timestamp': 1490896361,
+            'uploader': 'Anonim',
+            'upload_date': '20170330',
+            'view_count': int,
+        }
+    }, {
+        # malformed JSON-LD
+        'url': 'http://zaq1.pl/video/x81vn',
+        'info_dict': {
+            'id': 'x81vn',
+            'title': 'SEKRETNE ŻYCIE WALTERA MITTY',
+            'ext': 'mp4',
+            'duration': 6234,
+            'timestamp': 1493494860,
+            'uploader': 'Anonim',
+            'upload_date': '20170429',
+            'view_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'expected_warnings': ['Failed to parse JSON'],
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(
+            r'data-video-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'video url', group='url')
+
+        info = self._search_json_ld(webpage, video_id, fatal=False)
+
+        def extract_data(field, name, fatal=False):
+            return self._search_regex(
+                r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % field,
+                webpage, field, fatal=fatal, group='field')
+
+        if not info.get('title'):
+            info['title'] = extract_data('file-name', 'title', fatal=True)
+
+        if not info.get('duration'):
+            info['duration'] = int_or_none(extract_data('duration', 'duration'))
+
+        if not info.get('thumbnail'):
+            info['thumbnail'] = extract_data('photo-url', 'thumbnail')
+
+        if not info.get('timestamp'):
+            info['timestamp'] = unified_timestamp(self._html_search_meta(
+                'uploadDate', webpage, 'timestamp'))
+
+        if not info.get('interactionCount'):
+            info['view_count'] = int_or_none(self._html_search_meta(
+                'interactionCount', webpage, 'view count'))
+
+        uploader = self._html_search_regex(
+            r'Wideo dodał:\s*<a[^>]*>([^<]+)</a>', webpage, 'uploader',
+            fatal=False)
+
+        width = int_or_none(self._html_search_meta(
+            'width', webpage, fatal=False))
+        height = int_or_none(self._html_search_meta(
+            'height', webpage, fatal=False))
+
+        info.update({
+            'id': video_id,
+            'formats': [{
+                'url': video_url,
+                'width': width,
+                'height': height,
+                'http_headers': {
+                    'Referer': url,
+                },
+            }],
+            'uploader': uploader,
+        })
+
+        return info
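
Note: the extractor treats JSON-LD as the primary metadata source and falls back, field by field, to the player's data-* attributes. The fallback boils down to this standalone sketch of the extract_data helper above:

    import re

    def extract_data(webpage, field):
        # e.g. field='file-name' matches data-file-name="..." or data-file-name='...'
        m = re.search(
            r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % field, webpage)
        return m.group('field') if m else None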

+ 4 - 0
youtube_dl/options.py

@@ -468,6 +468,10 @@ def parseOpts(overrideArguments=None):
         '--abort-on-unavailable-fragment',
         action='store_false', dest='skip_unavailable_fragments',
         help='Abort downloading when some fragment is not available')
+    downloader.add_option(
+        '--keep-fragments',
+        action='store_true', dest='keep_fragments', default=False,
+        help='Keep downloaded fragments on disk after downloading is finished; fragments are erased by default')
     downloader.add_option(
         '--buffer-size',
         dest='buffersize', metavar='SIZE', default='1024',
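
Note: fragmented downloads (HLS, DASH, ISM, f4m) assemble the output from temporary fragment files that are normally deleted once the file is stitched together; the new flag keeps them on disk, which is mainly useful for debugging broken streams:

    youtube-dl --keep-fragments URL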

+ 28 - 3
youtube_dl/postprocessor/ffmpeg.py

@@ -4,6 +4,7 @@ import io
 import os
 import subprocess
 import time
+import re
 
 
 from .common import AudioConversionError, PostProcessor
@@ -22,6 +23,7 @@ from ..utils import (
     subtitles_filename,
     dfxp2srt,
     ISO639Utils,
+    replace_extension,
 )
 
 
@@ -429,17 +431,40 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
 
         filename = info['filepath']
         temp_filename = prepend_extension(filename, 'temp')
+        in_filenames = [filename]
+        options = []
 
         if info['ext'] == 'm4a':
-            options = ['-vn', '-acodec', 'copy']
+            options.extend(['-vn', '-acodec', 'copy'])
         else:
-            options = ['-c', 'copy']
+            options.extend(['-c', 'copy'])
 
         for (name, value) in metadata.items():
             options.extend(['-metadata', '%s=%s' % (name, value)])
 
+        chapters = info.get('chapters', [])
+        if chapters:
+            metadata_filename = encodeFilename(replace_extension(filename, 'meta'))
+            with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
+                def ffmpeg_escape(text):
+                    return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
+
+                metadata_file_content = ';FFMETADATA1\n'
+                for chapter in chapters:
+                    metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
+                    metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
+                    metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
+                    chapter_title = chapter.get('title')
+                    if chapter_title:
+                        metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
+                f.write(metadata_file_content)
+                in_filenames.append(metadata_filename)
+                options.extend(['-map_metadata', '1'])
+
         self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
-        self.run_ffmpeg(filename, temp_filename, options)
+        self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
+        if chapters:
+            os.remove(metadata_filename)
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
         return [], info
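
Note: chapters are passed to ffmpeg as a second input in FFMETADATA1 format and mapped in via -map_metadata 1. For two hypothetical chapters ({'start_time': 0, 'end_time': 60.5, 'title': 'Intro'} and {'start_time': 60.5, 'end_time': 300, 'title': 'Main; part 1'}) the generated .meta file would read:

    ;FFMETADATA1
    [CHAPTER]
    TIMEBASE=1/1000
    START=0
    END=60500
    title=Intro
    [CHAPTER]
    TIMEBASE=1/1000
    START=60500
    END=300000
    title=Main\; part 1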

+ 3 - 2
youtube_dl/socks.py

@@ -193,9 +193,10 @@ class sockssocket(socket.socket):
 
         self._check_response_version(SOCKS5_VERSION, version)
 
-        if method == Socks5Auth.AUTH_NO_ACCEPTABLE:
+        if method == Socks5Auth.AUTH_NO_ACCEPTABLE or (
+                method == Socks5Auth.AUTH_USER_PASS and (not self._proxy.username or not self._proxy.password)):
             self.close()
-            raise Socks5Error(method)
+            raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE)
 
         if method == Socks5Auth.AUTH_USER_PASS:
             username = self._proxy.username.encode('utf-8')

+ 189 - 21
youtube_dl/utils.py

@@ -11,6 +11,7 @@ import contextlib
 import ctypes
 import datetime
 import email.utils
+import email.header
 import errno
 import functools
 import gzip
@@ -421,8 +422,8 @@ def clean_html(html):
 
     # Newline vs <br />
     html = html.replace('\n', ' ')
-    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
-    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
+    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
+    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
     # Strip html tags
     html = re.sub('<.*?>', '', html)
     # Replace html entities
@@ -1194,6 +1195,11 @@ def unified_timestamp(date_str, day_first=True):
     # Remove AM/PM + timezone
     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
 
+    # Remove unrecognized timezones from ISO 8601 alike timestamps
+    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+    if m:
+        date_str = date_str[:-len(m.group('tz'))]
+
     for expression in date_formats(day_first):
         try:
             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
@@ -2092,6 +2098,58 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
     return new_req
 
 
+def try_multipart_encode(data, boundary):
+    content_type = 'multipart/form-data; boundary=%s' % boundary
+
+    out = b''
+    for k, v in data.items():
+        out += b'--' + boundary.encode('ascii') + b'\r\n'
+        if isinstance(k, compat_str):
+            k = k.encode('utf-8')
+        if isinstance(v, compat_str):
+            v = v.encode('utf-8')
+        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
+        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
+        content = b'Content-Disposition: form-data; name="%s"\r\n\r\n' % k + v + b'\r\n'
+        if boundary.encode('ascii') in content:
+            raise ValueError('Boundary overlaps with data')
+        out += content
+
+    out += b'--' + boundary.encode('ascii') + b'--\r\n'
+
+    return out, content_type
+
+
+def multipart_encode(data, boundary=None):
+    '''
+    Encode a dict to RFC 7578-compliant form-data
+
+    data:
+        A dict where keys and values can be either Unicode or bytes-like
+        objects.
+    boundary:
+        If specified a Unicode object, it's used as the boundary. Otherwise
+        a random boundary is generated.
+
+    Reference: https://tools.ietf.org/html/rfc7578
+    '''
+    has_specified_boundary = boundary is not None
+
+    while True:
+        if boundary is None:
+            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
+
+        try:
+            out, content_type = try_multipart_encode(data, boundary)
+            break
+        except ValueError:
+            if has_specified_boundary:
+                raise
+            boundary = None
+
+    return out, content_type
+
+
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
     if isinstance(key_or_keys, (list, tuple)):
     if isinstance(key_or_keys, (list, tuple)):
         for key in key_or_keys:
         for key in key_or_keys:
@@ -2103,13 +2161,16 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True):
 
 
 
 
 def try_get(src, getter, expected_type=None):
 def try_get(src, getter, expected_type=None):
-    try:
-        v = getter(src)
-    except (AttributeError, KeyError, TypeError, IndexError):
-        pass
-    else:
-        if expected_type is None or isinstance(v, expected_type):
-            return v
+    if not isinstance(getter, (list, tuple)):
+        getter = [getter]
+    for get in getter:
+        try:
+            v = get(src)
+        except (AttributeError, KeyError, TypeError, IndexError):
+            pass
+        else:
+            if expected_type is None or isinstance(v, expected_type):
+                return v
 
 
 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
@@ -2270,10 +2331,8 @@ def mimetype2ext(mt):
     return {
         '3gpp': '3gp',
         'smptett+xml': 'tt',
-        'srt': 'srt',
         'ttaf+xml': 'dfxp',
         'ttml+xml': 'ttml',
-        'vtt': 'vtt',
         'x-flv': 'flv',
         'x-mp4-fragmented': 'mp4',
         'x-ms-wmv': 'wmv',
@@ -2281,11 +2340,11 @@ def mimetype2ext(mt):
         'x-mpegurl': 'm3u8',
         'vnd.apple.mpegurl': 'm3u8',
         'dash+xml': 'mpd',
-        'f4m': 'f4m',
         'f4m+xml': 'f4m',
         'hds+xml': 'f4m',
         'vnd.ms-sstr+xml': 'ism',
         'quicktime': 'mov',
+        'mp2t': 'ts',
     }.get(res, res)
 
 
@@ -2508,27 +2567,97 @@ def srt_subtitles_timecode(seconds):
 
 
 def dfxp2srt(dfxp_data):
+    LEGACY_NAMESPACES = (
+        ('http://www.w3.org/ns/ttml', [
+            'http://www.w3.org/2004/11/ttaf1',
+            'http://www.w3.org/2006/04/ttaf1',
+            'http://www.w3.org/2006/10/ttaf1',
+        ]),
+        ('http://www.w3.org/ns/ttml#styling', [
+            'http://www.w3.org/ns/ttml#style',
+        ]),
+    )
+
+    SUPPORTED_STYLING = [
+        'color',
+        'fontFamily',
+        'fontSize',
+        'fontStyle',
+        'fontWeight',
+        'textDecoration'
+    ]
+
     _x = functools.partial(xpath_with_ns, ns_map={
         'ttml': 'http://www.w3.org/ns/ttml',
-        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
-        'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
+        'tts': 'http://www.w3.org/ns/ttml#styling',
     })
 
+    styles = {}
+    default_style = {}
+
     class TTMLPElementParser(object):
-        out = ''
+        _out = ''
+        _unclosed_elements = []
+        _applied_styles = []
 
         def start(self, tag, attrib):
-            if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
-                self.out += '\n'
+            if tag in (_x('ttml:br'), 'br'):
+                self._out += '\n'
+            else:
+                unclosed_elements = []
+                style = {}
+                element_style_id = attrib.get('style')
+                if default_style:
+                    style.update(default_style)
+                if element_style_id:
+                    style.update(styles.get(element_style_id, {}))
+                for prop in SUPPORTED_STYLING:
+                    prop_val = attrib.get(_x('tts:' + prop))
+                    if prop_val:
+                        style[prop] = prop_val
+                if style:
+                    font = ''
+                    for k, v in sorted(style.items()):
+                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
+                            continue
+                        if k == 'color':
+                            font += ' color="%s"' % v
+                        elif k == 'fontSize':
+                            font += ' size="%s"' % v
+                        elif k == 'fontFamily':
+                            font += ' face="%s"' % v
+                        elif k == 'fontWeight' and v == 'bold':
+                            self._out += '<b>'
+                            unclosed_elements.append('b')
+                        elif k == 'fontStyle' and v == 'italic':
+                            self._out += '<i>'
+                            unclosed_elements.append('i')
+                        elif k == 'textDecoration' and v == 'underline':
+                            self._out += '<u>'
+                            unclosed_elements.append('u')
+                    if font:
+                        self._out += '<font' + font + '>'
+                        unclosed_elements.append('font')
+                    applied_style = {}
+                    if self._applied_styles:
+                        applied_style.update(self._applied_styles[-1])
+                    applied_style.update(style)
+                    self._applied_styles.append(applied_style)
+                self._unclosed_elements.append(unclosed_elements)
 
 
         def end(self, tag):
         def end(self, tag):
-            pass
+            if tag not in (_x('ttml:br'), 'br'):
+                unclosed_elements = self._unclosed_elements.pop()
+                for element in reversed(unclosed_elements):
+                    self._out += '</%s>' % element
+                if unclosed_elements and self._applied_styles:
+                    self._applied_styles.pop()
 
         def data(self, data):
-            self.out += data
+            self._out += data
 
         def close(self):
-            return self.out.strip()
+            return self._out.strip()
 
     def parse_node(node):
         target = TTMLPElementParser()
@@ -2536,13 +2665,45 @@ def dfxp2srt(dfxp_data):
         parser.feed(xml.etree.ElementTree.tostring(node))
         return parser.close()
 
+    for k, v in LEGACY_NAMESPACES:
+        for ns in v:
+            dfxp_data = dfxp_data.replace(ns, k)
+
     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
     out = []
-    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p')
+    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
 
     if not paras:
         raise ValueError('Invalid dfxp/TTML subtitle')
 
+    repeat = False
+    while True:
+        for style in dfxp.findall(_x('.//ttml:style')):
+            style_id = style.get('id')
+            parent_style_id = style.get('style')
+            if parent_style_id:
+                if parent_style_id not in styles:
+                    repeat = True
+                    continue
+                styles[style_id] = styles[parent_style_id].copy()
+            for prop in SUPPORTED_STYLING:
+                prop_val = style.get(_x('tts:' + prop))
+                if prop_val:
+                    styles.setdefault(style_id, {})[prop] = prop_val
+        if repeat:
+            repeat = False
+        else:
+            break
+
+    for p in ('body', 'div'):
+        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
+        if ele is None:
+            continue
+        style = styles.get(ele.get('style'))
+        if not style:
+            continue
+        default_style.update(style)
+
     for para, index in zip(paras, itertools.count(1)):
         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
@@ -3862,3 +4023,10 @@ class PhantomJSwrapper(object):
 
         return (html, encodeArgument(out))
 
+
+def random_birthday(year_field, month_field, day_field):
+    return {
+        year_field: str(random.randint(1950, 1995)),
+        month_field: str(random.randint(1, 12)),
+        day_field: str(random.randint(1, 31)),
+    }
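
Note on the new utils helpers: multipart_encode() returns the encoded body together with the matching Content-Type header; try_get() now also accepts a list of getters tried in order; random_birthday() produces the query dict used by the videopress extractor above. A short sketch (the fixed boundary and the data layout in the try_get call are illustrative only):

    body, content_type = multipart_encode({'field': 'value'}, boundary='xxx')
    # content_type == 'multipart/form-data; boundary=xxx'
    # body == (b'--xxx\r\n'
    #          b'Content-Disposition: form-data; name="field"\r\n\r\n'
    #          b'value\r\n'
    #          b'--xxx--\r\n')

    title = try_get(data, [
        lambda x: x['video']['title'],  # preferred location
        lambda x: x['meta']['name'],    # hypothetical fallback
    ], compat_str)

    query = random_birthday('birth_year', 'birth_month', 'birth_day')
    # e.g. {'birth_year': '1987', 'birth_month': '4', 'birth_day': '21'}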

+ 1 - 1
youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.04.15'
+__version__ = '2017.05.01'