Browse Source

Merge branch 'UP/youtube-dl' into dl/YoutubeSearchURLIE

pukkandan 3 years ago
parent
commit
a3373da70c
100 changed files with 4912 additions and 2600 deletions
  1. 3 3
      .github/ISSUE_TEMPLATE/1_broken_site.md
  2. 2 2
      .github/ISSUE_TEMPLATE/2_site_support_request.md
  3. 2 2
      .github/ISSUE_TEMPLATE/3_site_feature_request.md
  4. 3 3
      .github/ISSUE_TEMPLATE/4_bug_report.md
  5. 2 2
      .github/ISSUE_TEMPLATE/5_feature_request.md
  6. 12 5
      .github/workflows/ci.yml
  7. 1 0
      AUTHORS
  8. 394 0
      ChangeLog
  9. 403 366
      README.md
  10. 27 10
      docs/supportedsites.md
  11. 12 4
      test/test_YoutubeDL.py
  12. 0 9
      test/test_all_urls.py
  13. 10 0
      test/test_execution.py
  14. 0 20
      test/test_youtube_chapters.py
  15. 14 5
      test/test_youtube_lists.py
  16. 26 0
      test/test_youtube_misc.py
  17. 11 26
      test/test_youtube_signature.py
  18. 163 123
      youtube_dl/YoutubeDL.py
  19. 1 0
      youtube_dl/__init__.py
  20. 10 0
      youtube_dl/compat.py
  21. 72 62
      youtube_dl/extractor/abcnews.py
  22. 130 68
      youtube_dl/extractor/adn.py
  23. 2 2
      youtube_dl/extractor/aenetworks.py
  24. 32 9
      youtube_dl/extractor/aljazeera.py
  25. 95 2
      youtube_dl/extractor/americastestkitchen.py
  26. 2 1
      youtube_dl/extractor/amp.py
  27. 16 10
      youtube_dl/extractor/animeondemand.py
  28. 9 3
      youtube_dl/extractor/aol.py
  29. 24 23
      youtube_dl/extractor/apa.py
  30. 8 5
      youtube_dl/extractor/appleconnect.py
  31. 1 0
      youtube_dl/extractor/applepodcasts.py
  32. 42 12
      youtube_dl/extractor/archiveorg.py
  33. 74 44
      youtube_dl/extractor/ard.py
  34. 101 0
      youtube_dl/extractor/arnes.py
  35. 2 0
      youtube_dl/extractor/awaan.py
  36. 1 1
      youtube_dl/extractor/azmedien.py
  37. 37 0
      youtube_dl/extractor/bandaichannel.py
  38. 3 1
      youtube_dl/extractor/bandcamp.py
  39. 273 24
      youtube_dl/extractor/bbc.py
  40. 2 1
      youtube_dl/extractor/bilibili.py
  41. 8 2
      youtube_dl/extractor/bleacherreport.py
  42. 0 86
      youtube_dl/extractor/blinkx.py
  43. 10 4
      youtube_dl/extractor/bravotv.py
  44. 53 3
      youtube_dl/extractor/canvas.py
  45. 4 1
      youtube_dl/extractor/cbs.py
  46. 1 1
      youtube_dl/extractor/cbsnews.py
  47. 99 24
      youtube_dl/extractor/cbssports.py
  48. 57 11
      youtube_dl/extractor/ccma.py
  49. 7 4
      youtube_dl/extractor/cda.py
  50. 26 117
      youtube_dl/extractor/comedycentral.py
  51. 16 18
      youtube_dl/extractor/common.py
  52. 26 1
      youtube_dl/extractor/cspan.py
  53. 62 46
      youtube_dl/extractor/curiositystream.py
  54. 26 20
      youtube_dl/extractor/dispeak.py
  55. 129 30
      youtube_dl/extractor/dplay.py
  56. 35 185
      youtube_dl/extractor/dreisat.py
  57. 28 17
      youtube_dl/extractor/egghead.py
  58. 9 12
      youtube_dl/extractor/eroprofile.py
  59. 74 19
      youtube_dl/extractor/extractors.py
  60. 4 1
      youtube_dl/extractor/facebook.py
  61. 13 19
      youtube_dl/extractor/formula1.py
  62. 12 8
      youtube_dl/extractor/franceculture.py
  63. 6 1
      youtube_dl/extractor/francetv.py
  64. 1 1
      youtube_dl/extractor/fujitv.py
  65. 5 1
      youtube_dl/extractor/funimation.py
  66. 33 1
      youtube_dl/extractor/gdcvault.py
  67. 161 0
      youtube_dl/extractor/gedidigital.py
  68. 55 33
      youtube_dl/extractor/generic.py
  69. 38 8
      youtube_dl/extractor/go.py
  70. 7 1
      youtube_dl/extractor/googledrive.py
  71. 198 173
      youtube_dl/extractor/ign.py
  72. 27 2
      youtube_dl/extractor/instagram.py
  73. 41 33
      youtube_dl/extractor/jamendo.py
  74. 30 34
      youtube_dl/extractor/kakao.py
  75. 7 7
      youtube_dl/extractor/kaltura.py
  76. 81 56
      youtube_dl/extractor/khanacademy.py
  77. 78 12
      youtube_dl/extractor/lbry.py
  78. 141 1
      youtube_dl/extractor/line.py
  79. 0 191
      youtube_dl/extractor/liveleak.py
  80. 31 0
      youtube_dl/extractor/maoritv.py
  81. 13 7
      youtube_dl/extractor/medaltv.py
  82. 76 231
      youtube_dl/extractor/medialaan.py
  83. 196 0
      youtube_dl/extractor/minds.py
  84. 7 2
      youtube_dl/extractor/mixcloud.py
  85. 168 21
      youtube_dl/extractor/mlb.py
  86. 15 13
      youtube_dl/extractor/mtv.py
  87. 1 3
      youtube_dl/extractor/ninecninemedia.py
  88. 107 81
      youtube_dl/extractor/ninegag.py
  89. 28 26
      youtube_dl/extractor/njpwworld.py
  90. 1 1
      youtube_dl/extractor/nrk.py
  91. 24 2
      youtube_dl/extractor/orf.py
  92. 148 0
      youtube_dl/extractor/palcomp3.py
  93. 36 12
      youtube_dl/extractor/peertube.py
  94. 7 1
      youtube_dl/extractor/periscope.py
  95. 121 33
      youtube_dl/extractor/phoenix.py
  96. 37 63
      youtube_dl/extractor/picarto.py
  97. 3 1
      youtube_dl/extractor/pinterest.py
  98. 65 0
      youtube_dl/extractor/playstuff.py
  99. 1 1
      youtube_dl/extractor/pluralsight.py
  100. 197 70
      youtube_dl/extractor/pornhub.py

+ 3 - 3
.github/ISSUE_TEMPLATE/1_broken_site.md

@@ -18,7 +18,7 @@ title: ''
 
 
 <!--
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
 - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 -->
 
 
 - [ ] I'm reporting a broken site support
 - [ ] I'm reporting a broken site support
-- [ ] I've verified that I'm running youtube-dl version **2021.01.08**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've searched the bugtracker for similar issues including closed ones
 - [ ] I've searched the bugtracker for similar issues including closed ones
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
  [debug] User config: []
  [debug] User config: []
  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2021.01.08
+ [debug] youtube-dl version 2021.12.17
  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
  [debug] Proxy map: {}
  [debug] Proxy map: {}

+ 2 - 2
.github/ISSUE_TEMPLATE/2_site_support_request.md

@@ -19,7 +19,7 @@ labels: 'site-support-request'
 
 
 <!--
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
 - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
 - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 -->
 
 
 - [ ] I'm reporting a new site support request
 - [ ] I'm reporting a new site support request
-- [ ] I've verified that I'm running youtube-dl version **2021.01.08**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that none of provided URLs violate any copyrights
 - [ ] I've checked that none of provided URLs violate any copyrights
 - [ ] I've searched the bugtracker for similar site support requests including closed ones
 - [ ] I've searched the bugtracker for similar site support requests including closed ones

+ 2 - 2
.github/ISSUE_TEMPLATE/3_site_feature_request.md

@@ -18,13 +18,13 @@ title: ''
 
 
 <!--
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Finally, put x into all relevant boxes (like this [x])
 - Finally, put x into all relevant boxes (like this [x])
 -->
 -->
 
 
 - [ ] I'm reporting a site feature request
 - [ ] I'm reporting a site feature request
-- [ ] I've verified that I'm running youtube-dl version **2021.01.08**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've searched the bugtracker for similar site feature requests including closed ones
 - [ ] I've searched the bugtracker for similar site feature requests including closed ones
 
 
 
 

+ 3 - 3
.github/ISSUE_TEMPLATE/4_bug_report.md

@@ -18,7 +18,7 @@ title: ''
 
 
 <!--
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
 - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
 - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
 -->
 -->
 
 
 - [ ] I'm reporting a broken site support issue
 - [ ] I'm reporting a broken site support issue
-- [ ] I've verified that I'm running youtube-dl version **2021.01.08**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all provided URLs are alive and playable in a browser
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
 - [ ] I've searched the bugtracker for similar bug reports including closed ones
 - [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
  [debug] User config: []
  [debug] User config: []
  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2021.01.08
+ [debug] youtube-dl version 2021.12.17
  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
  [debug] Proxy map: {}
  [debug] Proxy map: {}

+ 2 - 2
.github/ISSUE_TEMPLATE/5_feature_request.md

@@ -19,13 +19,13 @@ labels: 'request'
 
 
 <!--
 <!--
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
 Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
 - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
 - Finally, put x into all relevant boxes (like this [x])
 - Finally, put x into all relevant boxes (like this [x])
 -->
 -->
 
 
 - [ ] I'm reporting a feature request
 - [ ] I'm reporting a feature request
-- [ ] I've verified that I'm running youtube-dl version **2021.01.08**
+- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
 - [ ] I've searched the bugtracker for similar feature requests including closed ones
 - [ ] I've searched the bugtracker for similar feature requests including closed ones
 
 
 
 

+ 12 - 5
.github/workflows/ci.yml

@@ -1,5 +1,5 @@
 name: CI
 name: CI
-on: [push]
+on: [push, pull_request]
 jobs:
 jobs:
   tests:
   tests:
     name: Tests
     name: Tests
@@ -7,7 +7,7 @@ jobs:
     strategy:
     strategy:
       fail-fast: true
       fail-fast: true
       matrix:
       matrix:
-        os: [ubuntu-latest]
+        os: [ubuntu-18.04]
         # TODO: python 2.6
         # TODO: python 2.6
         python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
         python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
         python-impl: [cpython]
         python-impl: [cpython]
@@ -26,11 +26,11 @@ jobs:
           ytdl-test-set: download
           ytdl-test-set: download
           run-tests-ext: bat
           run-tests-ext: bat
         # jython
         # jython
-        - os: ubuntu-latest
+        - os: ubuntu-18.04
           python-impl: jython
           python-impl: jython
           ytdl-test-set: core
           ytdl-test-set: core
           run-tests-ext: sh
           run-tests-ext: sh
-        - os: ubuntu-latest
+        - os: ubuntu-18.04
           python-impl: jython
           python-impl: jython
           ytdl-test-set: download
           ytdl-test-set: download
           run-tests-ext: sh
           run-tests-ext: sh
@@ -49,11 +49,18 @@ jobs:
     - name: Install Jython
     - name: Install Jython
       if: ${{ matrix.python-impl == 'jython' }}
       if: ${{ matrix.python-impl == 'jython' }}
       run: |
       run: |
-        wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
+        wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
         java -jar jython-installer.jar -s -d "$HOME/jython"
         java -jar jython-installer.jar -s -d "$HOME/jython"
         echo "$HOME/jython/bin" >> $GITHUB_PATH
         echo "$HOME/jython/bin" >> $GITHUB_PATH
     - name: Install nose
     - name: Install nose
+      if: ${{ matrix.python-impl != 'jython' }}
       run: pip install nose
       run: pip install nose
+    - name: Install nose (Jython)
+      if: ${{ matrix.python-impl == 'jython' }}
+      # Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
+      run: |
+        wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
+        pip install nose-1.3.7-py2-none-any.whl
     - name: Run tests
     - name: Run tests
       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
       env:
       env:

+ 1 - 0
AUTHORS

@@ -246,3 +246,4 @@ Enes Solak
 Nathan Rossi
 Nathan Rossi
 Thomas van der Berg
 Thomas van der Berg
 Luca Cherubin
 Luca Cherubin
+Adrian Heine

+ 394 - 0
ChangeLog

@@ -1,3 +1,397 @@
+version 2021.12.17
+
+Core
+* [postprocessor/ffmpeg] Show ffmpeg output on error (#22680, #29336)
+
+Extractors
+* [youtube] Update signature function patterns (#30363, #30366)
+* [peertube] Only call description endpoint if necessary (#29383)
+* [periscope] Pass referer to HLS requests (#29419)
+- [liveleak] Remove extractor (#17625, #24222, #29331)
++ [pornhub] Add support for pornhubthbh7ap3u.onion
+* [pornhub] Detect geo restriction
+* [pornhub] Dismiss tbr extracted from download URLs (#28927)
+* [curiositystream:collection] Extend _VALID_URL (#26326, #29117)
+* [youtube] Make get_video_info processing more robust (#29333)
+* [youtube] Workaround for get_video_info request (#29333)
+* [bilibili] Strip uploader name (#29202)
+* [youtube] Update invidious instance list (#29281)
+* [umg:de] Update GraphQL API URL (#29304)
+* [nrk] Switch psapi URL to https (#29344)
++ [egghead] Add support for app.egghead.io (#28404, #29303)
+* [appleconnect] Fix extraction (#29208)
++ [orf:tvthek] Add support for MPD formats (#28672, #29236)
+
+
+version 2021.06.06
+
+Extractors
+* [facebook] Improve login required detection
+* [youporn] Fix formats and view count extraction (#29216)
+* [orf:tvthek] Fix thumbnails extraction (#29217)
+* [formula1] Fix extraction (#29206)
+* [ard] Relax URL regular expression and fix video ids (#22724, #29091)
++ [ustream] Detect https embeds (#29133)
+* [ted] Prefer own formats over external sources (#29142)
+* [twitch:clips] Improve extraction (#29149)
++ [twitch:clips] Add access token query to download URLs (#29136)
+* [youtube] Fix get_video_info request (#29086, #29165)
+* [vimeo] Fix vimeo pro embed extraction (#29126)
+* [redbulltv] Fix embed data extraction (#28770)
+* [shahid] Relax URL regular expression (#28772, #28930)
+
+
+version 2021.05.16
+
+Core
+* [options] Fix thumbnail option group name (#29042)
+* [YoutubeDL] Improve extract_info doc (#28946)
+
+Extractors
++ [playstuff] Add support for play.stuff.co.nz (#28901, #28931)
+* [eroprofile] Fix extraction (#23200, #23626, #29008)
++ [vivo] Add support for vivo.st (#29009)
++ [generic] Add support for og:audio (#28311, #29015)
+* [phoenix] Fix extraction (#29057)
++ [generic] Add support for sibnet embeds
++ [vk] Add support for sibnet embeds (#9500)
++ [generic] Add Referer header for direct videojs download URLs (#2879,
+  #20217, #29053)
+* [orf:radio] Switch download URLs to HTTPS (#29012, #29046)
+- [blinkx] Remove extractor (#28941)
+* [medaltv] Relax URL regular expression (#28884)
++ [funimation] Add support for optional lang code in URLs (#28950)
++ [gdcvault] Add support for HTML5 videos
+* [dispeak] Improve FLV extraction (#13513, #28970)
+* [kaltura] Improve iframe extraction (#28969)
+* [kaltura] Make embed code alternatives actually work
+* [cda] Improve extraction (#28709, #28937)
+* [twitter] Improve formats extraction from vmap URL (#28909)
+* [xtube] Fix formats extraction (#28870)
+* [svtplay] Improve extraction (#28507, #28876)
+* [tv2dk] Fix extraction (#28888)
+
+
+version 2021.04.26
+
+Extractors
++ [xfileshare] Add support for wolfstream.tv (#28858)
+* [francetvinfo] Improve video id extraction (#28792)
+* [medaltv] Fix extraction (#28807)
+* [tver] Redirect all downloads to Brightcove (#28849)
+* [go] Improve video id extraction (#25207, #25216, #26058)
+* [youtube] Fix lazy extractors (#28780)
++ [bbc] Extract description and timestamp from __INITIAL_DATA__ (#28774)
+* [cbsnews] Fix extraction for python <3.6 (#23359)
+
+
+version 2021.04.17
+
+Core
++ [utils] Add support for experimental HTTP response status code
+  308 Permanent Redirect (#27877, #28768)
+
+Extractors
++ [lbry] Add support for HLS videos (#27877, #28768)
+* [youtube] Fix stretched ratio calculation
+* [youtube] Improve stretch extraction (#28769)
+* [youtube:tab] Improve grid extraction (#28725)
++ [youtube:tab] Detect series playlist on playlists page (#28723)
++ [youtube] Add more invidious instances (#28706)
+* [pluralsight] Extend anti-throttling timeout (#28712)
+* [youtube] Improve URL to extractor routing (#27572, #28335, #28742)
++ [maoritv] Add support for maoritelevision.com (#24552)
++ [youtube:tab] Pass innertube context and x-goog-visitor-id header along with
+  continuation requests (#28702)
+* [mtv] Fix Viacom A/B Testing Video Player extraction (#28703)
++ [pornhub] Extract DASH and HLS formats from get_media end point (#28698)
+* [cbssports] Fix extraction (#28682)
+* [jamendo] Fix track extraction (#28686)
+* [curiositystream] Fix format extraction (#26845, #28668)
+
+
+version 2021.04.07
+
+Core
+* [extractor/common] Use compat_cookies_SimpleCookie for _get_cookies
++ [compat] Introduce compat_cookies_SimpleCookie
+* [extractor/common] Improve JSON-LD author extraction
+* [extractor/common] Fix _get_cookies on python 2 (#20673, #23256, #20326,
+  #28640)
+
+Extractors
+* [youtube] Fix extraction of videos with restricted location (#28685)
++ [line] Add support for live.line.me (#17205, #28658)
+* [vimeo] Improve extraction (#28591)
+* [youku] Update ccode (#17852, #28447, #28460, #28648)
+* [youtube] Prefer direct entry metadata over entry metadata from playlist
+  (#28619, #28636)
+* [screencastomatic] Fix extraction (#11976, #24489)
++ [palcomp3] Add support for palcomp3.com (#13120)
++ [arnes] Add support for video.arnes.si (#28483)
++ [youtube:tab] Add support for hashtags (#28308)
+
+
+version 2021.04.01
+
+Extractors
+* [youtube] Setup CONSENT cookie when needed (#28604)
+* [vimeo] Fix password protected review extraction (#27591)
+* [youtube] Improve age-restricted video extraction (#28578)
+
+
+version 2021.03.31
+
+Extractors
+* [vlive] Fix inkey request (#28589)
+* [francetvinfo] Improve video id extraction (#28584)
++ [instagram] Extract duration (#28469)
+* [instagram] Improve title extraction (#28469)
++ [sbs] Add support for ondemand watch URLs (#28566)
+* [youtube] Fix video's channel extraction (#28562)
+* [picarto] Fix live stream extraction (#28532)
+* [vimeo] Fix unlisted video extraction (#28414)
+* [youtube:tab] Fix playlist/community continuation items extraction (#28266)
+* [ard] Improve clip id extraction (#22724, #28528)
+
+
+version 2021.03.25
+
+Extractors
++ [zoom] Add support for zoom.us (#16597, #27002, #28531)
+* [bbc] Fix BBC IPlayer Episodes/Group extraction (#28360)
+* [youtube] Fix default value for youtube_include_dash_manifest (#28523)
+* [zingmp3] Fix extraction (#11589, #16409, #16968, #27205)
++ [vgtv] Add support for new tv.aftonbladet.se URL schema (#28514)
++ [tiktok] Detect private videos (#28453)
+* [vimeo:album] Fix extraction for albums with number of videos multiple
+  to page size (#28486)
+* [vvvvid] Fix kenc format extraction (#28473)
+* [mlb] Fix video extraction (#21241)
+* [svtplay] Improve extraction (#28448)
+* [applepodcasts] Fix extraction (#28445)
+* [rtve] Improve extraction
+    + Extract all formats
+    * Fix RTVE Infantil extraction (#24851)
+    + Extract is_live and series
+
+
+version 2021.03.14
+
+Core
++ Introduce release_timestamp meta field (#28386)
+
+Extractors
++ [southpark] Add support for southparkstudios.com (#28413)
+* [southpark] Fix extraction (#26763, #28413)
+* [sportdeutschland] Fix extraction (#21856, #28425)
+* [pinterest] Reduce the number of HLS format requests
+* [peertube] Improve thumbnail extraction (#28419)
+* [tver] Improve title extraction (#28418)
+* [fujitv] Fix HLS formats extension (#28416)
+* [shahid] Fix format extraction (#28383)
++ [lbry] Add support for channel filters (#28385)
++ [bandcamp] Extract release timestamp
++ [lbry] Extract release timestamp (#28386)
+* [pornhub] Detect flagged videos
++ [pornhub] Extract formats from get_media end point (#28395)
+* [bilibili] Fix video info extraction (#28341)
++ [cbs] Add support for Paramount+ (#28342)
++ [trovo] Add Origin header to VOD formats (#28346)
+* [voxmedia] Fix volume embed extraction (#28338)
+
+
+version 2021.03.03
+
+Extractors
+* [youtube:tab] Switch continuation to browse API (#28289, #28327)
+* [9c9media] Fix extraction for videos with multiple ContentPackages (#28309)
++ [bbc] Add support for BBC Reel videos (#21870, #23660, #28268)
+
+
+version 2021.03.02
+
+Extractors
+* [zdf] Rework extractors (#11606, #13473, #17354, #21185, #26711, #27068,
+  #27930, #28198, #28199, #28274)
+    * Generalize cross-extractor video ids for zdf based extractors
+    * Improve extraction
+    * Fix 3sat and phoenix
+* [stretchinternet] Fix extraction (#28297)
+* [urplay] Fix episode data extraction (#28292)
++ [bandaichannel] Add support for b-ch.com (#21404)
+* [srgssr] Improve extraction (#14717, #14725, #27231, #28238)
+    + Extract subtitle
+    * Fix extraction for new videos
+    * Update srf download domains
+* [vvvvid] Reduce season request payload size
++ [vvvvid] Extract series sublists playlist title (#27601, #27618)
++ [dplay] Extract Ad-Free uplynk URLs (#28160)
++ [wat] Detect DRM protected videos (#27958)
+* [tf1] Improve extraction (#27980, #28040)
+* [tmz] Fix and improve extraction (#24603, #24687, 28211)
++ [gedidigital] Add support for Gedi group sites (#7347, #26946)
+* [youtube] Fix get_video_info request
+
+
+version 2021.02.22
+
+Core
++ [postprocessor/embedthumbnail] Recognize atomicparsley binary in lowercase
+  (#28112)
+
+Extractors
+* [apa] Fix and improve extraction (#27750)
++ [youporn] Extract duration (#28019)
++ [peertube] Add support for canard.tube (#28190)
+* [youtube] Fixup m4a_dash formats (#28165)
++ [samplefocus] Add support for samplefocus.com (#27763)
++ [vimeo] Add support for unlisted video source format extraction
+* [viki] Improve extraction (#26522, #28203)
+    * Extract uploader URL and episode number
+    * Report login required error
+    + Extract 480p formats
+    * Fix API v4 calls
+* [ninegag] Unescape title (#28201)
+* [youtube] Improve URL regular expression (#28193)
++ [youtube] Add support for redirect.invidious.io (#28193)
++ [dplay] Add support for de.hgtv.com (#28182)
++ [dplay] Add support for discoveryplus.com (#24698)
++ [simplecast] Add support for simplecast.com (#24107)
+* [youtube] Fix uploader extraction in flat playlist mode (#28045)
+* [yandexmusic:playlist] Request missing tracks in chunks (#27355, #28184)
++ [storyfire] Add support for storyfire.com (#25628, #26349)
++ [zhihu] Add support for zhihu.com (#28177)
+* [youtube] Fix controversial videos when authenticated with cookies (#28174)
+* [ccma] Fix timestamp parsing in python 2
++ [videopress] Add support for video.wordpress.com
+* [kakao] Improve info extraction and detect geo restriction (#26577)
+* [xboxclips] Fix extraction (#27151)
+* [ard] Improve formats extraction (#28155)
++ [canvas] Add support for dagelijksekost.een.be (#28119)
+
+
+version 2021.02.10
+
+Extractors
+* [youtube:tab] Improve grid continuation extraction (#28130)
+* [ign] Fix extraction (#24771)
++ [xhamster] Extract format filesize
++ [xhamster] Extract formats from xplayer settings (#28114)
++ [youtube] Add support phone/tablet JS player (#26424)
+* [archiveorg] Fix and improve extraction (#21330, #23586, #25277, #26780,
+  #27109, #27236, #28063)
++ [cda] Detect geo restricted videos (#28106)
+* [urplay] Fix extraction (#28073, #28074)
+* [youtube] Fix release date extraction (#28094)
++ [youtube] Extract abr and vbr (#28100)
+* [youtube] Skip OTF formats (#28070)
+
+
+version 2021.02.04.1
+
+Extractors
+* [youtube] Prefer DASH formats (#28070)
+* [azmedien] Fix extraction (#28064)
+
+
+version 2021.02.04
+
+Extractors
+* [pornhub] Implement lazy playlist extraction
+* [svtplay] Fix video id extraction (#28058)
++ [pornhub] Add support for authentication (#18797, #21416, #24294)
+* [pornhub:user] Improve paging
++ [pornhub:user] Add support for URLs unavailable via /videos page (#27853)
++ [bravotv] Add support for oxygen.com (#13357, #22500)
++ [youtube] Pass embed URL to get_video_info request
+* [ccma] Improve metadata extraction (#27994)
+    + Extract age limit, alt title, categories, series and episode number
+    * Fix timestamp and multiple subtitles extraction
+* [egghead] Update API domain (#28038)
+- [vidzi] Remove extractor (#12629)
+* [vidio] Improve metadata extraction
+* [youtube] Improve subtitles extraction
+* [youtube] Fix chapter extraction fallback
+* [youtube] Rewrite extractor
+    * Improve format sorting
+    * Remove unused code
+    * Fix series metadata extraction
+    * Fix trailer video extraction
+    * Improve error reporting
+    + Extract video location
++ [vvvvid] Add support for youtube embeds (#27825)
+* [googledrive] Report download page errors (#28005)
+* [vlive] Fix error message decoding for python 2 (#28004)
+* [youtube] Improve DASH formats file size extraction
+* [cda] Improve birth validation detection (#14022, #27929)
++ [awaan] Extract uploader id (#27963)
++ [medialaan] Add support for DPG Media MyChannels based websites (#14871, #15597,
+  #16106, #16489)
+* [abcnews] Fix extraction (#12394, #27920)
+* [AMP] Fix upload date and timestamp extraction (#27970)
+* [tv4] Relax URL regular expression (#27964)
++ [tv2] Add support for mtvuutiset.fi (#27744)
+* [adn] Improve login warning reporting
+* [zype] Fix uplynk id extraction (#27956)
++ [adn] Add support for authentication (#17091, #27841, #27937)
+
+
+version 2021.01.24.1
+
+Core
+* Introduce --output-na-placeholder (#27896)
+
+Extractors
+* [franceculture] Make thumbnail optional (#18807)
+* [franceculture] Fix extraction (#27891, #27903)
+* [njpwworld] Fix extraction (#27890)
+* [comedycentral] Fix extraction (#27905)
+* [wat] Fix format extraction (#27901)
++ [americastestkitchen:season] Add support for seasons (#27861)
++ [trovo] Add support for trovo.live (#26125)
++ [aol] Add support for yahoo videos (#26650)
+* [yahoo] Fix single video extraction
+* [lbry] Unescape lbry URI (#27872)
+* [9gag] Fix and improve extraction (#23022)
+* [americastestkitchen] Improve metadata extraction for ATK episodes (#27860)
+* [aljazeera] Fix extraction (#20911, #27779)
++ [minds] Add support for minds.com (#17934)
+* [ard] Fix title and description extraction (#27761)
++ [spotify] Add support for Spotify Podcasts (#27443)
+
+
+version 2021.01.16
+
+Core
+* [YoutubeDL] Protect from infinite recursion due to recursively nested
+  playlists (#27833)
+* [YoutubeDL] Ignore failure to create existing directory (#27811)
+* [YoutubeDL] Raise syntax error for format selection expressions with multiple
+  + operators (#27803)
+
+Extractors
++ [animeondemand] Add support for lazy playlist extraction (#27829)
+* [youporn] Restrict fallback download URL (#27822)
+* [youporn] Improve height and tbr extraction (#20425, #23659)
+* [youporn] Fix extraction (#27822)
++ [twitter] Add support for unified cards (#27826)
++ [twitch] Add Authorization header with OAuth token for GraphQL requests
+  (#27790)
+* [mixcloud:playlist:base] Extract video id in flat playlist mode (#27787)
+* [cspan] Improve info extraction (#27791)
+* [adn] Improve info extraction
+* [adn] Fix extraction (#26963, #27732)
+* [youtube:search] Extract from all sections (#27604)
+* [youtube:search] Fix view count and try to extract all video sections (#27604)
+* [twitch] Improve login error extraction
+* [twitch] Fix authentication (#27743)
+* [3qsdn] Improve extraction (#21058)
+* [peertube] Extract formats from streamingPlaylists (#26002, #27586, #27728)
+* [khanacademy] Fix extraction (#2887, #26803)
+* [spike] Update Paramount Network feed URL (#27715)
+
+
 version 2021.01.08
 version 2021.01.08
 
 
 Core
 Core

+ 403 - 366
README.md

@@ -52,394 +52,431 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     youtube-dl [OPTIONS] URL [URL...]
     youtube-dl [OPTIONS] URL [URL...]
 
 
 # OPTIONS
 # OPTIONS
-    -h, --help                       Print this help text and exit
-    --version                        Print program version and exit
-    -U, --update                     Update this program to latest version. Make
-                                     sure that you have sufficient permissions
-                                     (run with sudo if needed)
-    -i, --ignore-errors              Continue on download errors, for example to
-                                     skip unavailable videos in a playlist
-    --abort-on-error                 Abort downloading of further videos (in the
-                                     playlist or the command line) if an error
-                                     occurs
-    --dump-user-agent                Display the current browser identification
-    --list-extractors                List all supported extractors
-    --extractor-descriptions         Output descriptions of all supported
-                                     extractors
-    --force-generic-extractor        Force extraction to use the generic
-                                     extractor
-    --default-search PREFIX          Use this prefix for unqualified URLs. For
-                                     example "gvsearch2:" downloads two videos
-                                     from google videos for youtube-dl "large
-                                     apple". Use the value "auto" to let
-                                     youtube-dl guess ("auto_warning" to emit a
-                                     warning when guessing). "error" just throws
-                                     an error. The default value "fixup_error"
-                                     repairs broken URLs, but emits an error if
-                                     this is not possible instead of searching.
-    --ignore-config                  Do not read configuration files. When given
-                                     in the global configuration file
-                                     /etc/youtube-dl.conf: Do not read the user
-                                     configuration in ~/.config/youtube-
-                                     dl/config (%APPDATA%/youtube-dl/config.txt
-                                     on Windows)
-    --config-location PATH           Location of the configuration file; either
-                                     the path to the config or its containing
-                                     directory.
-    --flat-playlist                  Do not extract the videos of a playlist,
-                                     only list them.
-    --mark-watched                   Mark videos watched (YouTube only)
-    --no-mark-watched                Do not mark videos watched (YouTube only)
-    --no-color                       Do not emit color codes in output
+    -h, --help                           Print this help text and exit
+    --version                            Print program version and exit
+    -U, --update                         Update this program to latest version.
+                                         Make sure that you have sufficient
+                                         permissions (run with sudo if needed)
+    -i, --ignore-errors                  Continue on download errors, for
+                                         example to skip unavailable videos in a
+                                         playlist
+    --abort-on-error                     Abort downloading of further videos (in
+                                         the playlist or the command line) if an
+                                         error occurs
+    --dump-user-agent                    Display the current browser
+                                         identification
+    --list-extractors                    List all supported extractors
+    --extractor-descriptions             Output descriptions of all supported
+                                         extractors
+    --force-generic-extractor            Force extraction to use the generic
+                                         extractor
+    --default-search PREFIX              Use this prefix for unqualified URLs.
+                                         For example "gvsearch2:" downloads two
+                                         videos from google videos for youtube-
+                                         dl "large apple". Use the value "auto"
+                                         to let youtube-dl guess ("auto_warning"
+                                         to emit a warning when guessing).
+                                         "error" just throws an error. The
+                                         default value "fixup_error" repairs
+                                         broken URLs, but emits an error if this
+                                         is not possible instead of searching.
+    --ignore-config                      Do not read configuration files. When
+                                         given in the global configuration file
+                                         /etc/youtube-dl.conf: Do not read the
+                                         user configuration in
+                                         ~/.config/youtube-dl/config
+                                         (%APPDATA%/youtube-dl/config.txt on
+                                         Windows)
+    --config-location PATH               Location of the configuration file;
+                                         either the path to the config or its
+                                         containing directory.
+    --flat-playlist                      Do not extract the videos of a
+                                         playlist, only list them.
+    --mark-watched                       Mark videos watched (YouTube only)
+    --no-mark-watched                    Do not mark videos watched (YouTube
+                                         only)
+    --no-color                           Do not emit color codes in output
 
 
 ## Network Options:
 ## Network Options:
-    --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy.
-                                     To enable SOCKS proxy, specify a proper
-                                     scheme. For example
-                                     socks5://127.0.0.1:1080/. Pass in an empty
-                                     string (--proxy "") for direct connection
-    --socket-timeout SECONDS         Time to wait before giving up, in seconds
-    --source-address IP              Client-side IP address to bind to
-    -4, --force-ipv4                 Make all connections via IPv4
-    -6, --force-ipv6                 Make all connections via IPv6
+    --proxy URL                          Use the specified HTTP/HTTPS/SOCKS
+                                         proxy. To enable SOCKS proxy, specify a
+                                         proper scheme. For example
+                                         socks5://127.0.0.1:1080/. Pass in an
+                                         empty string (--proxy "") for direct
+                                         connection
+    --socket-timeout SECONDS             Time to wait before giving up, in
+                                         seconds
+    --source-address IP                  Client-side IP address to bind to
+    -4, --force-ipv4                     Make all connections via IPv4
+    -6, --force-ipv6                     Make all connections via IPv6
 
 
 ## Geo Restriction:
 ## Geo Restriction:
-    --geo-verification-proxy URL     Use this proxy to verify the IP address for
-                                     some geo-restricted sites. The default
-                                     proxy specified by --proxy (or none, if the
-                                     option is not present) is used for the
-                                     actual downloading.
-    --geo-bypass                     Bypass geographic restriction via faking
-                                     X-Forwarded-For HTTP header
-    --no-geo-bypass                  Do not bypass geographic restriction via
-                                     faking X-Forwarded-For HTTP header
-    --geo-bypass-country CODE        Force bypass geographic restriction with
-                                     explicitly provided two-letter ISO 3166-2
-                                     country code
-    --geo-bypass-ip-block IP_BLOCK   Force bypass geographic restriction with
-                                     explicitly provided IP block in CIDR
-                                     notation
+    --geo-verification-proxy URL         Use this proxy to verify the IP address
+                                         for some geo-restricted sites. The
+                                         default proxy specified by --proxy (or
+                                         none, if the option is not present) is
+                                         used for the actual downloading.
+    --geo-bypass                         Bypass geographic restriction via
+                                         faking X-Forwarded-For HTTP header
+    --no-geo-bypass                      Do not bypass geographic restriction
+                                         via faking X-Forwarded-For HTTP header
+    --geo-bypass-country CODE            Force bypass geographic restriction
+                                         with explicitly provided two-letter ISO
+                                         3166-2 country code
+    --geo-bypass-ip-block IP_BLOCK       Force bypass geographic restriction
+                                         with explicitly provided IP block in
+                                         CIDR notation
 
 
 ## Video Selection:
 ## Video Selection:
-    --playlist-start NUMBER          Playlist video to start at (default is 1)
-    --playlist-end NUMBER            Playlist video to end at (default is last)
-    --playlist-items ITEM_SPEC       Playlist video items to download. Specify
-                                     indices of the videos in the playlist
-                                     separated by commas like: "--playlist-items
-                                     1,2,5,8" if you want to download videos
-                                     indexed 1, 2, 5, 8 in the playlist. You can
-                                     specify range: "--playlist-items
-                                     1-3,7,10-13", it will download the videos
-                                     at index 1, 2, 3, 7, 10, 11, 12 and 13.
-    --match-title REGEX              Download only matching titles (regex or
-                                     caseless sub-string)
-    --reject-title REGEX             Skip download for matching titles (regex or
-                                     caseless sub-string)
-    --max-downloads NUMBER           Abort after downloading NUMBER files
-    --min-filesize SIZE              Do not download any videos smaller than
-                                     SIZE (e.g. 50k or 44.6m)
-    --max-filesize SIZE              Do not download any videos larger than SIZE
-                                     (e.g. 50k or 44.6m)
-    --date DATE                      Download only videos uploaded in this date
-    --datebefore DATE                Download only videos uploaded on or before
-                                     this date (i.e. inclusive)
-    --dateafter DATE                 Download only videos uploaded on or after
-                                     this date (i.e. inclusive)
-    --min-views COUNT                Do not download any videos with less than
-                                     COUNT views
-    --max-views COUNT                Do not download any videos with more than
-                                     COUNT views
-    --match-filter FILTER            Generic video filter. Specify any key (see
-                                     the "OUTPUT TEMPLATE" for a list of
-                                     available keys) to match if the key is
-                                     present, !key to check if the key is not
-                                     present, key > NUMBER (like "comment_count
-                                     > 12", also works with >=, <, <=, !=, =) to
-                                     compare against a number, key = 'LITERAL'
-                                     (like "uploader = 'Mike Smith'", also works
-                                     with !=) to match against a string literal
-                                     and & to require multiple matches. Values
-                                     which are not known are excluded unless you
-                                     put a question mark (?) after the operator.
-                                     For example, to only match videos that have
-                                     been liked more than 100 times and disliked
-                                     less than 50 times (or the dislike
-                                     functionality is not available at the given
-                                     service), but who also have a description,
-                                     use --match-filter "like_count > 100 &
-                                     dislike_count <? 50 & description" .
-    --no-playlist                    Download only the video, if the URL refers
-                                     to a video and a playlist.
-    --yes-playlist                   Download the playlist, if the URL refers to
-                                     a video and a playlist.
-    --age-limit YEARS                Download only videos suitable for the given
-                                     age
-    --download-archive FILE          Download only videos not listed in the
-                                     archive file. Record the IDs of all
-                                     downloaded videos in it.
-    --include-ads                    Download advertisements as well
-                                     (experimental)
+    --playlist-start NUMBER              Playlist video to start at (default is
+                                         1)
+    --playlist-end NUMBER                Playlist video to end at (default is
+                                         last)
+    --playlist-items ITEM_SPEC           Playlist video items to download.
+                                         Specify indices of the videos in the
+                                         playlist separated by commas like: "--
+                                         playlist-items 1,2,5,8" if you want to
+                                         download videos indexed 1, 2, 5, 8 in
+                                         the playlist. You can specify range: "
+                                         --playlist-items 1-3,7,10-13", it will
+                                         download the videos at index 1, 2, 3,
+                                         7, 10, 11, 12 and 13.
+    --match-title REGEX                  Download only matching titles (regex or
+                                         caseless sub-string)
+    --reject-title REGEX                 Skip download for matching titles
+                                         (regex or caseless sub-string)
+    --max-downloads NUMBER               Abort after downloading NUMBER files
+    --min-filesize SIZE                  Do not download any videos smaller than
+                                         SIZE (e.g. 50k or 44.6m)
+    --max-filesize SIZE                  Do not download any videos larger than
+                                         SIZE (e.g. 50k or 44.6m)
+    --date DATE                          Download only videos uploaded in this
+                                         date
+    --datebefore DATE                    Download only videos uploaded on or
+                                         before this date (i.e. inclusive)
+    --dateafter DATE                     Download only videos uploaded on or
+                                         after this date (i.e. inclusive)
+    --min-views COUNT                    Do not download any videos with less
+                                         than COUNT views
+    --max-views COUNT                    Do not download any videos with more
+                                         than COUNT views
+    --match-filter FILTER                Generic video filter. Specify any key
+                                         (see the "OUTPUT TEMPLATE" for a list
+                                         of available keys) to match if the key
+                                         is present, !key to check if the key is
+                                         not present, key > NUMBER (like
+                                         "comment_count > 12", also works with
+                                         >=, <, <=, !=, =) to compare against a
+                                         number, key = 'LITERAL' (like "uploader
+                                         = 'Mike Smith'", also works with !=) to
+                                         match against a string literal and & to
+                                         require multiple matches. Values which
+                                         are not known are excluded unless you
+                                         put a question mark (?) after the
+                                         operator. For example, to only match
+                                         videos that have been liked more than
+                                         100 times and disliked less than 50
+                                         times (or the dislike functionality is
+                                         not available at the given service),
+                                         but who also have a description, use
+                                         --match-filter "like_count > 100 &
+                                         dislike_count <? 50 & description" .
+    --no-playlist                        Download only the video, if the URL
+                                         refers to a video and a playlist.
+    --yes-playlist                       Download the playlist, if the URL
+                                         refers to a video and a playlist.
+    --age-limit YEARS                    Download only videos suitable for the
+                                         given age
+    --download-archive FILE              Download only videos not listed in the
+                                         archive file. Record the IDs of all
+                                         downloaded videos in it.
+    --include-ads                        Download advertisements as well
+                                         (experimental)
 
 
 ## Download Options:
 ## Download Options:
-    -r, --limit-rate RATE            Maximum download rate in bytes per second
-                                     (e.g. 50K or 4.2M)
-    -R, --retries RETRIES            Number of retries (default is 10), or
-                                     "infinite".
-    --fragment-retries RETRIES       Number of retries for a fragment (default
-                                     is 10), or "infinite" (DASH, hlsnative and
-                                     ISM)
-    --skip-unavailable-fragments     Skip unavailable fragments (DASH, hlsnative
-                                     and ISM)
-    --abort-on-unavailable-fragment  Abort downloading when some fragment is not
-                                     available
-    --keep-fragments                 Keep downloaded fragments on disk after
-                                     downloading is finished; fragments are
-                                     erased by default
-    --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K)
-                                     (default is 1024)
-    --no-resize-buffer               Do not automatically adjust the buffer
-                                     size. By default, the buffer size is
-                                     automatically resized from an initial value
-                                     of SIZE.
-    --http-chunk-size SIZE           Size of a chunk for chunk-based HTTP
-                                     downloading (e.g. 10485760 or 10M) (default
-                                     is disabled). May be useful for bypassing
-                                     bandwidth throttling imposed by a webserver
-                                     (experimental)
-    --playlist-reverse               Download playlist videos in reverse order
-    --playlist-random                Download playlist videos in random order
-    --xattr-set-filesize             Set file xattribute ytdl.filesize with
-                                     expected file size
-    --hls-prefer-native              Use the native HLS downloader instead of
-                                     ffmpeg
-    --hls-prefer-ffmpeg              Use ffmpeg instead of the native HLS
-                                     downloader
-    --hls-use-mpegts                 Use the mpegts container for HLS videos,
-                                     allowing to play the video while
-                                     downloading (some players may not be able
-                                     to play it)
-    --external-downloader COMMAND    Use the specified external downloader.
-                                     Currently supports
-                                     aria2c,avconv,axel,curl,ffmpeg,httpie,wget
-    --external-downloader-args ARGS  Give these arguments to the external
-                                     downloader
+    -r, --limit-rate RATE                Maximum download rate in bytes per
+                                         second (e.g. 50K or 4.2M)
+    -R, --retries RETRIES                Number of retries (default is 10), or
+                                         "infinite".
+    --fragment-retries RETRIES           Number of retries for a fragment
+                                         (default is 10), or "infinite" (DASH,
+                                         hlsnative and ISM)
+    --skip-unavailable-fragments         Skip unavailable fragments (DASH,
+                                         hlsnative and ISM)
+    --abort-on-unavailable-fragment      Abort downloading when some fragment is
+                                         not available
+    --keep-fragments                     Keep downloaded fragments on disk after
+                                         downloading is finished; fragments are
+                                         erased by default
+    --buffer-size SIZE                   Size of download buffer (e.g. 1024 or
+                                         16K) (default is 1024)
+    --no-resize-buffer                   Do not automatically adjust the buffer
+                                         size. By default, the buffer size is
+                                         automatically resized from an initial
+                                         value of SIZE.
+    --http-chunk-size SIZE               Size of a chunk for chunk-based HTTP
+                                         downloading (e.g. 10485760 or 10M)
+                                         (default is disabled). May be useful
+                                         for bypassing bandwidth throttling
+                                         imposed by a webserver (experimental)
+    --playlist-reverse                   Download playlist videos in reverse
+                                         order
+    --playlist-random                    Download playlist videos in random
+                                         order
+    --xattr-set-filesize                 Set file xattribute ytdl.filesize with
+                                         expected file size
+    --hls-prefer-native                  Use the native HLS downloader instead
+                                         of ffmpeg
+    --hls-prefer-ffmpeg                  Use ffmpeg instead of the native HLS
+                                         downloader
+    --hls-use-mpegts                     Use the mpegts container for HLS
+                                         videos, allowing to play the video
+                                         while downloading (some players may not
+                                         be able to play it)
+    --external-downloader COMMAND        Use the specified external downloader.
+                                         Currently supports aria2c,avconv,axel,c
+                                         url,ffmpeg,httpie,wget
+    --external-downloader-args ARGS      Give these arguments to the external
+                                         downloader
 
 
 ## Filesystem Options:
 ## Filesystem Options:
-    -a, --batch-file FILE            File containing URLs to download ('-' for
-                                     stdin), one URL per line. Lines starting
-                                     with '#', ';' or ']' are considered as
-                                     comments and ignored.
-    --id                             Use only video ID in file name
-    -o, --output TEMPLATE            Output filename template, see the "OUTPUT
-                                     TEMPLATE" for all the info
-    --autonumber-start NUMBER        Specify the start value for %(autonumber)s
-                                     (default is 1)
-    --restrict-filenames             Restrict filenames to only ASCII
-                                     characters, and avoid "&" and spaces in
-                                     filenames
-    -w, --no-overwrites              Do not overwrite files
-    -c, --continue                   Force resume of partially downloaded files.
-                                     By default, youtube-dl will resume
-                                     downloads if possible.
-    --no-continue                    Do not resume partially downloaded files
-                                     (restart from beginning)
-    --no-part                        Do not use .part files - write directly
-                                     into output file
-    --no-mtime                       Do not use the Last-modified header to set
-                                     the file modification time
-    --write-description              Write video description to a .description
-                                     file
-    --write-info-json                Write video metadata to a .info.json file
-    --write-annotations              Write video annotations to a
-                                     .annotations.xml file
-    --load-info-json FILE            JSON file containing the video information
-                                     (created with the "--write-info-json"
-                                     option)
-    --cookies FILE                   File to read cookies from and dump cookie
-                                     jar in
-    --cache-dir DIR                  Location in the filesystem where youtube-dl
-                                     can store some downloaded information
-                                     permanently. By default
-                                     $XDG_CACHE_HOME/youtube-dl or
-                                     ~/.cache/youtube-dl . At the moment, only
-                                     YouTube player files (for videos with
-                                     obfuscated signatures) are cached, but that
-                                     may change.
-    --no-cache-dir                   Disable filesystem caching
-    --rm-cache-dir                   Delete all filesystem cache files
-
-## Thumbnail images:
-    --write-thumbnail                Write thumbnail image to disk
-    --write-all-thumbnails           Write all thumbnail image formats to disk
-    --list-thumbnails                Simulate and list all available thumbnail
-                                     formats
+    -a, --batch-file FILE                File containing URLs to download ('-'
+                                         for stdin), one URL per line. Lines
+                                         starting with '#', ';' or ']' are
+                                         considered as comments and ignored.
+    --id                                 Use only video ID in file name
+    -o, --output TEMPLATE                Output filename template, see the
+                                         "OUTPUT TEMPLATE" for all the info
+    --output-na-placeholder PLACEHOLDER  Placeholder value for unavailable meta
+                                         fields in output filename template
+                                         (default is "NA")
+    --autonumber-start NUMBER            Specify the start value for
+                                         %(autonumber)s (default is 1)
+    --restrict-filenames                 Restrict filenames to only ASCII
+                                         characters, and avoid "&" and spaces in
+                                         filenames
+    -w, --no-overwrites                  Do not overwrite files
+    -c, --continue                       Force resume of partially downloaded
+                                         files. By default, youtube-dl will
+                                         resume downloads if possible.
+    --no-continue                        Do not resume partially downloaded
+                                         files (restart from beginning)
+    --no-part                            Do not use .part files - write directly
+                                         into output file
+    --no-mtime                           Do not use the Last-modified header to
+                                         set the file modification time
+    --write-description                  Write video description to a
+                                         .description file
+    --write-info-json                    Write video metadata to a .info.json
+                                         file
+    --write-annotations                  Write video annotations to a
+                                         .annotations.xml file
+    --load-info-json FILE                JSON file containing the video
+                                         information (created with the "--write-
+                                         info-json" option)
+    --cookies FILE                       File to read cookies from and dump
+                                         cookie jar in
+    --cache-dir DIR                      Location in the filesystem where
+                                         youtube-dl can store some downloaded
+                                         information permanently. By default
+                                         $XDG_CACHE_HOME/youtube-dl or
+                                         ~/.cache/youtube-dl . At the moment,
+                                         only YouTube player files (for videos
+                                         with obfuscated signatures) are cached,
+                                         but that may change.
+    --no-cache-dir                       Disable filesystem caching
+    --rm-cache-dir                       Delete all filesystem cache files
+
+## Thumbnail Options:
+    --write-thumbnail                    Write thumbnail image to disk
+    --write-all-thumbnails               Write all thumbnail image formats to
+                                         disk
+    --list-thumbnails                    Simulate and list all available
+                                         thumbnail formats
 
 
 ## Verbosity / Simulation Options:
 ## Verbosity / Simulation Options:
-    -q, --quiet                      Activate quiet mode
-    --no-warnings                    Ignore warnings
-    -s, --simulate                   Do not download the video and do not write
-                                     anything to disk
-    --skip-download                  Do not download the video
-    -g, --get-url                    Simulate, quiet but print URL
-    -e, --get-title                  Simulate, quiet but print title
-    --get-id                         Simulate, quiet but print id
-    --get-thumbnail                  Simulate, quiet but print thumbnail URL
-    --get-description                Simulate, quiet but print video description
-    --get-duration                   Simulate, quiet but print video length
-    --get-filename                   Simulate, quiet but print output filename
-    --get-format                     Simulate, quiet but print output format
-    -j, --dump-json                  Simulate, quiet but print JSON information.
-                                     See the "OUTPUT TEMPLATE" for a description
-                                     of available keys.
-    -J, --dump-single-json           Simulate, quiet but print JSON information
-                                     for each command-line argument. If the URL
-                                     refers to a playlist, dump the whole
-                                     playlist information in a single line.
-    --print-json                     Be quiet and print the video information as
-                                     JSON (video is still being downloaded).
-    --newline                        Output progress bar as new lines
-    --no-progress                    Do not print progress bar
-    --console-title                  Display progress in console titlebar
-    -v, --verbose                    Print various debugging information
-    --dump-pages                     Print downloaded pages encoded using base64
-                                     to debug problems (very verbose)
-    --write-pages                    Write downloaded intermediary pages to
-                                     files in the current directory to debug
-                                     problems
-    --print-traffic                  Display sent and read HTTP traffic
-    -C, --call-home                  Contact the youtube-dl server for debugging
-    --no-call-home                   Do NOT contact the youtube-dl server for
-                                     debugging
+    -q, --quiet                          Activate quiet mode
+    --no-warnings                        Ignore warnings
+    -s, --simulate                       Do not download the video and do not
+                                         write anything to disk
+    --skip-download                      Do not download the video
+    -g, --get-url                        Simulate, quiet but print URL
+    -e, --get-title                      Simulate, quiet but print title
+    --get-id                             Simulate, quiet but print id
+    --get-thumbnail                      Simulate, quiet but print thumbnail URL
+    --get-description                    Simulate, quiet but print video
+                                         description
+    --get-duration                       Simulate, quiet but print video length
+    --get-filename                       Simulate, quiet but print output
+                                         filename
+    --get-format                         Simulate, quiet but print output format
+    -j, --dump-json                      Simulate, quiet but print JSON
+                                         information. See the "OUTPUT TEMPLATE"
+                                         for a description of available keys.
+    -J, --dump-single-json               Simulate, quiet but print JSON
+                                         information for each command-line
+                                         argument. If the URL refers to a
+                                         playlist, dump the whole playlist
+                                         information in a single line.
+    --print-json                         Be quiet and print the video
+                                         information as JSON (video is still
+                                         being downloaded).
+    --newline                            Output progress bar as new lines
+    --no-progress                        Do not print progress bar
+    --console-title                      Display progress in console titlebar
+    -v, --verbose                        Print various debugging information
+    --dump-pages                         Print downloaded pages encoded using
+                                         base64 to debug problems (very verbose)
+    --write-pages                        Write downloaded intermediary pages to
+                                         files in the current directory to debug
+                                         problems
+    --print-traffic                      Display sent and read HTTP traffic
+    -C, --call-home                      Contact the youtube-dl server for
+                                         debugging
+    --no-call-home                       Do NOT contact the youtube-dl server
+                                         for debugging
 
 
 ## Workarounds:
 ## Workarounds:
-    --encoding ENCODING              Force the specified encoding (experimental)
-    --no-check-certificate           Suppress HTTPS certificate validation
-    --prefer-insecure                Use an unencrypted connection to retrieve
-                                     information about the video. (Currently
-                                     supported only for YouTube)
-    --user-agent UA                  Specify a custom user agent
-    --referer URL                    Specify a custom referer, use if the video
-                                     access is restricted to one domain
-    --add-header FIELD:VALUE         Specify a custom HTTP header and its value,
-                                     separated by a colon ':'. You can use this
-                                     option multiple times
-    --bidi-workaround                Work around terminals that lack
-                                     bidirectional text support. Requires bidiv
-                                     or fribidi executable in PATH
-    --sleep-interval SECONDS         Number of seconds to sleep before each
-                                     download when used alone or a lower bound
-                                     of a range for randomized sleep before each
-                                     download (minimum possible number of
-                                     seconds to sleep) when used along with
-                                     --max-sleep-interval.
-    --max-sleep-interval SECONDS     Upper bound of a range for randomized sleep
-                                     before each download (maximum possible
-                                     number of seconds to sleep). Must only be
-                                     used along with --min-sleep-interval.
+    --encoding ENCODING                  Force the specified encoding
+                                         (experimental)
+    --no-check-certificate               Suppress HTTPS certificate validation
+    --prefer-insecure                    Use an unencrypted connection to
+                                         retrieve information about the video.
+                                         (Currently supported only for YouTube)
+    --user-agent UA                      Specify a custom user agent
+    --referer URL                        Specify a custom referer, use if the
+                                         video access is restricted to one
+                                         domain
+    --add-header FIELD:VALUE             Specify a custom HTTP header and its
+                                         value, separated by a colon ':'. You
+                                         can use this option multiple times
+    --bidi-workaround                    Work around terminals that lack
+                                         bidirectional text support. Requires
+                                         bidiv or fribidi executable in PATH
+    --sleep-interval SECONDS             Number of seconds to sleep before each
+                                         download when used alone or a lower
+                                         bound of a range for randomized sleep
+                                         before each download (minimum possible
+                                         number of seconds to sleep) when used
+                                         along with --max-sleep-interval.
+    --max-sleep-interval SECONDS         Upper bound of a range for randomized
+                                         sleep before each download (maximum
+                                         possible number of seconds to sleep).
+                                         Must only be used along with --min-
+                                         sleep-interval.
 
 
 ## Video Format Options:
 ## Video Format Options:
-    -f, --format FORMAT              Video format code, see the "FORMAT
-                                     SELECTION" for all the info
-    --all-formats                    Download all available video formats
-    --prefer-free-formats            Prefer free video formats unless a specific
-                                     one is requested
-    -F, --list-formats               List all available formats of requested
-                                     videos
-    --youtube-skip-dash-manifest     Do not download the DASH manifests and
-                                     related data on YouTube videos
-    --merge-output-format FORMAT     If a merge is required (e.g.
-                                     bestvideo+bestaudio), output to given
-                                     container format. One of mkv, mp4, ogg,
-                                     webm, flv. Ignored if no merge is required
+    -f, --format FORMAT                  Video format code, see the "FORMAT
+                                         SELECTION" for all the info
+    --all-formats                        Download all available video formats
+    --prefer-free-formats                Prefer free video formats unless a
+                                         specific one is requested
+    -F, --list-formats                   List all available formats of requested
+                                         videos
+    --youtube-skip-dash-manifest         Do not download the DASH manifests and
+                                         related data on YouTube videos
+    --merge-output-format FORMAT         If a merge is required (e.g.
+                                         bestvideo+bestaudio), output to given
+                                         container format. One of mkv, mp4, ogg,
+                                         webm, flv. Ignored if no merge is
+                                         required
 
 
 ## Subtitle Options:
 ## Subtitle Options:
-    --write-sub                      Write subtitle file
-    --write-auto-sub                 Write automatically generated subtitle file
-                                     (YouTube only)
-    --all-subs                       Download all the available subtitles of the
-                                     video
-    --list-subs                      List all available subtitles for the video
-    --sub-format FORMAT              Subtitle format, accepts formats
-                                     preference, for example: "srt" or
-                                     "ass/srt/best"
-    --sub-lang LANGS                 Languages of the subtitles to download
-                                     (optional) separated by commas, use --list-
-                                     subs for available language tags
+    --write-sub                          Write subtitle file
+    --write-auto-sub                     Write automatically generated subtitle
+                                         file (YouTube only)
+    --all-subs                           Download all the available subtitles of
+                                         the video
+    --list-subs                          List all available subtitles for the
+                                         video
+    --sub-format FORMAT                  Subtitle format, accepts formats
+                                         preference, for example: "srt" or
+                                         "ass/srt/best"
+    --sub-lang LANGS                     Languages of the subtitles to download
+                                         (optional) separated by commas, use
+                                         --list-subs for available language tags
 
 
 ## Authentication Options:
 ## Authentication Options:
-    -u, --username USERNAME          Login with this account ID
-    -p, --password PASSWORD          Account password. If this option is left
-                                     out, youtube-dl will ask interactively.
-    -2, --twofactor TWOFACTOR        Two-factor authentication code
-    -n, --netrc                      Use .netrc authentication data
-    --video-password PASSWORD        Video password (vimeo, youku)
+    -u, --username USERNAME              Login with this account ID
+    -p, --password PASSWORD              Account password. If this option is
+                                         left out, youtube-dl will ask
+                                         interactively.
+    -2, --twofactor TWOFACTOR            Two-factor authentication code
+    -n, --netrc                          Use .netrc authentication data
+    --video-password PASSWORD            Video password (vimeo, youku)
 
 
 ## Adobe Pass Options:
 ## Adobe Pass Options:
-    --ap-mso MSO                     Adobe Pass multiple-system operator (TV
-                                     provider) identifier, use --ap-list-mso for
-                                     a list of available MSOs
-    --ap-username USERNAME           Multiple-system operator account login
-    --ap-password PASSWORD           Multiple-system operator account password.
-                                     If this option is left out, youtube-dl will
-                                     ask interactively.
-    --ap-list-mso                    List all supported multiple-system
-                                     operators
+    --ap-mso MSO                         Adobe Pass multiple-system operator (TV
+                                         provider) identifier, use --ap-list-mso
+                                         for a list of available MSOs
+    --ap-username USERNAME               Multiple-system operator account login
+    --ap-password PASSWORD               Multiple-system operator account
+                                         password. If this option is left out,
+                                         youtube-dl will ask interactively.
+    --ap-list-mso                        List all supported multiple-system
+                                         operators
 
 
 ## Post-processing Options:
 ## Post-processing Options:
-    -x, --extract-audio              Convert video files to audio-only files
-                                     (requires ffmpeg or avconv and ffprobe or
-                                     avprobe)
-    --audio-format FORMAT            Specify audio format: "best", "aac",
-                                     "flac", "mp3", "m4a", "opus", "vorbis", or
-                                     "wav"; "best" by default; No effect without
-                                     -x
-    --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert
-                                     a value between 0 (better) and 9 (worse)
-                                     for VBR or a specific bitrate like 128K
-                                     (default 5)
-    --recode-video FORMAT            Encode the video to another format if
-                                     necessary (currently supported:
-                                     mp4|flv|ogg|webm|mkv|avi)
-    --postprocessor-args ARGS        Give these arguments to the postprocessor
-    -k, --keep-video                 Keep the video file on disk after the post-
-                                     processing; the video is erased by default
-    --no-post-overwrites             Do not overwrite post-processed files; the
-                                     post-processed files are overwritten by
-                                     default
-    --embed-subs                     Embed subtitles in the video (only for mp4,
-                                     webm and mkv videos)
-    --embed-thumbnail                Embed thumbnail in the audio as cover art
-    --add-metadata                   Write metadata to the video file
-    --metadata-from-title FORMAT     Parse additional metadata like song title /
-                                     artist from the video title. The format
-                                     syntax is the same as --output. Regular
-                                     expression with named capture groups may
-                                     also be used. The parsed parameters replace
-                                     existing values. Example: --metadata-from-
-                                     title "%(artist)s - %(title)s" matches a
-                                     title like "Coldplay - Paradise". Example
-                                     (regex): --metadata-from-title
-                                     "(?P<artist>.+?) - (?P<title>.+)"
-    --xattrs                         Write metadata to the video file's xattrs
-                                     (using dublin core and xdg standards)
-    --fixup POLICY                   Automatically correct known faults of the
-                                     file. One of never (do nothing), warn (only
-                                     emit a warning), detect_or_warn (the
-                                     default; fix file if we can, warn
-                                     otherwise)
-    --prefer-avconv                  Prefer avconv over ffmpeg for running the
-                                     postprocessors
-    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
-                                     postprocessors (default)
-    --ffmpeg-location PATH           Location of the ffmpeg/avconv binary;
-                                     either the path to the binary or its
-                                     containing directory.
-    --exec CMD                       Execute a command on the file after
-                                     downloading and post-processing, similar to
-                                     find's -exec syntax. Example: --exec 'adb
-                                     push {} /sdcard/Music/ && rm {}'
-    --convert-subs FORMAT            Convert the subtitles to other format
-                                     (currently supported: srt|ass|vtt|lrc)
+    -x, --extract-audio                  Convert video files to audio-only files
+                                         (requires ffmpeg/avconv and
+                                         ffprobe/avprobe)
+    --audio-format FORMAT                Specify audio format: "best", "aac",
+                                         "flac", "mp3", "m4a", "opus", "vorbis",
+                                         or "wav"; "best" by default; No effect
+                                         without -x
+    --audio-quality QUALITY              Specify ffmpeg/avconv audio quality,
+                                         insert a value between 0 (better) and 9
+                                         (worse) for VBR or a specific bitrate
+                                         like 128K (default 5)
+    --recode-video FORMAT                Encode the video to another format if
+                                         necessary (currently supported:
+                                         mp4|flv|ogg|webm|mkv|avi)
+    --postprocessor-args ARGS            Give these arguments to the
+                                         postprocessor
+    -k, --keep-video                     Keep the video file on disk after the
+                                         post-processing; the video is erased by
+                                         default
+    --no-post-overwrites                 Do not overwrite post-processed files;
+                                         the post-processed files are
+                                         overwritten by default
+    --embed-subs                         Embed subtitles in the video (only for
+                                         mp4, webm and mkv videos)
+    --embed-thumbnail                    Embed thumbnail in the audio as cover
+                                         art
+    --add-metadata                       Write metadata to the video file
+    --metadata-from-title FORMAT         Parse additional metadata like song
+                                         title / artist from the video title.
+                                         The format syntax is the same as
+                                         --output. Regular expression with named
+                                         capture groups may also be used. The
+                                         parsed parameters replace existing
+                                         values. Example: --metadata-from-title
+                                         "%(artist)s - %(title)s" matches a
+                                         title like "Coldplay - Paradise".
+                                         Example (regex): --metadata-from-title
+                                         "(?P<artist>.+?) - (?P<title>.+)"
+    --xattrs                             Write metadata to the video file's
+                                         xattrs (using dublin core and xdg
+                                         standards)
+    --fixup POLICY                       Automatically correct known faults of
+                                         the file. One of never (do nothing),
+                                         warn (only emit a warning),
+                                         detect_or_warn (the default; fix file
+                                         if we can, warn otherwise)
+    --prefer-avconv                      Prefer avconv over ffmpeg for running
+                                         the postprocessors
+    --prefer-ffmpeg                      Prefer ffmpeg over avconv for running
+                                         the postprocessors (default)
+    --ffmpeg-location PATH               Location of the ffmpeg/avconv binary;
+                                         either the path to the binary or its
+                                         containing directory.
+    --exec CMD                           Execute a command on the file after
+                                         downloading and post-processing,
+                                         similar to find's -exec syntax.
+                                         Example: --exec 'adb push {}
+                                         /sdcard/Music/ && rm {}'
+    --convert-subs FORMAT                Convert the subtitles to other format
+                                         (currently supported: srt|ass|vtt|lrc)
 
 
 # CONFIGURATION
 # CONFIGURATION
 
 
@@ -583,7 +620,7 @@ Available for the media that is a track or a part of a music album:
  - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
  - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
  - `release_year` (numeric): Year (YYYY) when the album was released
  - `release_year` (numeric): Year (YYYY) when the album was released
 
 
-Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
+Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).
 
 
 For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
 For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
 
 
@@ -856,7 +893,7 @@ Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) yout
 
 
 ### The exe throws an error due to missing `MSVCR100.dll`
 ### The exe throws an error due to missing `MSVCR100.dll`
 
 
-To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
+To run the exe you need to install first the [Microsoft Visual C++ 2010 Service Pack 1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe).
 
 
 ### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
 ### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
 
 

File diff suppressed because it is too large
+ 27 - 10
docs/supportedsites.md


+ 12 - 4
test/test_YoutubeDL.py

@@ -464,6 +464,7 @@ class TestFormatSelection(unittest.TestCase):
         assert_syntax_error('+bestaudio')
         assert_syntax_error('+bestaudio')
         assert_syntax_error('bestvideo+')
         assert_syntax_error('bestvideo+')
         assert_syntax_error('/')
         assert_syntax_error('/')
+        assert_syntax_error('bestvideo+bestvideo+bestaudio')
 
 
     def test_format_filtering(self):
     def test_format_filtering(self):
         formats = [
         formats = [
@@ -632,13 +633,20 @@ class TestYoutubeDL(unittest.TestCase):
             'title2': '%PATH%',
             'title2': '%PATH%',
         }
         }
 
 
-        def fname(templ):
-            ydl = YoutubeDL({'outtmpl': templ})
+        def fname(templ, na_placeholder='NA'):
+            params = {'outtmpl': templ}
+            if na_placeholder != 'NA':
+                params['outtmpl_na_placeholder'] = na_placeholder
+            ydl = YoutubeDL(params)
             return ydl.prepare_filename(info)
             return ydl.prepare_filename(info)
         self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
         self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
         self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
         self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
-        # Replace missing fields with 'NA'
-        self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
+        NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
+        # Replace missing fields with 'NA' by default
+        self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
+        # Or by provided placeholder
+        self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
+        self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
         self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
         self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
         self.assertEqual(fname('%(height)6d.%(ext)s'), '  1080.mp4')
         self.assertEqual(fname('%(height)6d.%(ext)s'), '  1080.mp4')
         self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080  .mp4')
         self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080  .mp4')

+ 0 - 9
test/test_all_urls.py

@@ -70,15 +70,6 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 
 
-    def test_youtube_extract(self):
-        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
-        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
-        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
-        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
-        assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
-        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
-        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
-
     def test_facebook_matching(self):
     def test_facebook_matching(self):
         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))

+ 10 - 0
test/test_execution.py

@@ -39,6 +39,16 @@ class TestExecution(unittest.TestCase):
         _, stderr = p.communicate()
         _, stderr = p.communicate()
         self.assertFalse(stderr)
         self.assertFalse(stderr)
 
 
+    def test_lazy_extractors(self):
+        try:
+            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
+            subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
+        finally:
+            try:
+                os.remove('youtube_dl/extractor/lazy_extractors.py')
+            except (IOError, OSError):
+                pass
+
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
     unittest.main()
     unittest.main()

File diff suppressed because it is too large
+ 0 - 20
test/test_youtube_chapters.py


+ 14 - 5
test/test_youtube_lists.py

@@ -12,6 +12,7 @@ from test.helper import FakeYDL
 
 
 from youtube_dl.extractor import (
 from youtube_dl.extractor import (
     YoutubePlaylistIE,
     YoutubePlaylistIE,
+    YoutubeTabIE,
     YoutubeIE,
     YoutubeIE,
 )
 )
 
 
@@ -57,14 +58,22 @@ class TestYoutubeLists(unittest.TestCase):
         entries = result['entries']
         entries = result['entries']
         self.assertEqual(len(entries), 100)
         self.assertEqual(len(entries), 100)
 
 
-    def test_youtube_flat_playlist_titles(self):
+    def test_youtube_flat_playlist_extraction(self):
         dl = FakeYDL()
         dl = FakeYDL()
         dl.params['extract_flat'] = True
         dl.params['extract_flat'] = True
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv')
+        ie = YoutubeTabIE(dl)
+        result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc')
         self.assertIsPlaylist(result)
         self.assertIsPlaylist(result)
-        for entry in result['entries']:
-            self.assertTrue(entry.get('title'))
+        entries = list(result['entries'])
+        self.assertTrue(len(entries) == 1)
+        video = entries[0]
+        self.assertEqual(video['_type'], 'url_transparent')
+        self.assertEqual(video['ie_key'], 'Youtube')
+        self.assertEqual(video['id'], 'BaW_jenozKc')
+        self.assertEqual(video['url'], 'BaW_jenozKc')
+        self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
+        self.assertEqual(video['duration'], 10)
+        self.assertEqual(video['uploader'], 'Philipp Hagemeister')
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':

+ 26 - 0
test/test_youtube_misc.py

@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from youtube_dl.extractor import YoutubeIE
+
+
+class TestYoutubeMisc(unittest.TestCase):
+    def test_youtube_extract(self):
+        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
+        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
+        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
+
+
+if __name__ == '__main__':
+    unittest.main()

+ 11 - 26
test/test_youtube_signature.py

@@ -19,55 +19,46 @@ from youtube_dl.compat import compat_str, compat_urlretrieve
 _TESTS = [
 _TESTS = [
     (
     (
         'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
         'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
-        'js',
         86,
         86,
         '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
         '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
     ),
     ),
     (
     (
         'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
         'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
-        'js',
         85,
         85,
         '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
         '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
     ),
     ),
     (
     (
         'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
         'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
-        'js',
         90,
         90,
         ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
         ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
     ),
     ),
     (
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
-        'js',
         84,
         84,
         'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
         'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
     ),
     ),
     (
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
-        'js',
         '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
         '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
         'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
         'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
     ),
     ),
     (
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
-        'js',
         84,
         84,
         '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
         '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
     ),
     ),
     (
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
-        'js',
         83,
         83,
         '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
         '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
     ),
     ),
     (
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
-        'js',
         '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
         '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
         '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
         '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
     ),
     ),
     (
     (
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
-        'js',
         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
     )
     )
@@ -78,6 +69,10 @@ class TestPlayerInfo(unittest.TestCase):
     def test_youtube_extract_player_info(self):
     def test_youtube_extract_player_info(self):
         PLAYER_URLS = (
         PLAYER_URLS = (
             ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
             ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
+            ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
+            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
+            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
+            ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
             # obsolete
             # obsolete
             ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
             ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
             ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
             ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
@@ -86,13 +81,9 @@ class TestPlayerInfo(unittest.TestCase):
             ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
             ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
             ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
             ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
             ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
             ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
-            ('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
-            ('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
         )
         )
         for player_url, expected_player_id in PLAYER_URLS:
         for player_url, expected_player_id in PLAYER_URLS:
-            expected_player_type = player_url.split('.')[-1]
-            player_type, player_id = YoutubeIE._extract_player_info(player_url)
-            self.assertEqual(player_type, expected_player_type)
+            player_id = YoutubeIE._extract_player_info(player_url)
             self.assertEqual(player_id, expected_player_id)
             self.assertEqual(player_id, expected_player_id)
 
 
 
 
@@ -104,13 +95,13 @@ class TestSignature(unittest.TestCase):
             os.mkdir(self.TESTDATA_DIR)
             os.mkdir(self.TESTDATA_DIR)
 
 
 
 
-def make_tfunc(url, stype, sig_input, expected_sig):
+def make_tfunc(url, sig_input, expected_sig):
     m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
     m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
     assert m, '%r should follow URL format' % url
     assert m, '%r should follow URL format' % url
     test_id = m.group(1)
     test_id = m.group(1)
 
 
     def test_func(self):
     def test_func(self):
-        basename = 'player-%s.%s' % (test_id, stype)
+        basename = 'player-%s.js' % test_id
         fn = os.path.join(self.TESTDATA_DIR, basename)
         fn = os.path.join(self.TESTDATA_DIR, basename)
 
 
         if not os.path.exists(fn):
         if not os.path.exists(fn):
@@ -118,22 +109,16 @@ def make_tfunc(url, stype, sig_input, expected_sig):
 
 
         ydl = FakeYDL()
         ydl = FakeYDL()
         ie = YoutubeIE(ydl)
         ie = YoutubeIE(ydl)
-        if stype == 'js':
-            with io.open(fn, encoding='utf-8') as testf:
-                jscode = testf.read()
-            func = ie._parse_sig_js(jscode)
-        else:
-            assert stype == 'swf'
-            with open(fn, 'rb') as testf:
-                swfcode = testf.read()
-            func = ie._parse_sig_swf(swfcode)
+        with io.open(fn, encoding='utf-8') as testf:
+            jscode = testf.read()
+        func = ie._parse_sig_js(jscode)
         src_sig = (
         src_sig = (
             compat_str(string.printable[:sig_input])
             compat_str(string.printable[:sig_input])
             if isinstance(sig_input, int) else sig_input)
             if isinstance(sig_input, int) else sig_input)
         got_sig = func(src_sig)
         got_sig = func(src_sig)
         self.assertEqual(got_sig, expected_sig)
         self.assertEqual(got_sig, expected_sig)
 
 
-    test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
+    test_func.__name__ = str('test_signature_js_' + test_id)
     setattr(TestSignature, test_func.__name__, test_func)
     setattr(TestSignature, test_func.__name__, test_func)
 
 
 
 

+ 163 - 123
youtube_dl/YoutubeDL.py

@@ -163,6 +163,7 @@ class YoutubeDL(object):
     simulate:          Do not download the video files.
     simulate:          Do not download the video files.
     format:            Video format code. See options.py for more information.
     format:            Video format code. See options.py for more information.
     outtmpl:           Template for output names.
     outtmpl:           Template for output names.
+    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
     restrictfilenames: Do not allow "&" and spaces in file names
     restrictfilenames: Do not allow "&" and spaces in file names
     ignoreerrors:      Do not stop on download errors.
     ignoreerrors:      Do not stop on download errors.
     force_generic_extractor: Force downloader to use the generic extractor
     force_generic_extractor: Force downloader to use the generic extractor
@@ -338,6 +339,8 @@ class YoutubeDL(object):
     _pps = []
     _pps = []
     _download_retcode = None
     _download_retcode = None
     _num_downloads = None
     _num_downloads = None
+    _playlist_level = 0
+    _playlist_urls = set()
     _screen_file = None
     _screen_file = None
 
 
     def __init__(self, params=None, auto_init=True):
     def __init__(self, params=None, auto_init=True):
@@ -656,7 +659,7 @@ class YoutubeDL(object):
             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                  for k, v in template_dict.items()
                                  for k, v in template_dict.items()
                                  if v is not None and not isinstance(v, (list, tuple, dict)))
                                  if v is not None and not isinstance(v, (list, tuple, dict)))
-            template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
 
 
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 
 
@@ -676,8 +679,8 @@ class YoutubeDL(object):
 
 
             # Missing numeric fields used together with integer presentation types
             # Missing numeric fields used together with integer presentation types
             # in format specification will break the argument substitution since
             # in format specification will break the argument substitution since
-            # string 'NA' is returned for missing fields. We will patch output
-            # template for missing fields to meet string presentation type.
+            # string NA placeholder is returned for missing fields. We will patch
+            # output template for missing fields to meet string presentation type.
             for numeric_field in self._NUMERIC_FIELDS:
             for numeric_field in self._NUMERIC_FIELDS:
                 if numeric_field not in template_dict:
                 if numeric_field not in template_dict:
                     # As of [1] format syntax is:
                     # As of [1] format syntax is:
@@ -770,11 +773,20 @@ class YoutubeDL(object):
 
 
     def extract_info(self, url, download=True, ie_key=None, extra_info={},
     def extract_info(self, url, download=True, ie_key=None, extra_info={},
                      process=True, force_generic_extractor=False):
                      process=True, force_generic_extractor=False):
-        '''
-        Returns a list with a dictionary for each video we find.
-        If 'download', also downloads the videos.
-        extra_info is a dict containing the extra values to add to each result
-        '''
+        """
+        Return a list with a dictionary for each video extracted.
+
+        Arguments:
+        url -- URL to extract
+
+        Keyword arguments:
+        download -- whether to download videos during extraction
+        ie_key -- extractor key hint
+        extra_info -- dictionary containing the extra values to add to each result
+        process -- whether to resolve all unresolved references (URLs, playlist items),
+            must be True for download to work.
+        force_generic_extractor -- force using the generic extractor
+        """
 
 
         if not ie_key and force_generic_extractor:
         if not ie_key and force_generic_extractor:
             ie_key = 'Generic'
             ie_key = 'Generic'
@@ -906,115 +918,23 @@ class YoutubeDL(object):
             return self.process_ie_result(
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
                 new_result, download=download, extra_info=extra_info)
         elif result_type in ('playlist', 'multi_video'):
         elif result_type in ('playlist', 'multi_video'):
-            # We process each entry in the playlist
-            playlist = ie_result.get('title') or ie_result.get('id')
-            self.to_screen('[download] Downloading playlist: %s' % playlist)
-
-            playlist_results = []
-
-            playliststart = self.params.get('playliststart', 1) - 1
-            playlistend = self.params.get('playlistend')
-            # For backwards compatibility, interpret -1 as whole list
-            if playlistend == -1:
-                playlistend = None
-
-            playlistitems_str = self.params.get('playlist_items')
-            playlistitems = None
-            if playlistitems_str is not None:
-                def iter_playlistitems(format):
-                    for string_segment in format.split(','):
-                        if '-' in string_segment:
-                            start, end = string_segment.split('-')
-                            for item in range(int(start), int(end) + 1):
-                                yield int(item)
-                        else:
-                            yield int(string_segment)
-                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
-
-            ie_entries = ie_result['entries']
-
-            def make_playlistitems_entries(list_ie_entries):
-                num_entries = len(list_ie_entries)
-                return [
-                    list_ie_entries[i - 1] for i in playlistitems
-                    if -num_entries <= i - 1 < num_entries]
-
-            def report_download(num_entries):
+            # Protect from infinite recursion due to recursively nested playlists
+            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
+            webpage_url = ie_result['webpage_url']
+            if webpage_url in self._playlist_urls:
                 self.to_screen(
                 self.to_screen(
-                    '[%s] playlist %s: Downloading %d videos' %
-                    (ie_result['extractor'], playlist, num_entries))
-
-            if isinstance(ie_entries, list):
-                n_all_entries = len(ie_entries)
-                if playlistitems:
-                    entries = make_playlistitems_entries(ie_entries)
-                else:
-                    entries = ie_entries[playliststart:playlistend]
-                n_entries = len(entries)
-                self.to_screen(
-                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
-                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
-            elif isinstance(ie_entries, PagedList):
-                if playlistitems:
-                    entries = []
-                    for item in playlistitems:
-                        entries.extend(ie_entries.getslice(
-                            item - 1, item
-                        ))
-                else:
-                    entries = ie_entries.getslice(
-                        playliststart, playlistend)
-                n_entries = len(entries)
-                report_download(n_entries)
-            else:  # iterable
-                if playlistitems:
-                    entries = make_playlistitems_entries(list(itertools.islice(
-                        ie_entries, 0, max(playlistitems))))
-                else:
-                    entries = list(itertools.islice(
-                        ie_entries, playliststart, playlistend))
-                n_entries = len(entries)
-                report_download(n_entries)
-
-            if self.params.get('playlistreverse', False):
-                entries = entries[::-1]
-
-            if self.params.get('playlistrandom', False):
-                random.shuffle(entries)
-
-            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
-
-            for i, entry in enumerate(entries, 1):
-                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
-                # This __x_forwarded_for_ip thing is a bit ugly but requires
-                # minimal changes
-                if x_forwarded_for:
-                    entry['__x_forwarded_for_ip'] = x_forwarded_for
-                extra = {
-                    'n_entries': n_entries,
-                    'playlist': playlist,
-                    'playlist_id': ie_result.get('id'),
-                    'playlist_title': ie_result.get('title'),
-                    'playlist_uploader': ie_result.get('uploader'),
-                    'playlist_uploader_id': ie_result.get('uploader_id'),
-                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
-                    'extractor': ie_result['extractor'],
-                    'webpage_url': ie_result['webpage_url'],
-                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
-                    'extractor_key': ie_result['extractor_key'],
-                }
-
-                reason = self._match_entry(entry, incomplete=True)
-                if reason is not None:
-                    self.to_screen('[download] ' + reason)
-                    continue
+                    '[download] Skipping already downloaded playlist: %s'
+                    % ie_result.get('title') or ie_result.get('id'))
+                return
 
 
-                entry_result = self.__process_iterable_entry(entry, download, extra)
-                # TODO: skip failed (empty) entries?
-                playlist_results.append(entry_result)
-            ie_result['entries'] = playlist_results
-            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
-            return ie_result
+            self._playlist_level += 1
+            self._playlist_urls.add(webpage_url)
+            try:
+                return self.__process_playlist(ie_result, download)
+            finally:
+                self._playlist_level -= 1
+                if not self._playlist_level:
+                    self._playlist_urls.clear()
         elif result_type == 'compat_list':
         elif result_type == 'compat_list':
             self.report_warning(
             self.report_warning(
                 'Extractor %s returned a compat_list result. '
                 'Extractor %s returned a compat_list result. '
@@ -1039,6 +959,118 @@ class YoutubeDL(object):
         else:
         else:
             raise Exception('Invalid result type: %s' % result_type)
             raise Exception('Invalid result type: %s' % result_type)
 
 
+    def __process_playlist(self, ie_result, download):
+        # We process each entry in the playlist
+        playlist = ie_result.get('title') or ie_result.get('id')
+
+        self.to_screen('[download] Downloading playlist: %s' % playlist)
+
+        playlist_results = []
+
+        playliststart = self.params.get('playliststart', 1) - 1
+        playlistend = self.params.get('playlistend')
+        # For backwards compatibility, interpret -1 as whole list
+        if playlistend == -1:
+            playlistend = None
+
+        playlistitems_str = self.params.get('playlist_items')
+        playlistitems = None
+        if playlistitems_str is not None:
+            def iter_playlistitems(format):
+                for string_segment in format.split(','):
+                    if '-' in string_segment:
+                        start, end = string_segment.split('-')
+                        for item in range(int(start), int(end) + 1):
+                            yield int(item)
+                    else:
+                        yield int(string_segment)
+            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+
+        ie_entries = ie_result['entries']
+
+        def make_playlistitems_entries(list_ie_entries):
+            num_entries = len(list_ie_entries)
+            return [
+                list_ie_entries[i - 1] for i in playlistitems
+                if -num_entries <= i - 1 < num_entries]
+
+        def report_download(num_entries):
+            self.to_screen(
+                '[%s] playlist %s: Downloading %d videos' %
+                (ie_result['extractor'], playlist, num_entries))
+
+        if isinstance(ie_entries, list):
+            n_all_entries = len(ie_entries)
+            if playlistitems:
+                entries = make_playlistitems_entries(ie_entries)
+            else:
+                entries = ie_entries[playliststart:playlistend]
+            n_entries = len(entries)
+            self.to_screen(
+                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
+                (ie_result['extractor'], playlist, n_all_entries, n_entries))
+        elif isinstance(ie_entries, PagedList):
+            if playlistitems:
+                entries = []
+                for item in playlistitems:
+                    entries.extend(ie_entries.getslice(
+                        item - 1, item
+                    ))
+            else:
+                entries = ie_entries.getslice(
+                    playliststart, playlistend)
+            n_entries = len(entries)
+            report_download(n_entries)
+        else:  # iterable
+            if playlistitems:
+                entries = make_playlistitems_entries(list(itertools.islice(
+                    ie_entries, 0, max(playlistitems))))
+            else:
+                entries = list(itertools.islice(
+                    ie_entries, playliststart, playlistend))
+            n_entries = len(entries)
+            report_download(n_entries)
+
+        if self.params.get('playlistreverse', False):
+            entries = entries[::-1]
+
+        if self.params.get('playlistrandom', False):
+            random.shuffle(entries)
+
+        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
+        for i, entry in enumerate(entries, 1):
+            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+            # This __x_forwarded_for_ip thing is a bit ugly but requires
+            # minimal changes
+            if x_forwarded_for:
+                entry['__x_forwarded_for_ip'] = x_forwarded_for
+            extra = {
+                'n_entries': n_entries,
+                'playlist': playlist,
+                'playlist_id': ie_result.get('id'),
+                'playlist_title': ie_result.get('title'),
+                'playlist_uploader': ie_result.get('uploader'),
+                'playlist_uploader_id': ie_result.get('uploader_id'),
+                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+                'extractor': ie_result['extractor'],
+                'webpage_url': ie_result['webpage_url'],
+                'webpage_url_basename': url_basename(ie_result['webpage_url']),
+                'extractor_key': ie_result['extractor_key'],
+            }
+
+            reason = self._match_entry(entry, incomplete=True)
+            if reason is not None:
+                self.to_screen('[download] ' + reason)
+                continue
+
+            entry_result = self.__process_iterable_entry(entry, download, extra)
+            # TODO: skip failed (empty) entries?
+            playlist_results.append(entry_result)
+        ie_result['entries'] = playlist_results
+        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+        return ie_result
+
     @__handle_extraction_exceptions
     @__handle_extraction_exceptions
     def __process_iterable_entry(self, entry, download, extra_info):
     def __process_iterable_entry(self, entry, download, extra_info):
         return self.process_ie_result(
         return self.process_ie_result(
@@ -1226,6 +1258,8 @@ class YoutubeDL(object):
                         group = _parse_format_selection(tokens, inside_group=True)
                         group = _parse_format_selection(tokens, inside_group=True)
                         current_selector = FormatSelector(GROUP, group, [])
                         current_selector = FormatSelector(GROUP, group, [])
                     elif string == '+':
                     elif string == '+':
+                        if inside_merge:
+                            raise syntax_error('Unexpected "+"', start)
                         video_selector = current_selector
                         video_selector = current_selector
                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
                         if not video_selector or not audio_selector:
                         if not video_selector or not audio_selector:
@@ -1486,14 +1520,18 @@ class YoutubeDL(object):
         if 'display_id' not in info_dict and 'id' in info_dict:
         if 'display_id' not in info_dict and 'id' in info_dict:
             info_dict['display_id'] = info_dict['id']
             info_dict['display_id'] = info_dict['id']
 
 
-        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
-            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
-            # see http://bugs.python.org/issue1646728)
-            try:
-                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
-                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
-            except (ValueError, OverflowError, OSError):
-                pass
+        for ts_key, date_key in (
+                ('timestamp', 'upload_date'),
+                ('release_timestamp', 'release_date'),
+        ):
+            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+                # see http://bugs.python.org/issue1646728)
+                try:
+                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
+                except (ValueError, OverflowError, OSError):
+                    pass
 
 
         # Auto generate title fields corresponding to the *_number fields when missing
         # Auto generate title fields corresponding to the *_number fields when missing
         # in order to always have clean titles. This is very common for TV series.
         # in order to always have clean titles. This is very common for TV series.
@@ -1777,6 +1815,8 @@ class YoutubeDL(object):
                     os.makedirs(dn)
                     os.makedirs(dn)
                 return True
                 return True
             except (OSError, IOError) as err:
             except (OSError, IOError) as err:
+                if isinstance(err, OSError) and err.errno == errno.EEXIST:
+                    return True
                 self.report_error('unable to create directory ' + error_to_compat_str(err))
                 self.report_error('unable to create directory ' + error_to_compat_str(err))
                 return False
                 return False
 
 

+ 1 - 0
youtube_dl/__init__.py

@@ -340,6 +340,7 @@ def _real_main(argv=None):
         'format': opts.format,
         'format': opts.format,
         'listformats': opts.listformats,
         'listformats': opts.listformats,
         'outtmpl': outtmpl,
         'outtmpl': outtmpl,
+        'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
         'autonumber_size': opts.autonumber_size,
         'autonumber_size': opts.autonumber_size,
         'autonumber_start': opts.autonumber_start,
         'autonumber_start': opts.autonumber_start,
         'restrictfilenames': opts.restrictfilenames,
         'restrictfilenames': opts.restrictfilenames,

+ 10 - 0
youtube_dl/compat.py

@@ -73,6 +73,15 @@ try:
 except ImportError:  # Python 2
 except ImportError:  # Python 2
     import Cookie as compat_cookies
     import Cookie as compat_cookies
 
 
+if sys.version_info[0] == 2:
+    class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
+        def load(self, rawdata):
+            if isinstance(rawdata, compat_str):
+                rawdata = str(rawdata)
+            return super(compat_cookies_SimpleCookie, self).load(rawdata)
+else:
+    compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+
 try:
 try:
     import html.entities as compat_html_entities
     import html.entities as compat_html_entities
 except ImportError:  # Python 2
 except ImportError:  # Python 2
@@ -3000,6 +3009,7 @@ __all__ = [
     'compat_cookiejar',
     'compat_cookiejar',
     'compat_cookiejar_Cookie',
     'compat_cookiejar_Cookie',
     'compat_cookies',
     'compat_cookies',
+    'compat_cookies_SimpleCookie',
     'compat_ctypes_WINFUNCTYPE',
     'compat_ctypes_WINFUNCTYPE',
     'compat_etree_Element',
     'compat_etree_Element',
     'compat_etree_fromstring',
     'compat_etree_fromstring',

+ 72 - 62
youtube_dl/extractor/abcnews.py

@@ -1,14 +1,15 @@
 # coding: utf-8
 # coding: utf-8
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
-import calendar
 import re
 import re
-import time
 
 
 from .amp import AMPIE
 from .amp import AMPIE
 from .common import InfoExtractor
 from .common import InfoExtractor
-from .youtube import YoutubeIE
-from ..compat import compat_urlparse
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+    try_get,
+)
 
 
 
 
 class AbcNewsVideoIE(AMPIE):
 class AbcNewsVideoIE(AMPIE):
@@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
                         (?:
                         (?:
                             abcnews\.go\.com/
                             abcnews\.go\.com/
                             (?:
                             (?:
-                                [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
-                                video/embed\?.*?\bid=
+                                (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
+                                video/(?:embed|itemfeed)\?.*?\bid=
                             )|
                             )|
                             fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
                             fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
                         )
                         )
@@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
             'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
             'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
             'duration': 180,
             'duration': 180,
             'thumbnail': r're:^https?://.*\.jpg$',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1380454200,
+            'upload_date': '20130929',
         },
         },
         'params': {
         'params': {
             # m3u8 download
             # m3u8 download
@@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
     }, {
     }, {
         'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
         'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
+        'only_matching': True,
+    }, {
+        'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
@@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
     _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
     _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
 
 
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
+        # Youtube Embeds
+        'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
         'info_dict': {
         'info_dict': {
-            'id': '10505354',
-            'ext': 'flv',
-            'display_id': 'dramatic-video-rare-death-job-america',
-            'title': 'Occupational Hazards',
-            'description': 'Nightline investigates the dangers that lurk at various jobs.',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'upload_date': '20100428',
-            'timestamp': 1272412800,
+            'id': '51286501',
+            'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
+            'description': 'Billingsley went from a child actor to Hollywood power player.',
         },
         },
-        'add_ie': ['AbcNewsVideo'],
+        'playlist_count': 5,
     }, {
     }, {
         'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
         'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
         'info_dict': {
         'info_dict': {
             'id': '38897857',
             'id': '38897857',
             'ext': 'mp4',
             'ext': 'mp4',
-            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
             'title': 'Justin Timberlake Drops Hints For Secret Single',
             'title': 'Justin Timberlake Drops Hints For Secret Single',
             'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
             'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
-            'upload_date': '20160515',
-            'timestamp': 1463329500,
+            'upload_date': '20160505',
+            'timestamp': 1462442280,
         },
         },
         'params': {
         'params': {
             # m3u8 download
             # m3u8 download
@@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
     }, {
     }, {
         'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
         'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        # inline.type == 'video'
+        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-        video_url = self._search_regex(
-            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
-        full_video_url = compat_urlparse.urljoin(url, video_url)
-
-        youtube_url = YoutubeIE._extract_url(webpage)
-
-        timestamp = None
-        date_str = self._html_search_regex(
-            r'<span[^>]+class="timestamp">([^<]+)</span>',
-            webpage, 'timestamp', fatal=False)
-        if date_str:
-            tz_offset = 0
-            if date_str.endswith(' ET'):  # Eastern Time
-                tz_offset = -5
-                date_str = date_str[:-3]
-            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
-            for date_format in date_formats:
-                try:
-                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
-                except ValueError:
-                    continue
-            if timestamp is not None:
-                timestamp -= tz_offset * 3600
-
-        entry = {
-            '_type': 'url_transparent',
-            'ie_key': AbcNewsVideoIE.ie_key(),
-            'url': full_video_url,
-            'id': video_id,
-            'display_id': display_id,
-            'timestamp': timestamp,
-        }
-
-        if youtube_url:
-            entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
-            return self.playlist_result(entries)
-
-        return entry
+        story_id = self._match_id(url)
+        webpage = self._download_webpage(url, story_id)
+        story = self._parse_json(self._search_regex(
+            r"window\['__abcnews__'\]\s*=\s*({.+?});",
+            webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
+        article_contents = story.get('articleContents') or {}
+
+        def entries():
+            featured_video = story.get('featuredVideo') or {}
+            feed = try_get(featured_video, lambda x: x['video']['feed'])
+            if feed:
+                yield {
+                    '_type': 'url',
+                    'id': featured_video.get('id'),
+                    'title': featured_video.get('name'),
+                    'url': feed,
+                    'thumbnail': featured_video.get('images'),
+                    'description': featured_video.get('description'),
+                    'timestamp': parse_iso8601(featured_video.get('uploadDate')),
+                    'duration': parse_duration(featured_video.get('duration')),
+                    'ie_key': AbcNewsVideoIE.ie_key(),
+                }
+
+            for inline in (article_contents.get('inlines') or []):
+                inline_type = inline.get('type')
+                if inline_type == 'iframe':
+                    iframe_url = try_get(inline, lambda x: x['attrs']['src'])
+                    if iframe_url:
+                        yield self.url_result(iframe_url)
+                elif inline_type == 'video':
+                    video_id = inline.get('id')
+                    if video_id:
+                        yield {
+                            '_type': 'url',
+                            'id': video_id,
+                            'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
+                            'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
+                            'description': inline.get('description'),
+                            'duration': parse_duration(inline.get('duration')),
+                            'ie_key': AbcNewsVideoIE.ie_key(),
+                        }
+
+        return self.playlist_result(
+            entries(), story_id, article_contents.get('headline'),
+            article_contents.get('subHead'))

+ 130 - 68
youtube_dl/extractor/adn.py

@@ -10,6 +10,7 @@ import random
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..aes import aes_cbc_decrypt
 from ..aes import aes_cbc_decrypt
 from ..compat import (
 from ..compat import (
+    compat_HTTPError,
     compat_b64decode,
     compat_b64decode,
     compat_ord,
     compat_ord,
 )
 )
@@ -18,11 +19,14 @@ from ..utils import (
     bytes_to_long,
     bytes_to_long,
     ExtractorError,
     ExtractorError,
     float_or_none,
     float_or_none,
+    int_or_none,
     intlist_to_bytes,
     intlist_to_bytes,
     long_to_bytes,
     long_to_bytes,
     pkcs1pad,
     pkcs1pad,
     strip_or_none,
     strip_or_none,
-    urljoin,
+    try_get,
+    unified_strdate,
+    urlencode_postdata,
 )
 )
 
 
 
 
@@ -31,16 +35,30 @@ class ADNIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
     _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
     _TEST = {
     _TEST = {
         'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
         'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
-        'md5': 'e497370d847fd79d9d4c74be55575c7a',
+        'md5': '0319c99885ff5547565cacb4f3f9348d',
         'info_dict': {
         'info_dict': {
             'id': '7778',
             'id': '7778',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
+            'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
             'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
             'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
+            'series': 'Blue Exorcist - Kyôto Saga',
+            'duration': 1467,
+            'release_date': '20170106',
+            'comment_count': int,
+            'average_rating': float,
+            'season_number': 2,
+            'episode': 'Début des hostilités',
+            'episode_number': 1,
         }
         }
     }
     }
+
+    _NETRC_MACHINE = 'animedigitalnetwork'
     _BASE_URL = 'http://animedigitalnetwork.fr'
     _BASE_URL = 'http://animedigitalnetwork.fr'
-    _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
+    _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
+    _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
+    _HEADERS = {}
+    _LOGIN_ERR_MESSAGE = 'Unable to log in'
+    _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
     _POS_ALIGN_MAP = {
     _POS_ALIGN_MAP = {
         'start': 1,
         'start': 1,
         'end': 3,
         'end': 3,
@@ -54,26 +72,24 @@ class ADNIE(InfoExtractor):
     def _ass_subtitles_timecode(seconds):
     def _ass_subtitles_timecode(seconds):
         return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
         return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
 
 
-    def _get_subtitles(self, sub_path, video_id):
-        if not sub_path:
+    def _get_subtitles(self, sub_url, video_id):
+        if not sub_url:
             return None
             return None
 
 
         enc_subtitles = self._download_webpage(
         enc_subtitles = self._download_webpage(
-            urljoin(self._BASE_URL, sub_path),
-            video_id, 'Downloading subtitles location', fatal=False) or '{}'
+            sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
         subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
         subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
         if subtitle_location:
         if subtitle_location:
             enc_subtitles = self._download_webpage(
             enc_subtitles = self._download_webpage(
-                urljoin(self._BASE_URL, subtitle_location),
-                video_id, 'Downloading subtitles data', fatal=False,
-                headers={'Origin': 'https://animedigitalnetwork.fr'})
+                subtitle_location, video_id, 'Downloading subtitles data',
+                fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
         if not enc_subtitles:
         if not enc_subtitles:
             return None
             return None
 
 
         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
         dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
         dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
             bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
             bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
-            bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
+            bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
             bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
             bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
         ))
         ))
         subtitles_json = self._parse_json(
         subtitles_json = self._parse_json(
@@ -117,61 +133,100 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
             }])
             }])
         return subtitles
         return subtitles
 
 
+    def _real_initialize(self):
+        username, password = self._get_login_info()
+        if not username:
+            return
+        try:
+            access_token = (self._download_json(
+                self._API_BASE_URL + 'authentication/login', None,
+                'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
+                data=urlencode_postdata({
+                    'password': password,
+                    'rememberMe': False,
+                    'source': 'Web',
+                    'username': username,
+                })) or {}).get('accessToken')
+            if access_token:
+                self._HEADERS = {'authorization': 'Bearer ' + access_token}
+        except ExtractorError as e:
+            message = None
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                resp = self._parse_json(
+                    e.cause.read().decode(), None, fatal=False) or {}
+                message = resp.get('message') or resp.get('code')
+            self.report_warning(message or self._LOGIN_ERR_MESSAGE)
+
     def _real_extract(self, url):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        player_config = self._parse_json(self._search_regex(
-            r'playerConfig\s*=\s*({.+});', webpage,
-            'player config', default='{}'), video_id, fatal=False)
-        if not player_config:
-            config_url = urljoin(self._BASE_URL, self._search_regex(
-                r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
-                webpage, 'config url'))
-            player_config = self._download_json(
-                config_url, video_id,
-                'Downloading player config JSON metadata')['player']
-
-        video_info = {}
-        video_info_str = self._search_regex(
-            r'videoInfo\s*=\s*({.+});', webpage,
-            'video info', fatal=False)
-        if video_info_str:
-            video_info = self._parse_json(
-                video_info_str, video_id, fatal=False) or {}
-
-        options = player_config.get('options') or {}
-        metas = options.get('metas') or {}
-        links = player_config.get('links') or {}
-        sub_path = player_config.get('subtitles')
-        error = None
-        if not links:
-            links_url = player_config.get('linksurl') or options['videoUrl']
-            token = options['token']
-            self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
-            message = bytes_to_intlist(json.dumps({
-                'k': self._K,
-                'e': 60,
-                't': token,
-            }))
+        video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
+        player = self._download_json(
+            video_base_url + 'configuration', video_id,
+            'Downloading player config JSON metadata',
+            headers=self._HEADERS)['player']
+        options = player['options']
+
+        user = options['user']
+        if not user.get('hasAccess'):
+            self.raise_login_required()
+
+        token = self._download_json(
+            user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
+            video_id, 'Downloading access token', headers={
+                'x-player-refresh-token': user['refreshToken']
+            }, data=b'')['token']
+
+        links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
+        self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
+        message = bytes_to_intlist(json.dumps({
+            'k': self._K,
+            't': token,
+        }))
+
+        # Sometimes authentication fails for no good reason, retry with
+        # a different random padding
+        links_data = None
+        for _ in range(3):
             padded_message = intlist_to_bytes(pkcs1pad(message, 128))
             padded_message = intlist_to_bytes(pkcs1pad(message, 128))
             n, e = self._RSA_KEY
             n, e = self._RSA_KEY
             encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
             encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
             authorization = base64.b64encode(encrypted_message).decode()
             authorization = base64.b64encode(encrypted_message).decode()
-            links_data = self._download_json(
-                urljoin(self._BASE_URL, links_url), video_id,
-                'Downloading links JSON metadata', headers={
-                    'Authorization': 'Bearer ' + authorization,
-                })
-            links = links_data.get('links') or {}
-            metas = metas or links_data.get('meta') or {}
-            sub_path = sub_path or links_data.get('subtitles') or \
-                'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
-            sub_path += '&token=' + token
-            error = links_data.get('error')
-        title = metas.get('title') or video_info['title']
+
+            try:
+                links_data = self._download_json(
+                    links_url, video_id, 'Downloading links JSON metadata', headers={
+                        'X-Player-Token': authorization
+                    }, query={
+                        'freeWithAds': 'true',
+                        'adaptive': 'false',
+                        'withMetadata': 'true',
+                        'source': 'Web'
+                    })
+                break
+            except ExtractorError as e:
+                if not isinstance(e.cause, compat_HTTPError):
+                    raise e
+
+                if e.cause.code == 401:
+                    # This usually goes away with a different random pkcs1pad, so retry
+                    continue
+
+                error = self._parse_json(e.cause.read(), video_id)
+                message = error.get('message')
+                if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
+                    self.raise_geo_restricted(msg=message)
+                raise ExtractorError(message)
+        else:
+            raise ExtractorError('Giving up retrying')
+
+        links = links_data.get('links') or {}
+        metas = links_data.get('metadata') or {}
+        sub_url = (links.get('subtitles') or {}).get('all')
+        video_info = links_data.get('video') or {}
+        title = metas['title']
 
 
         formats = []
         formats = []
-        for format_id, qualities in links.items():
+        for format_id, qualities in (links.get('streaming') or {}).items():
             if not isinstance(qualities, dict):
             if not isinstance(qualities, dict):
                 continue
                 continue
             for quality, load_balancer_url in qualities.items():
             for quality, load_balancer_url in qualities.items():
@@ -189,19 +244,26 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
                     for f in m3u8_formats:
                     for f in m3u8_formats:
                         f['language'] = 'fr'
                         f['language'] = 'fr'
                 formats.extend(m3u8_formats)
                 formats.extend(m3u8_formats)
-        if not error:
-            error = options.get('error')
-        if not formats and error:
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
+        video = (self._download_json(
+            self._API_BASE_URL + 'video/%s' % video_id, video_id,
+            'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
+        show = video.get('show') or {}
+
         return {
         return {
             'id': video_id,
             'id': video_id,
             'title': title,
             'title': title,
-            'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
-            'thumbnail': video_info.get('image'),
+            'description': strip_or_none(metas.get('summary') or video.get('summary')),
+            'thumbnail': video_info.get('image') or player.get('image'),
             'formats': formats,
             'formats': formats,
-            'subtitles': self.extract_subtitles(sub_path, video_id),
-            'episode': metas.get('subtitle') or video_info.get('videoTitle'),
-            'series': video_info.get('playlistTitle'),
+            'subtitles': self.extract_subtitles(sub_url, video_id),
+            'episode': metas.get('subtitle') or video.get('name'),
+            'episode_number': int_or_none(video.get('shortNumber')),
+            'series': show.get('title'),
+            'season_number': int_or_none(video.get('season')),
+            'duration': int_or_none(video_info.get('duration') or video.get('duration')),
+            'release_date': unified_strdate(video.get('releaseDate')),
+            'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
+            'comment_count': int_or_none(video.get('commentsCount')),
         }
         }

+ 2 - 2
youtube_dl/extractor/aenetworks.py

@@ -252,11 +252,11 @@ class AENetworksShowIE(AENetworksListBaseIE):
     _TESTS = [{
     _TESTS = [{
         'url': 'http://www.history.com/shows/ancient-aliens',
         'url': 'http://www.history.com/shows/ancient-aliens',
         'info_dict': {
         'info_dict': {
-            'id': 'SH012427480000',
+            'id': 'SERIES1574',
             'title': 'Ancient Aliens',
             'title': 'Ancient Aliens',
             'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
             'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
         },
         },
-        'playlist_mincount': 168,
+        'playlist_mincount': 150,
     }]
     }]
     _RESOURCE = 'series'
     _RESOURCE = 'series'
     _ITEMS_KEY = 'episodes'
     _ITEMS_KEY = 'episodes'

+ 32 - 9
youtube_dl/extractor/aljazeera.py

@@ -1,13 +1,16 @@
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
+import json
+import re
+
 from .common import InfoExtractor
 from .common import InfoExtractor
 
 
 
 
 class AlJazeeraIE(InfoExtractor):
 class AlJazeeraIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
 
 
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
+        'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
         'info_dict': {
         'info_dict': {
             'id': '3792260579001',
             'id': '3792260579001',
             'ext': 'mp4',
             'ext': 'mp4',
@@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
         'add_ie': ['BrightcoveNew'],
         'add_ie': ['BrightcoveNew'],
         'skip': 'Not accessible from Travis CI server',
         'skip': 'Not accessible from Travis CI server',
     }, {
     }, {
-        'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
+        'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
         'only_matching': True,
         'only_matching': True,
     }]
     }]
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        program_name = self._match_id(url)
-        webpage = self._download_webpage(url, program_name)
-        brightcove_id = self._search_regex(
-            r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
-        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+        post_type, name = re.match(self._VALID_URL, url).groups()
+        post_type = {
+            'features': 'post',
+            'program': 'episode',
+            'videos': 'video',
+        }[post_type.split('/')[0]]
+        video = self._download_json(
+            'https://www.aljazeera.com/graphql', name, query={
+                'operationName': 'SingleArticleQuery',
+                'variables': json.dumps({
+                    'name': name,
+                    'postType': post_type,
+                }),
+            }, headers={
+                'wp-site': 'aje',
+            })['data']['article']['video']
+        video_id = video['id']
+        account_id = video.get('accountId') or '665003303001'
+        player_id = video.get('playerId') or 'BkeSH5BDb'
+        return self.url_result(
+            self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
+            'BrightcoveNew', video_id)

+ 95 - 2
youtube_dl/extractor/americastestkitchen.py

@@ -1,13 +1,16 @@
 # coding: utf-8
 # coding: utf-8
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
+import json
 import re
 import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     clean_html,
     clean_html,
+    int_or_none,
     try_get,
     try_get,
     unified_strdate,
     unified_strdate,
+    unified_timestamp,
 )
 )
 
 
 
 
@@ -22,8 +25,8 @@ class AmericasTestKitchenIE(InfoExtractor):
             'ext': 'mp4',
             'ext': 'mp4',
             'description': 'md5:64e606bfee910627efc4b5f050de92b3',
             'description': 'md5:64e606bfee910627efc4b5f050de92b3',
             'thumbnail': r're:^https?://',
             'thumbnail': r're:^https?://',
-            'timestamp': 1523664000,
-            'upload_date': '20180414',
+            'timestamp': 1523318400,
+            'upload_date': '20180410',
             'release_date': '20180410',
             'release_date': '20180410',
             'series': "America's Test Kitchen",
             'series': "America's Test Kitchen",
             'season_number': 18,
             'season_number': 18,
@@ -33,6 +36,27 @@ class AmericasTestKitchenIE(InfoExtractor):
         'params': {
         'params': {
             'skip_download': True,
             'skip_download': True,
         },
         },
+    }, {
+        # Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
+        'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
+        'md5': '06451608c57651e985a498e69cec17e5',
+        'info_dict': {
+            'id': '5fbe8c61bda2010001c6763b',
+            'title': 'Simple Chicken Dinner',
+            'ext': 'mp4',
+            'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
+            'thumbnail': r're:^https?://',
+            'timestamp': 1610755200,
+            'upload_date': '20210116',
+            'release_date': '20210116',
+            'series': "America's Test Kitchen",
+            'season_number': 21,
+            'episode': 'Simple Chicken Dinner',
+            'episode_number': 3,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
     }, {
         'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
         'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
         'only_matching': True,
         'only_matching': True,
@@ -60,7 +84,76 @@ class AmericasTestKitchenIE(InfoExtractor):
             'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
             'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
             'ie_key': 'Zype',
             'ie_key': 'Zype',
             'description': clean_html(video.get('description')),
             'description': clean_html(video.get('description')),
+            'timestamp': unified_timestamp(video.get('publishDate')),
             'release_date': unified_strdate(video.get('publishDate')),
             'release_date': unified_strdate(video.get('publishDate')),
+            'episode_number': int_or_none(episode.get('number')),
+            'season_number': int_or_none(episode.get('season')),
             'series': try_get(episode, lambda x: x['show']['title']),
             'series': try_get(episode, lambda x: x['show']['title']),
             'episode': episode.get('title'),
             'episode': episode.get('title'),
         }
         }
+
+
+class AmericasTestKitchenSeasonIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+    _TESTS = [{
+        # ATK Season
+        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
+        'info_dict': {
+            'id': 'season_1',
+            'title': 'Season 1',
+        },
+        'playlist_count': 13,
+    }, {
+        # Cooks Country Season
+        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'info_dict': {
+            'id': 'season_12',
+            'title': 'Season 12',
+        },
+        'playlist_count': 13,
+    }]
+
+    def _real_extract(self, url):
+        show_name, season_number = re.match(self._VALID_URL, url).groups()
+        season_number = int(season_number)
+
+        slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+
+        season = 'Season %d' % season_number
+
+        season_search = self._download_json(
+            'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
+            season, headers={
+                'Origin': 'https://www.%s.com' % show_name,
+                'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
+                'X-Algolia-Application-Id': 'Y1FNZXUI30',
+            }, query={
+                'facetFilters': json.dumps([
+                    'search_season_list:' + season,
+                    'search_document_klass:episode',
+                    'search_show_slug:' + slug,
+                ]),
+                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+                'attributesToHighlight': '',
+                'hitsPerPage': 1000,
+            })
+
+        def entries():
+            for episode in (season_search.get('hits') or []):
+                search_url = episode.get('search_url')
+                if not search_url:
+                    continue
+                yield {
+                    '_type': 'url',
+                    'url': 'https://www.%s.com%s' % (show_name, search_url),
+                    'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+                    'title': episode.get('title'),
+                    'description': episode.get('description'),
+                    'timestamp': unified_timestamp(episode.get('search_document_date')),
+                    'season_number': season_number,
+                    'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
+                    'ie_key': AmericasTestKitchenIE.ie_key(),
+                }
+
+        return self.playlist_result(
+            entries(), 'season_%d' % season_number, season)

+ 2 - 1
youtube_dl/extractor/amp.py

@@ -8,6 +8,7 @@ from ..utils import (
     int_or_none,
     int_or_none,
     mimetype2ext,
     mimetype2ext,
     parse_iso8601,
     parse_iso8601,
+    unified_timestamp,
     url_or_none,
     url_or_none,
 )
 )
 
 
@@ -88,7 +89,7 @@ class AMPIE(InfoExtractor):
 
 
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
-        timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+        timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
 
 
         return {
         return {
             'id': video_id,
             'id': video_id,

+ 16 - 10
youtube_dl/extractor/animeondemand.py

@@ -116,8 +116,6 @@ class AnimeOnDemandIE(InfoExtractor):
             r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
             r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
             webpage, 'anime description', default=None)
             webpage, 'anime description', default=None)
 
 
-        entries = []
-
         def extract_info(html, video_id, num=None):
         def extract_info(html, video_id, num=None):
             title, description = [None] * 2
             title, description = [None] * 2
             formats = []
             formats = []
@@ -233,7 +231,7 @@ class AnimeOnDemandIE(InfoExtractor):
                 self._sort_formats(info['formats'])
                 self._sort_formats(info['formats'])
                 f = common_info.copy()
                 f = common_info.copy()
                 f.update(info)
                 f.update(info)
-                entries.append(f)
+                yield f
 
 
             # Extract teaser/trailer only when full episode is not available
             # Extract teaser/trailer only when full episode is not available
             if not info['formats']:
             if not info['formats']:
@@ -247,7 +245,7 @@ class AnimeOnDemandIE(InfoExtractor):
                         'title': m.group('title'),
                         'title': m.group('title'),
                         'url': urljoin(url, m.group('href')),
                         'url': urljoin(url, m.group('href')),
                     })
                     })
-                    entries.append(f)
+                    yield f
 
 
         def extract_episodes(html):
         def extract_episodes(html):
             for num, episode_html in enumerate(re.findall(
             for num, episode_html in enumerate(re.findall(
@@ -275,7 +273,8 @@ class AnimeOnDemandIE(InfoExtractor):
                     'episode_number': episode_number,
                     'episode_number': episode_number,
                 }
                 }
 
 
-                extract_entries(episode_html, video_id, common_info)
+                for e in extract_entries(episode_html, video_id, common_info):
+                    yield e
 
 
         def extract_film(html, video_id):
         def extract_film(html, video_id):
             common_info = {
             common_info = {
@@ -283,11 +282,18 @@ class AnimeOnDemandIE(InfoExtractor):
                 'title': anime_title,
                 'title': anime_title,
                 'description': anime_description,
                 'description': anime_description,
             }
             }
-            extract_entries(html, video_id, common_info)
+            for e in extract_entries(html, video_id, common_info):
+                yield e
 
 
-        extract_episodes(webpage)
+        def entries():
+            has_episodes = False
+            for e in extract_episodes(webpage):
+                has_episodes = True
+                yield e
 
 
-        if not entries:
-            extract_film(webpage, anime_id)
+            if not has_episodes:
+                for e in extract_film(webpage, anime_id):
+                    yield e
 
 
-        return self.playlist_result(entries, anime_id, anime_title, anime_description)
+        return self.playlist_result(
+            entries(), anime_id, anime_title, anime_description)

+ 9 - 3
youtube_dl/extractor/aol.py

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 
 import re
 import re
 
 
-from .common import InfoExtractor
+from .yahoo import YahooIE
 from ..compat import (
 from ..compat import (
     compat_parse_qs,
     compat_parse_qs,
     compat_urllib_parse_urlparse,
     compat_urllib_parse_urlparse,
@@ -15,9 +15,9 @@ from ..utils import (
 )
 )
 
 
 
 
-class AolIE(InfoExtractor):
+class AolIE(YahooIE):
     IE_NAME = 'aol.com'
     IE_NAME = 'aol.com'
-    _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
+    _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
 
 
     _TESTS = [{
     _TESTS = [{
         # video with 5min ID
         # video with 5min ID
@@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
     }, {
     }, {
         'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
         'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        # Yahoo video
+        'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_id = self._match_id(url)
+        if '-' in video_id:
+            return self._extract_yahoo_video(video_id, 'us')
 
 
         response = self._download_json(
         response = self._download_json(
             'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
             'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,

+ 24 - 23
youtube_dl/extractor/apa.py

@@ -6,25 +6,21 @@ import re
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     determine_ext,
     determine_ext,
-    js_to_json,
+    int_or_none,
     url_or_none,
     url_or_none,
 )
 )
 
 
 
 
 class APAIE(InfoExtractor):
 class APAIE(InfoExtractor):
-    _VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
     _TESTS = [{
     _TESTS = [{
         'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
         'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
         'md5': '2b12292faeb0a7d930c778c7a5b4759b',
         'md5': '2b12292faeb0a7d930c778c7a5b4759b',
         'info_dict': {
         'info_dict': {
-            'id': 'jjv85FdZ',
+            'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
-            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
             'thumbnail': r're:^https?://.*\.jpg$',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 254,
-            'timestamp': 1519211149,
-            'upload_date': '20180221',
         },
         },
     }, {
     }, {
         'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
         'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
@@ -46,9 +42,11 @@ class APAIE(InfoExtractor):
                 webpage)]
                 webpage)]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id, base_url = mobj.group('id', 'base_url')
 
 
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
+            '%s/player/%s' % (base_url, video_id), video_id)
 
 
         jwplatform_id = self._search_regex(
         jwplatform_id = self._search_regex(
             r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
             r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
@@ -59,16 +57,18 @@ class APAIE(InfoExtractor):
                 'jwplatform:' + jwplatform_id, ie='JWPlatform',
                 'jwplatform:' + jwplatform_id, ie='JWPlatform',
                 video_id=video_id)
                 video_id=video_id)
 
 
-        sources = self._parse_json(
-            self._search_regex(
-                r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
-            video_id, transform_source=js_to_json)
+        def extract(field, name=None):
+            return self._search_regex(
+                r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
+                webpage, name or field, default=None, group='value')
+
+        title = extract('title') or video_id
+        description = extract('description')
+        thumbnail = extract('poster', 'thumbnail')
 
 
         formats = []
         formats = []
-        for source in sources:
-            if not isinstance(source, dict):
-                continue
-            source_url = url_or_none(source.get('file'))
+        for format_id in ('hls', 'progressive'):
+            source_url = url_or_none(extract(format_id))
             if not source_url:
             if not source_url:
                 continue
                 continue
             ext = determine_ext(source_url)
             ext = determine_ext(source_url)
@@ -77,18 +77,19 @@ class APAIE(InfoExtractor):
                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id='hls', fatal=False))
                     m3u8_id='hls', fatal=False))
             else:
             else:
+                height = int_or_none(self._search_regex(
+                    r'(\d+)\.mp4', source_url, 'height', default=None))
                 formats.append({
                 formats.append({
                     'url': source_url,
                     'url': source_url,
+                    'format_id': format_id,
+                    'height': height,
                 })
                 })
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
-        thumbnail = self._search_regex(
-            r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
-            'thumbnail', fatal=False, group='url')
-
         return {
         return {
             'id': video_id,
             'id': video_id,
-            'title': video_id,
+            'title': title,
+            'description': description,
             'thumbnail': thumbnail,
             'thumbnail': thumbnail,
             'formats': formats,
             'formats': formats,
         }
         }

+ 8 - 5
youtube_dl/extractor/appleconnect.py

@@ -9,10 +9,10 @@ from ..utils import (
 
 
 
 
 class AppleConnectIE(InfoExtractor):
 class AppleConnectIE(InfoExtractor):
-    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
-    _TEST = {
+    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
+    _TESTS = [{
         'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
         'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
-        'md5': 'e7c38568a01ea45402570e6029206723',
+        'md5': 'c1d41f72c8bcaf222e089434619316e4',
         'info_dict': {
         'info_dict': {
             'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
             'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
             'ext': 'm4v',
             'ext': 'm4v',
@@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
             'upload_date': '20150710',
             'upload_date': '20150710',
             'timestamp': 1436545535,
             'timestamp': 1436545535,
         },
         },
-    }
+    }, {
+        'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
+        'only_matching': True,
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_id = self._match_id(url)
@@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
 
 
         video_data = self._parse_json(video_json, video_id)
         video_data = self._parse_json(video_json, video_id)
         timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
         timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
-        like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+        like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
 
 
         return {
         return {
             'id': video_id,
             'id': video_id,

+ 1 - 0
youtube_dl/extractor/applepodcasts.py

@@ -42,6 +42,7 @@ class ApplePodcastsIE(InfoExtractor):
         ember_data = self._parse_json(self._search_regex(
         ember_data = self._parse_json(self._search_regex(
             r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
             r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
             webpage, 'ember data'), episode_id)
             webpage, 'ember data'), episode_id)
+        ember_data = ember_data.get(episode_id) or ember_data
         episode = ember_data['data']['attributes']
         episode = ember_data['data']['attributes']
         description = episode.get('description') or {}
         description = episode.get('description') or {}
 
 

+ 42 - 12
youtube_dl/extractor/archiveorg.py

@@ -2,15 +2,17 @@ from __future__ import unicode_literals
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
-    unified_strdate,
     clean_html,
     clean_html,
+    extract_attributes,
+    unified_strdate,
+    unified_timestamp,
 )
 )
 
 
 
 
 class ArchiveOrgIE(InfoExtractor):
 class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
     IE_DESC = 'archive.org videos'
-    _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
+    _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#&]+)'
     _TESTS = [{
     _TESTS = [{
         'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'md5': '8af1d4cf447933ed3c7f4871162602db',
         'md5': '8af1d4cf447933ed3c7f4871162602db',
@@ -19,8 +21,11 @@ class ArchiveOrgIE(InfoExtractor):
             'ext': 'ogg',
             'ext': 'ogg',
             'title': '1968 Demo - FJCC Conference Presentation Reel #1',
             'title': '1968 Demo - FJCC Conference Presentation Reel #1',
             'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
             'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
-            'upload_date': '19681210',
-            'uploader': 'SRI International'
+            'creator': 'SRI International',
+            'release_date': '19681210',
+            'uploader': 'SRI International',
+            'timestamp': 1268695290,
+            'upload_date': '20100315',
         }
         }
     }, {
     }, {
         'url': 'https://archive.org/details/Cops1922',
         'url': 'https://archive.org/details/Cops1922',
@@ -29,22 +34,43 @@ class ArchiveOrgIE(InfoExtractor):
             'id': 'Cops1922',
             'id': 'Cops1922',
             'ext': 'mp4',
             'ext': 'mp4',
             'title': 'Buster Keaton\'s "Cops" (1922)',
             'title': 'Buster Keaton\'s "Cops" (1922)',
-            'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
+            'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
+            'timestamp': 1387699629,
+            'upload_date': '20131222',
         }
         }
     }, {
     }, {
         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_id = self._match_id(url)
         webpage = self._download_webpage(
         webpage = self._download_webpage(
             'http://archive.org/embed/' + video_id, video_id)
             'http://archive.org/embed/' + video_id, video_id)
-        jwplayer_playlist = self._parse_json(self._search_regex(
-            r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
-            webpage, 'jwplayer playlist'), video_id)
-        info = self._parse_jwplayer_data(
-            {'playlist': jwplayer_playlist}, video_id, base_url=url)
+
+        playlist = None
+        play8 = self._search_regex(
+            r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
+            'playlist', default=None)
+        if play8:
+            attrs = extract_attributes(play8)
+            playlist = attrs.get('value')
+        if not playlist:
+            # Old jwplayer fallback
+            playlist = self._search_regex(
+                r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
+                webpage, 'jwplayer playlist', default='[]')
+        jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
+        if jwplayer_playlist:
+            info = self._parse_jwplayer_data(
+                {'playlist': jwplayer_playlist}, video_id, base_url=url)
+        else:
+            # HTML5 media fallback
+            info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+            info['id'] = video_id
 
 
         def get_optional(metadata, field):
         def get_optional(metadata, field):
             return metadata.get(field, [None])[0]
             return metadata.get(field, [None])[0]
@@ -58,8 +84,12 @@ class ArchiveOrgIE(InfoExtractor):
             'description': clean_html(get_optional(metadata, 'description')),
             'description': clean_html(get_optional(metadata, 'description')),
         })
         })
         if info.get('_type') != 'playlist':
         if info.get('_type') != 'playlist':
+            creator = get_optional(metadata, 'creator')
             info.update({
             info.update({
-                'uploader': get_optional(metadata, 'creator'),
-                'upload_date': unified_strdate(get_optional(metadata, 'date')),
+                'creator': creator,
+                'release_date': unified_strdate(get_optional(metadata, 'date')),
+                'uploader': get_optional(metadata, 'publisher') or creator,
+                'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
+                'language': get_optional(metadata, 'language'),
             })
             })
         return info
         return info

+ 74 - 44
youtube_dl/extractor/ard.py

@@ -187,13 +187,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
             if doc.tag == 'rss':
             if doc.tag == 'rss':
                 return GenericIE()._extract_rss(url, video_id, doc)
                 return GenericIE()._extract_rss(url, video_id, doc)
 
 
-        title = self._html_search_regex(
+        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
              r'<meta name="dcterms\.title" content="(.*?)"/>',
              r'<meta name="dcterms\.title" content="(.*?)"/>',
              r'<h4 class="headline">(.*?)</h4>',
              r'<h4 class="headline">(.*?)</h4>',
              r'<title[^>]*>(.*?)</title>'],
              r'<title[^>]*>(.*?)</title>'],
             webpage, 'title')
             webpage, 'title')
-        description = self._html_search_meta(
+        description = self._og_search_description(webpage, default=None) or self._html_search_meta(
             'dcterms.abstract', webpage, 'description', default=None)
             'dcterms.abstract', webpage, 'description', default=None)
         if description is None:
         if description is None:
             description = self._html_search_meta(
             description = self._html_search_meta(
@@ -249,31 +249,40 @@ class ARDMediathekIE(ARDMediathekBaseIE):
 
 
 
 
 class ARDIE(InfoExtractor):
 class ARDIE(InfoExtractor):
-    _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+    _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
     _TESTS = [{
     _TESTS = [{
-        # available till 14.02.2019
-        'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
-        'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
+        # available till 7.01.2022
+        'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
+        'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
         'info_dict': {
         'info_dict': {
-            'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
-            'id': '102',
+            'id': 'maischberger-die-woche-video100',
+            'display_id': 'maischberger-die-woche-video100',
             'ext': 'mp4',
             'ext': 'mp4',
-            'duration': 4435.0,
-            'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
-            'upload_date': '20180214',
+            'duration': 3687.0,
+            'title': 'maischberger. die woche vom 7. Januar 2021',
+            'upload_date': '20210107',
             'thumbnail': r're:^https?://.*\.jpg$',
             'thumbnail': r're:^https?://.*\.jpg$',
         },
         },
     }, {
     }, {
-        'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
+        'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
         'only_matching': True,
         'only_matching': True,
     }, {
     }, {
         'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
         'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = mobj.group('id')
 
 
         player_url = mobj.group('mainurl') + '~playerXml.xml'
         player_url = mobj.group('mainurl') + '~playerXml.xml'
         doc = self._download_xml(player_url, display_id)
         doc = self._download_xml(player_url, display_id)
@@ -284,25 +293,47 @@ class ARDIE(InfoExtractor):
 
 
         formats = []
         formats = []
         for a in video_node.findall('.//asset'):
         for a in video_node.findall('.//asset'):
+            file_name = xpath_text(a, './fileName', default=None)
+            if not file_name:
+                continue
+            format_type = a.attrib.get('type')
+            format_url = url_or_none(file_name)
+            if format_url:
+                ext = determine_ext(file_name)
+                if ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, display_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id=format_type or 'hls', fatal=False))
+                    continue
+                elif ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        update_url_query(format_url, {'hdcore': '3.7.0'}),
+                        display_id, f4m_id=format_type or 'hds', fatal=False))
+                    continue
             f = {
             f = {
-                'format_id': a.attrib['type'],
-                'width': int_or_none(a.find('./frameWidth').text),
-                'height': int_or_none(a.find('./frameHeight').text),
-                'vbr': int_or_none(a.find('./bitrateVideo').text),
-                'abr': int_or_none(a.find('./bitrateAudio').text),
-                'vcodec': a.find('./codecVideo').text,
-                'tbr': int_or_none(a.find('./totalBitrate').text),
+                'format_id': format_type,
+                'width': int_or_none(xpath_text(a, './frameWidth')),
+                'height': int_or_none(xpath_text(a, './frameHeight')),
+                'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
+                'abr': int_or_none(xpath_text(a, './bitrateAudio')),
+                'vcodec': xpath_text(a, './codecVideo'),
+                'tbr': int_or_none(xpath_text(a, './totalBitrate')),
             }
             }
-            if a.find('./serverPrefix').text:
-                f['url'] = a.find('./serverPrefix').text
-                f['playpath'] = a.find('./fileName').text
+            server_prefix = xpath_text(a, './serverPrefix', default=None)
+            if server_prefix:
+                f.update({
+                    'url': server_prefix,
+                    'playpath': file_name,
+                })
             else:
             else:
-                f['url'] = a.find('./fileName').text
+                if not format_url:
+                    continue
+                f['url'] = format_url
             formats.append(f)
             formats.append(f)
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
         return {
         return {
-            'id': mobj.group('id'),
+            'id': xpath_text(video_node, './videoId', default=display_id),
             'formats': formats,
             'formats': formats,
             'display_id': display_id,
             'display_id': display_id,
             'title': video_node.find('./title').text,
             'title': video_node.find('./title').text,
@@ -313,19 +344,19 @@ class ARDIE(InfoExtractor):
 
 
 
 
 class ARDBetaMediathekIE(ARDMediathekBaseIE):
 class ARDBetaMediathekIE(ARDMediathekBaseIE):
-    _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+)'
     _TESTS = [{
     _TESTS = [{
-        'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
-        'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
+        'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
+        'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
         'info_dict': {
         'info_dict': {
             'display_id': 'die-robuste-roswita',
             'display_id': 'die-robuste-roswita',
-            'id': '70153354',
+            'id': '78566716',
             'title': 'Die robuste Roswita',
             'title': 'Die robuste Roswita',
-            'description': r're:^Der Mord.*trüber ist als die Ilm.',
+            'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
             'duration': 5316,
             'duration': 5316,
-            'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
-            'timestamp': 1577047500,
-            'upload_date': '20191222',
+            'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
+            'timestamp': 1596658200,
+            'upload_date': '20200805',
             'ext': 'mp4',
             'ext': 'mp4',
         },
         },
     }, {
     }, {
@@ -343,22 +374,22 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
     }, {
     }, {
         'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
         'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
-        display_id = mobj.group('display_id')
-        if display_id:
-            display_id = display_id.rstrip('/')
-        if not display_id:
-            display_id = video_id
+        video_id = self._match_id(url)
 
 
         player_page = self._download_json(
         player_page = self._download_json(
             'https://api.ardmediathek.de/public-gateway',
             'https://api.ardmediathek.de/public-gateway',
-            display_id, data=json.dumps({
+            video_id, data=json.dumps({
                 'query': '''{
                 'query': '''{
-  playerPage(client:"%s", clipId: "%s") {
+  playerPage(client: "ard", clipId: "%s") {
     blockedByFsk
     blockedByFsk
     broadcastedOn
     broadcastedOn
     maturityContentRating
     maturityContentRating
@@ -388,7 +419,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
       }
       }
     }
     }
   }
   }
-}''' % (mobj.group('client'), video_id),
+}''' % video_id,
             }).encode(), headers={
             }).encode(), headers={
                 'Content-Type': 'application/json'
                 'Content-Type': 'application/json'
             })['data']['playerPage']
             })['data']['playerPage']
@@ -413,7 +444,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                 r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
                 r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
         info.update({
         info.update({
             'age_limit': age_limit,
             'age_limit': age_limit,
-            'display_id': display_id,
             'title': title,
             'title': title,
             'description': description,
             'description': description,
             'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
             'timestamp': unified_timestamp(player_page.get('broadcastedOn')),

+ 101 - 0
youtube_dl/extractor/arnes.py

@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+    remove_start,
+)
+
+
+class ArnesIE(InfoExtractor):
+    IE_NAME = 'video.arnes.si'
+    IE_DESC = 'Arnes Video'
+    _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
+    _TESTS = [{
+        'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
+        'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
+        'info_dict': {
+            'id': 'a1qrWTOQfVoU',
+            'ext': 'mp4',
+            'title': 'Linearna neodvisnost, definicija',
+            'description': 'Linearna neodvisnost, definicija',
+            'license': 'PRIVATE',
+            'creator': 'Polona Oblak',
+            'timestamp': 1585063725,
+            'upload_date': '20200324',
+            'channel': 'Polona Oblak',
+            'channel_id': 'q6pc04hw24cj',
+            'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
+            'duration': 596.75,
+            'view_count': int,
+            'tags': ['linearna_algebra'],
+            'start_time': 10,
+        }
+    }, {
+        'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
+        'only_matching': True,
+    }]
+    _BASE_URL = 'https://video.arnes.si'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
+        title = video['title']
+
+        formats = []
+        for media in (video.get('media') or []):
+            media_url = media.get('url')
+            if not media_url:
+                continue
+            formats.append({
+                'url': self._BASE_URL + media_url,
+                'format_id': remove_start(media.get('format'), 'FORMAT_'),
+                'format_note': media.get('formatTranslation'),
+                'width': int_or_none(media.get('width')),
+                'height': int_or_none(media.get('height')),
+            })
+        self._sort_formats(formats)
+
+        channel = video.get('channel') or {}
+        channel_id = channel.get('url')
+        thumbnail = video.get('thumbnailUrl')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': self._BASE_URL + thumbnail,
+            'description': video.get('description'),
+            'license': video.get('license'),
+            'creator': video.get('author'),
+            'timestamp': parse_iso8601(video.get('creationTime')),
+            'channel': channel.get('name'),
+            'channel_id': channel_id,
+            'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
+            'duration': float_or_none(video.get('duration'), 1000),
+            'view_count': int_or_none(video.get('views')),
+            'tags': video.get('hashtags'),
+            'start_time': int_or_none(compat_parse_qs(
+                compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
+        }

+ 2 - 0
youtube_dl/extractor/awaan.py

@@ -48,6 +48,7 @@ class AWAANBaseIE(InfoExtractor):
             'duration': int_or_none(video_data.get('duration')),
             'duration': int_or_none(video_data.get('duration')),
             'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
             'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
             'is_live': is_live,
             'is_live': is_live,
+            'uploader_id': video_data.get('user_id'),
         }
         }
 
 
 
 
@@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
             'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'upload_date': '20150107',
             'upload_date': '20150107',
             'timestamp': 1420588800,
             'timestamp': 1420588800,
+            'uploader_id': '71',
         },
         },
         'params': {
         'params': {
             # m3u8 download
             # m3u8 download

+ 1 - 1
youtube_dl/extractor/azmedien.py

@@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
         'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
         'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
         'only_matching': True
         'only_matching': True
     }]
     }]
-    _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
+    _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
     _PARTNER_ID = '1719221'
     _PARTNER_ID = '1719221'
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):

+ 37 - 0
youtube_dl/extractor/bandaichannel.py

@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from ..utils import extract_attributes
+
+
+class BandaiChannelIE(BrightcoveNewIE):
+    IE_NAME = 'bandaichannel'
+    _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
+    _TESTS = [{
+        'url': 'https://www.b-ch.com/titles/514/001',
+        'md5': 'a0f2d787baa5729bed71108257f613a4',
+        'info_dict': {
+            'id': '6128044564001',
+            'ext': 'mp4',
+            'title': 'メタルファイターMIKU 第1話',
+            'timestamp': 1580354056,
+            'uploader_id': '5797077852001',
+            'upload_date': '20200130',
+            'duration': 1387.733,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        attrs = extract_attributes(self._search_regex(
+            r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
+        bc = self._download_json(
+            'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
+            video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
+        return self._parse_brightcove_metadata(bc, bc['id'])

+ 3 - 1
youtube_dl/extractor/bandcamp.py

@@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
             'uploader': 'Ben Prunty',
             'uploader': 'Ben Prunty',
             'timestamp': 1396508491,
             'timestamp': 1396508491,
             'upload_date': '20140403',
             'upload_date': '20140403',
+            'release_timestamp': 1396483200,
             'release_date': '20140403',
             'release_date': '20140403',
             'duration': 260.877,
             'duration': 260.877,
             'track': 'Lanius (Battle)',
             'track': 'Lanius (Battle)',
@@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
             'uploader': 'Mastodon',
             'uploader': 'Mastodon',
             'timestamp': 1322005399,
             'timestamp': 1322005399,
             'upload_date': '20111122',
             'upload_date': '20111122',
+            'release_timestamp': 1076112000,
             'release_date': '20040207',
             'release_date': '20040207',
             'duration': 120.79,
             'duration': 120.79,
             'track': 'Hail to Fire',
             'track': 'Hail to Fire',
@@ -197,7 +199,7 @@ class BandcampIE(InfoExtractor):
             'thumbnail': thumbnail,
             'thumbnail': thumbnail,
             'uploader': artist,
             'uploader': artist,
             'timestamp': timestamp,
             'timestamp': timestamp,
-            'release_date': unified_strdate(tralbum.get('album_release_date')),
+            'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
             'duration': duration,
             'duration': duration,
             'track': track,
             'track': track,
             'track_number': track_number,
             'track_number': track_number,

+ 273 - 24
youtube_dl/extractor/bbc.py

@@ -1,31 +1,39 @@
 # coding: utf-8
 # coding: utf-8
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
+import functools
 import itertools
 import itertools
+import json
 import re
 import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
+from ..compat import (
+    compat_etree_Element,
+    compat_HTTPError,
+    compat_parse_qs,
+    compat_str,
+    compat_urllib_parse_urlparse,
+    compat_urlparse,
+)
 from ..utils import (
 from ..utils import (
+    ExtractorError,
+    OnDemandPagedList,
     clean_html,
     clean_html,
     dict_get,
     dict_get,
-    ExtractorError,
     float_or_none,
     float_or_none,
     get_element_by_class,
     get_element_by_class,
     int_or_none,
     int_or_none,
     js_to_json,
     js_to_json,
     parse_duration,
     parse_duration,
     parse_iso8601,
     parse_iso8601,
+    strip_or_none,
     try_get,
     try_get,
     unescapeHTML,
     unescapeHTML,
+    unified_timestamp,
     url_or_none,
     url_or_none,
     urlencode_postdata,
     urlencode_postdata,
     urljoin,
     urljoin,
 )
 )
-from ..compat import (
-    compat_etree_Element,
-    compat_HTTPError,
-    compat_urlparse,
-)
 
 
 
 
 class BBCCoUkIE(InfoExtractor):
 class BBCCoUkIE(InfoExtractor):
@@ -756,8 +764,17 @@ class BBCIE(BBCCoUkIE):
         'only_matching': True,
         'only_matching': True,
     }, {
     }, {
         # custom redirection to www.bbc.com
         # custom redirection to www.bbc.com
+        # also, video with window.__INITIAL_DATA__
         'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
         'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'p02xzws1',
+            'ext': 'mp4',
+            'title': "Pluto may have 'nitrogen glaciers'",
+            'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
+            'thumbnail': r're:https?://.+/.+\.jpg',
+            'timestamp': 1437785037,
+            'upload_date': '20150725',
+        },
     }, {
     }, {
         # single video article embedded with data-media-vpid
         # single video article embedded with data-media-vpid
         'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
         'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
@@ -793,11 +810,25 @@ class BBCIE(BBCCoUkIE):
             'description': 'Learn English words and phrases from this story',
             'description': 'Learn English words and phrases from this story',
         },
         },
         'add_ie': [BBCCoUkIE.ie_key()],
         'add_ie': [BBCCoUkIE.ie_key()],
+    }, {
+        # BBC Reel
+        'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
+        'info_dict': {
+            'id': 'p07c6sb9',
+            'ext': 'mp4',
+            'title': 'How positive thinking is harming your happiness',
+            'alt_title': 'The downsides of positive thinking',
+            'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+            'duration': 235,
+            'thumbnail': r're:https?://.+/p07c9dsr.jpg',
+            'upload_date': '20190604',
+            'categories': ['Psychology'],
+        },
     }]
     }]
 
 
     @classmethod
     @classmethod
     def suitable(cls, url):
     def suitable(cls, url):
-        EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
+        EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
         return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
         return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
                 else super(BBCIE, cls).suitable(url))
                 else super(BBCIE, cls).suitable(url))
 
 
@@ -929,7 +960,7 @@ class BBCIE(BBCCoUkIE):
                                     else:
                                     else:
                                         entry['title'] = info['title']
                                         entry['title'] = info['title']
                                         entry['formats'].extend(info['formats'])
                                         entry['formats'].extend(info['formats'])
-                                except Exception as e:
+                                except ExtractorError as e:
                                     # Some playlist URL may fail with 500, at the same time
                                     # Some playlist URL may fail with 500, at the same time
                                     # the other one may work fine (e.g.
                                     # the other one may work fine (e.g.
                                     # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
                                     # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
@@ -980,6 +1011,37 @@ class BBCIE(BBCCoUkIE):
                 'subtitles': subtitles,
                 'subtitles': subtitles,
             }
             }
 
 
+        # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
+        initial_data = self._parse_json(self._html_search_regex(
+            r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
+            webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
+        if initial_data:
+            init_data = try_get(
+                initial_data, lambda x: x['initData']['items'][0], dict) or {}
+            smp_data = init_data.get('smpData') or {}
+            clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
+            version_id = clip_data.get('versionID')
+            if version_id:
+                title = smp_data['title']
+                formats, subtitles = self._download_media_selector(version_id)
+                self._sort_formats(formats)
+                image_url = smp_data.get('holdingImageURL')
+                display_date = init_data.get('displayDate')
+                topic_title = init_data.get('topicTitle')
+
+                return {
+                    'id': version_id,
+                    'title': title,
+                    'formats': formats,
+                    'alt_title': init_data.get('shortTitle'),
+                    'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
+                    'description': smp_data.get('summary') or init_data.get('shortSummary'),
+                    'upload_date': display_date.replace('-', '') if display_date else None,
+                    'subtitles': subtitles,
+                    'duration': int_or_none(clip_data.get('duration')),
+                    'categories': [topic_title] if topic_title else None,
+                }
+
         # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
         # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
         # There are several setPayload calls may be present but the video
         # There are several setPayload calls may be present but the video
         # seems to be always related to the first one
         # seems to be always related to the first one
@@ -1041,7 +1103,7 @@ class BBCIE(BBCCoUkIE):
                 thumbnail = None
                 thumbnail = None
                 image_url = current_programme.get('image_url')
                 image_url = current_programme.get('image_url')
                 if image_url:
                 if image_url:
-                    thumbnail = image_url.replace('{recipe}', '1920x1920')
+                    thumbnail = image_url.replace('{recipe}', 'raw')
                 return {
                 return {
                     'id': programme_id,
                     'id': programme_id,
                     'title': title,
                     'title': title,
@@ -1114,12 +1176,29 @@ class BBCIE(BBCCoUkIE):
                         continue
                         continue
                     formats, subtitles = self._download_media_selector(item_id)
                     formats, subtitles = self._download_media_selector(item_id)
                     self._sort_formats(formats)
                     self._sort_formats(formats)
+                    item_desc = None
+                    blocks = try_get(media, lambda x: x['summary']['blocks'], list)
+                    if blocks:
+                        summary = []
+                        for block in blocks:
+                            text = try_get(block, lambda x: x['model']['text'], compat_str)
+                            if text:
+                                summary.append(text)
+                        if summary:
+                            item_desc = '\n\n'.join(summary)
+                    item_time = None
+                    for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
+                        if try_get(meta, lambda x: x['label']) == 'Published':
+                            item_time = unified_timestamp(meta.get('timestamp'))
+                            break
                     entries.append({
                     entries.append({
                         'id': item_id,
                         'id': item_id,
                         'title': item_title,
                         'title': item_title,
                         'thumbnail': item.get('holdingImageUrl'),
                         'thumbnail': item.get('holdingImageUrl'),
                         'formats': formats,
                         'formats': formats,
                         'subtitles': subtitles,
                         'subtitles': subtitles,
+                        'timestamp': item_time,
+                        'description': strip_or_none(item_desc),
                     })
                     })
             for resp in (initial_data.get('data') or {}).values():
             for resp in (initial_data.get('data') or {}).values():
                 name = resp.get('name')
                 name = resp.get('name')
@@ -1293,21 +1372,149 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
             playlist_id, title, description)
             playlist_id, title, description)
 
 
 
 
-class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
-    IE_NAME = 'bbc.co.uk:iplayer:playlist'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
-    _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
-    _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
+class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
+    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
+
+    @staticmethod
+    def _get_default(episode, key, default_key='default'):
+        return try_get(episode, lambda x: x[key][default_key])
+
+    def _get_description(self, data):
+        synopsis = data.get(self._DESCRIPTION_KEY) or {}
+        return dict_get(synopsis, ('large', 'medium', 'small'))
+
+    def _fetch_page(self, programme_id, per_page, series_id, page):
+        elements = self._get_elements(self._call_api(
+            programme_id, per_page, page + 1, series_id))
+        for element in elements:
+            episode = self._get_episode(element)
+            episode_id = episode.get('id')
+            if not episode_id:
+                continue
+            thumbnail = None
+            image = self._get_episode_image(episode)
+            if image:
+                thumbnail = image.replace('{recipe}', 'raw')
+            category = self._get_default(episode, 'labels', 'category')
+            yield {
+                '_type': 'url',
+                'id': episode_id,
+                'title': self._get_episode_field(episode, 'subtitle'),
+                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
+                'thumbnail': thumbnail,
+                'description': self._get_description(episode),
+                'categories': [category] if category else None,
+                'series': self._get_episode_field(episode, 'title'),
+                'ie_key': BBCCoUkIE.ie_key(),
+            }
+
+    def _real_extract(self, url):
+        pid = self._match_id(url)
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        series_id = qs.get('seriesId', [None])[0]
+        page = qs.get('page', [None])[0]
+        per_page = 36 if page else self._PAGE_SIZE
+        fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
+        entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
+        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
+        return self.playlist_result(
+            entries, pid, self._get_playlist_title(playlist_data),
+            self._get_description(playlist_data))
+
+
+class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
+    IE_NAME = 'bbc.co.uk:iplayer:episodes'
+    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
     _TESTS = [{
     _TESTS = [{
         'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
         'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
         'info_dict': {
         'info_dict': {
             'id': 'b05rcz9v',
             'id': 'b05rcz9v',
             'title': 'The Disappearance',
             'title': 'The Disappearance',
-            'description': 'French thriller serial about a missing teenager.',
+            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
+        },
+        'playlist_mincount': 8,
+    }, {
+        # all seasons
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
+        'info_dict': {
+            'id': 'b094m5t9',
+            'title': 'Doctor Foster',
+            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
+        },
+        'playlist_mincount': 10,
+    }, {
+        # explicit season
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
+        'info_dict': {
+            'id': 'b094m5t9',
+            'title': 'Doctor Foster',
+            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
         },
         },
-        'playlist_mincount': 6,
-        'skip': 'This programme is not currently available on BBC iPlayer',
+        'playlist_mincount': 5,
     }, {
     }, {
+        # all pages
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
+        'info_dict': {
+            'id': 'm0004c4v',
+            'title': 'Beechgrove',
+            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+        },
+        'playlist_mincount': 37,
+    }, {
+        # explicit page
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
+        'info_dict': {
+            'id': 'm0004c4v',
+            'title': 'Beechgrove',
+            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+        },
+        'playlist_mincount': 1,
+    }]
+    _PAGE_SIZE = 100
+    _DESCRIPTION_KEY = 'synopsis'
+
+    def _get_episode_image(self, episode):
+        return self._get_default(episode, 'image')
+
+    def _get_episode_field(self, episode, field):
+        return self._get_default(episode, field)
+
+    @staticmethod
+    def _get_elements(data):
+        return data['entities']['results']
+
+    @staticmethod
+    def _get_episode(element):
+        return element.get('episode') or {}
+
+    def _call_api(self, pid, per_page, page=1, series_id=None):
+        variables = {
+            'id': pid,
+            'page': page,
+            'perPage': per_page,
+        }
+        if series_id:
+            variables['sliceId'] = series_id
+        return self._download_json(
+            'https://graph.ibl.api.bbc.co.uk/', pid, headers={
+                'Content-Type': 'application/json'
+            }, data=json.dumps({
+                'id': '5692d93d5aac8d796a0305e895e61551',
+                'variables': variables,
+            }).encode('utf-8'))['data']['programme']
+
+    @staticmethod
+    def _get_playlist_data(data):
+        return data
+
+    def _get_playlist_title(self, data):
+        return self._get_default(data, 'title')
+
+
+class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
+    IE_NAME = 'bbc.co.uk:iplayer:group'
+    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
+    _TESTS = [{
         # Available for over a year unlike 30 days for most other programmes
         # Available for over a year unlike 30 days for most other programmes
         'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
         'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
         'info_dict': {
         'info_dict': {
@@ -1316,14 +1523,56 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
             'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
             'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
         },
         },
         'playlist_mincount': 10,
         'playlist_mincount': 10,
+    }, {
+        # all pages
+        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
+        'info_dict': {
+            'id': 'p081d7j7',
+            'title': 'Music in Scotland',
+            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+        },
+        'playlist_mincount': 47,
+    }, {
+        # explicit page
+        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
+        'info_dict': {
+            'id': 'p081d7j7',
+            'title': 'Music in Scotland',
+            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+        },
+        'playlist_mincount': 11,
     }]
     }]
+    _PAGE_SIZE = 200
+    _DESCRIPTION_KEY = 'synopses'
 
 
-    def _extract_title_and_description(self, webpage):
-        title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
-        description = self._search_regex(
-            r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
-            webpage, 'description', fatal=False, group='value')
-        return title, description
+    def _get_episode_image(self, episode):
+        return self._get_default(episode, 'images', 'standard')
+
+    def _get_episode_field(self, episode, field):
+        return episode.get(field)
+
+    @staticmethod
+    def _get_elements(data):
+        return data['elements']
+
+    @staticmethod
+    def _get_episode(element):
+        return element
+
+    def _call_api(self, pid, per_page, page=1, series_id=None):
+        return self._download_json(
+            'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
+            pid, query={
+                'page': page,
+                'per_page': per_page,
+            })['group_episodes']
+
+    @staticmethod
+    def _get_playlist_data(data):
+        return data['group']
+
+    def _get_playlist_title(self, data):
+        return data.get('title')
 
 
 
 
 class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
 class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):

+ 2 - 1
youtube_dl/extractor/bilibili.py

@@ -156,6 +156,7 @@ class BiliBiliIE(InfoExtractor):
             cid = js['result']['cid']
             cid = js['result']['cid']
 
 
         headers = {
         headers = {
+            'Accept': 'application/json',
             'Referer': url
             'Referer': url
         }
         }
         headers.update(self.geo_verification_headers())
         headers.update(self.geo_verification_headers())
@@ -232,7 +233,7 @@ class BiliBiliIE(InfoExtractor):
             webpage)
             webpage)
         if uploader_mobj:
         if uploader_mobj:
             info.update({
             info.update({
-                'uploader': uploader_mobj.group('name'),
+                'uploader': uploader_mobj.group('name').strip(),
                 'uploader_id': uploader_mobj.group('id'),
                 'uploader_id': uploader_mobj.group('id'),
             })
             })
         if not info.get('uploader'):
         if not info.get('uploader'):

+ 8 - 2
youtube_dl/extractor/bleacherreport.py

@@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
     _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
     _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
     _TESTS = [{
     _TESTS = [{
         'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
         'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
-        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
+        'md5': '670b2d73f48549da032861130488c681',
         'info_dict': {
         'info_dict': {
             'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
             'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
             'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
             'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
             'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
+            'upload_date': '20150723',
+            'timestamp': 1437679032,
+
         },
         },
+        'expected_warnings': [
+            'Unable to download f4m manifest'
+        ]
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):

+ 0 - 86
youtube_dl/extractor/blinkx.py

@@ -1,86 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..utils import (
-    remove_start,
-    int_or_none,
-)
-
-
-class BlinkxIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
-    IE_NAME = 'blinkx'
-
-    _TEST = {
-        'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
-        'md5': '337cf7a344663ec79bf93a526a2e06c7',
-        'info_dict': {
-            'id': 'Da0Gw3xc',
-            'ext': 'mp4',
-            'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
-            'uploader': 'IGN News',
-            'upload_date': '20150217',
-            'timestamp': 1424215740,
-            'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
-            'duration': 47.743333,
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        display_id = video_id[:8]
-
-        api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
-                   + 'video=%s' % video_id)
-        data_json = self._download_webpage(api_url, display_id)
-        data = json.loads(data_json)['api']['results'][0]
-        duration = None
-        thumbnails = []
-        formats = []
-        for m in data['media']:
-            if m['type'] == 'jpg':
-                thumbnails.append({
-                    'url': m['link'],
-                    'width': int(m['w']),
-                    'height': int(m['h']),
-                })
-            elif m['type'] == 'original':
-                duration = float(m['d'])
-            elif m['type'] == 'youtube':
-                yt_id = m['link']
-                self.to_screen('Youtube video detected: %s' % yt_id)
-                return self.url_result(yt_id, 'Youtube', video_id=yt_id)
-            elif m['type'] in ('flv', 'mp4'):
-                vcodec = remove_start(m['vcodec'], 'ff')
-                acodec = remove_start(m['acodec'], 'ff')
-                vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
-                abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
-                tbr = vbr + abr if vbr and abr else None
-                format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
-                formats.append({
-                    'format_id': format_id,
-                    'url': m['link'],
-                    'vcodec': vcodec,
-                    'acodec': acodec,
-                    'abr': abr,
-                    'vbr': vbr,
-                    'tbr': tbr,
-                    'width': int_or_none(m.get('w')),
-                    'height': int_or_none(m.get('h')),
-                })
-
-        self._sort_formats(formats)
-
-        return {
-            'id': display_id,
-            'fullid': video_id,
-            'title': data['title'],
-            'formats': formats,
-            'uploader': data['channel_name'],
-            'timestamp': data['pubdate_epoch'],
-            'description': data.get('description'),
-            'thumbnails': thumbnails,
-            'duration': duration,
-        }

+ 10 - 4
youtube_dl/extractor/bravotv.py

@@ -12,7 +12,7 @@ from ..utils import (
 
 
 
 
 class BravoTVIE(AdobePassIE):
 class BravoTVIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
     _TESTS = [{
     _TESTS = [{
         'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
         'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
         'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
         'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
@@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
     }, {
     }, {
         'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
         'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        site, display_id = re.match(self._VALID_URL, url).groups()
         webpage = self._download_webpage(url, display_id)
         webpage = self._download_webpage(url, display_id)
         settings = self._parse_json(self._search_regex(
         settings = self._parse_json(self._search_regex(
             r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
             r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
@@ -53,11 +56,14 @@ class BravoTVIE(AdobePassIE):
                 tp_path = release_pid = tve['release_pid']
                 tp_path = release_pid = tve['release_pid']
             if tve.get('entitlement') == 'auth':
             if tve.get('entitlement') == 'auth':
                 adobe_pass = settings.get('tve_adobe_auth', {})
                 adobe_pass = settings.get('tve_adobe_auth', {})
+                if site == 'bravotv':
+                    site = 'bravo'
                 resource = self._get_mvpd_resource(
                 resource = self._get_mvpd_resource(
-                    adobe_pass.get('adobePassResourceId', 'bravo'),
+                    adobe_pass.get('adobePassResourceId') or site,
                     tve['title'], release_pid, tve.get('rating'))
                     tve['title'], release_pid, tve.get('rating'))
                 query['auth'] = self._extract_mvpd_auth(
                 query['auth'] = self._extract_mvpd_auth(
-                    url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
+                    url, release_pid,
+                    adobe_pass.get('adobePassRequestorId') or site, resource)
         else:
         else:
             shared_playlist = settings['ls_playlist']
             shared_playlist = settings['ls_playlist']
             account_pid = shared_playlist['account_pid']
             account_pid = shared_playlist['account_pid']

+ 53 - 3
youtube_dl/extractor/canvas.py

@@ -7,19 +7,21 @@ from .common import InfoExtractor
 from .gigya import GigyaBaseIE
 from .gigya import GigyaBaseIE
 from ..compat import compat_HTTPError
 from ..compat import compat_HTTPError
 from ..utils import (
 from ..utils import (
-    extract_attributes,
     ExtractorError,
     ExtractorError,
-    strip_or_none,
+    clean_html,
+    extract_attributes,
     float_or_none,
     float_or_none,
+    get_element_by_class,
     int_or_none,
     int_or_none,
     merge_dicts,
     merge_dicts,
     str_or_none,
     str_or_none,
+    strip_or_none,
     url_or_none,
     url_or_none,
 )
 )
 
 
 
 
 class CanvasIE(InfoExtractor):
 class CanvasIE(InfoExtractor):
-    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
     _TESTS = [{
     _TESTS = [{
         'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
         'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
         'md5': '68993eda72ef62386a15ea2cf3c93107',
         'md5': '68993eda72ef62386a15ea2cf3c93107',
@@ -332,3 +334,51 @@ class VrtNUIE(GigyaBaseIE):
             'display_id': display_id,
             'display_id': display_id,
             'season_number': int_or_none(page.get('episode_season')),
             'season_number': int_or_none(page.get('episode_season')),
         })
         })
+
+
+class DagelijkseKostIE(InfoExtractor):
+    IE_DESC = 'dagelijksekost.een.be'
+    _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
+        'md5': '30bfffc323009a3e5f689bef6efa2365',
+        'info_dict': {
+            'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
+            'display_id': 'hachis-parmentier-met-witloof',
+            'ext': 'mp4',
+            'title': 'Hachis parmentier met witloof',
+            'description': 'md5:9960478392d87f63567b5b117688cdc5',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 283.02,
+        },
+        'expected_warnings': ['is not a supported codec'],
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        title = strip_or_none(get_element_by_class(
+            'dish-metadata__title', webpage
+        ) or self._html_search_meta(
+            'twitter:title', webpage))
+
+        description = clean_html(get_element_by_class(
+            'dish-description', webpage)
+        ) or self._html_search_meta(
+            ('description', 'twitter:description', 'og:description'),
+            webpage)
+
+        video_id = self._html_search_regex(
+            r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
+            group='id')
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
+            'ie_key': CanvasIE.ie_key(),
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+        }

+ 4 - 1
youtube_dl/extractor/cbs.py

@@ -27,7 +27,7 @@ class CBSBaseIE(ThePlatformFeedIE):
 
 
 
 
 class CBSIE(CBSBaseIE):
 class CBSIE(CBSBaseIE):
-    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
+    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
 
 
     _TESTS = [{
     _TESTS = [{
         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@@ -52,6 +52,9 @@ class CBSIE(CBSBaseIE):
     }, {
     }, {
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
+        'only_matching': True,
     }]
     }]
 
 
     def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
     def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):

+ 1 - 1
youtube_dl/extractor/cbsnews.py

@@ -26,7 +26,7 @@ class CBSNewsEmbedIE(CBSIE):
     def _real_extract(self, url):
     def _real_extract(self, url):
         item = self._parse_json(zlib.decompress(compat_b64decode(
         item = self._parse_json(zlib.decompress(compat_b64decode(
             compat_urllib_parse_unquote(self._match_id(url))),
             compat_urllib_parse_unquote(self._match_id(url))),
-            -zlib.MAX_WBITS), None)['video']['items'][0]
+            -zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
         return self._extract_video_info(item['mpxRefId'], 'cbsnews')
         return self._extract_video_info(item['mpxRefId'], 'cbsnews')
 
 
 
 

+ 99 - 24
youtube_dl/extractor/cbssports.py

@@ -1,38 +1,113 @@
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
-from .cbs import CBSBaseIE
+import re
 
 
+# from .cbs import CBSBaseIE
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    try_get,
+)
 
 
-class CBSSportsIE(CBSBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
 
 
+# class CBSSportsEmbedIE(CBSBaseIE):
+class CBSSportsEmbedIE(InfoExtractor):
+    IE_NAME = 'cbssports:embed'
+    _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
+        (?:
+            ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
+            pcid%3D(?P<pcid>\d+)
+        )'''
     _TESTS = [{
     _TESTS = [{
-        'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
-        'info_dict': {
-            'id': '1214315075735',
-            'ext': 'mp4',
-            'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
-            'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
-            'timestamp': 1524111457,
-            'upload_date': '20180419',
-            'uploader': 'CBSI-NEW',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        }
+        'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
+        'only_matching': True,
     }, {
     }, {
-        'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
+        'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
         'only_matching': True,
         'only_matching': True,
     }]
     }]
 
 
-    def _extract_video_info(self, filter_query, video_id):
-        return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
+    # def _extract_video_info(self, filter_query, video_id):
+    #     return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
 
 
+    def _real_extract(self, url):
+        uuid, pcid = re.match(self._VALID_URL, url).groups()
+        query = {'id': uuid} if uuid else {'pcid': pcid}
+        video = self._download_json(
+            'https://www.cbssports.com/api/content/video/',
+            uuid or pcid, query=query)[0]
+        video_id = video['id']
+        title = video['title']
+        metadata = video.get('metaData') or {}
+        # return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
+        # return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
+
+        formats = self._extract_m3u8_formats(
+            metadata['files'][0]['url'], video_id, 'mp4',
+            'm3u8_native', m3u8_id='hls', fatal=False)
+        self._sort_formats(formats)
+
+        image = video.get('image')
+        thumbnails = None
+        if image:
+            image_path = image.get('path')
+            if image_path:
+                thumbnails = [{
+                    'url': image_path,
+                    'width': int_or_none(image.get('width')),
+                    'height': int_or_none(image.get('height')),
+                    'filesize': int_or_none(image.get('size')),
+                }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'description': video.get('description'),
+            'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
+            'duration': int_or_none(metadata.get('duration')),
+        }
+
+
+class CBSSportsBaseIE(InfoExtractor):
     def _real_extract(self, url):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         webpage = self._download_webpage(url, display_id)
-        video_id = self._search_regex(
-            [r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
-            webpage, 'video id')
-        return self._extract_video_info('byId=%s' % video_id, video_id)
+        iframe_url = self._search_regex(
+            r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
+            webpage, 'embed url')
+        return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
+
+
+class CBSSportsIE(CBSSportsBaseIE):
+    IE_NAME = 'cbssports'
+    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
+        'info_dict': {
+            'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
+            'ext': 'mp4',
+            'title': 'Cover 3: Stanford Spring Gleaning',
+            'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
+            'timestamp': 1617218398,
+            'upload_date': '20210331',
+            'duration': 502,
+        },
+    }]
+
+
+class TwentyFourSevenSportsIE(CBSSportsBaseIE):
+    IE_NAME = '247sports'
+    _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
+        'info_dict': {
+            'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
+            'ext': 'mp4',
+            'title': '2021 QB Jake Garcia senior highlights through five games',
+            'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
+            'timestamp': 1607114223,
+            'upload_date': '20201204',
+            'duration': 208,
+        },
+    }]

+ 57 - 11
youtube_dl/extractor/ccma.py

@@ -1,15 +1,18 @@
 # coding: utf-8
 # coding: utf-8
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
+import calendar
+import datetime
 import re
 import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     clean_html,
     clean_html,
+    extract_timezone,
     int_or_none,
     int_or_none,
     parse_duration,
     parse_duration,
-    parse_iso8601,
     parse_resolution,
     parse_resolution,
+    try_get,
     url_or_none,
     url_or_none,
 )
 )
 
 
@@ -24,8 +27,9 @@ class CCMAIE(InfoExtractor):
             'ext': 'mp4',
             'ext': 'mp4',
             'title': 'L\'espot de La Marató de TV3',
             'title': 'L\'espot de La Marató de TV3',
             'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
             'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
-            'timestamp': 1470918540,
-            'upload_date': '20160811',
+            'timestamp': 1478608140,
+            'upload_date': '20161108',
+            'age_limit': 0,
         }
         }
     }, {
     }, {
         'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
         'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
@@ -35,8 +39,24 @@ class CCMAIE(InfoExtractor):
             'ext': 'mp3',
             'ext': 'mp3',
             'title': 'El Consell de Savis analitza el derbi',
             'title': 'El Consell de Savis analitza el derbi',
             'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
             'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
-            'upload_date': '20171205',
-            'timestamp': 1512507300,
+            'upload_date': '20170512',
+            'timestamp': 1494622500,
+            'vcodec': 'none',
+            'categories': ['Esports'],
+        }
+    }, {
+        'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
+        'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
+        'info_dict': {
+            'id': '6031387',
+            'ext': 'mp4',
+            'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
+            'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
+            'timestamp': 1582577700,
+            'upload_date': '20200224',
+            'subtitles': 'mincount:4',
+            'age_limit': 16,
+            'series': 'Crims',
         }
         }
     }]
     }]
 
 
@@ -72,17 +92,28 @@ class CCMAIE(InfoExtractor):
 
 
         informacio = media['informacio']
         informacio = media['informacio']
         title = informacio['titol']
         title = informacio['titol']
-        durada = informacio.get('durada', {})
+        durada = informacio.get('durada') or {}
         duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
         duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
-        timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
+        tematica = try_get(informacio, lambda x: x['tematica']['text'])
+
+        timestamp = None
+        data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
+        try:
+            timezone, data_utc = extract_timezone(data_utc)
+            timestamp = calendar.timegm((datetime.datetime.strptime(
+                data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
+        except TypeError:
+            pass
 
 
         subtitles = {}
         subtitles = {}
-        subtitols = media.get('subtitols', {})
-        if subtitols:
-            sub_url = subtitols.get('url')
+        subtitols = media.get('subtitols') or []
+        if isinstance(subtitols, dict):
+            subtitols = [subtitols]
+        for st in subtitols:
+            sub_url = st.get('url')
             if sub_url:
             if sub_url:
                 subtitles.setdefault(
                 subtitles.setdefault(
-                    subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
+                    st.get('iso') or st.get('text') or 'ca', []).append({
                         'url': sub_url,
                         'url': sub_url,
                     })
                     })
 
 
@@ -97,6 +128,16 @@ class CCMAIE(InfoExtractor):
                     'height': int_or_none(imatges.get('alcada')),
                     'height': int_or_none(imatges.get('alcada')),
                 }]
                 }]
 
 
+        age_limit = None
+        codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
+        if codi_etic:
+            codi_etic_s = codi_etic.split('_')
+            if len(codi_etic_s) == 2:
+                if codi_etic_s[1] == 'TP':
+                    age_limit = 0
+                else:
+                    age_limit = int_or_none(codi_etic_s[1])
+
         return {
         return {
             'id': media_id,
             'id': media_id,
             'title': title,
             'title': title,
@@ -106,4 +147,9 @@ class CCMAIE(InfoExtractor):
             'thumbnails': thumbnails,
             'thumbnails': thumbnails,
             'subtitles': subtitles,
             'subtitles': subtitles,
             'formats': formats,
             'formats': formats,
+            'age_limit': age_limit,
+            'alt_title': informacio.get('titol_complet'),
+            'episode_number': int_or_none(informacio.get('capitol')),
+            'categories': [tematica] if tematica else None,
+            'series': informacio.get('programa'),
         }
         }

+ 7 - 4
youtube_dl/extractor/cda.py

@@ -95,8 +95,11 @@ class CDAIE(InfoExtractor):
         if 'Ten film jest dostępny dla użytkowników premium' in webpage:
         if 'Ten film jest dostępny dla użytkowników premium' in webpage:
             raise ExtractorError('This video is only available for premium users.', expected=True)
             raise ExtractorError('This video is only available for premium users.', expected=True)
 
 
+        if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
+            self.raise_geo_restricted()
+
         need_confirm_age = False
         need_confirm_age = False
-        if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
+        if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
                                    webpage, 'birthday validate form', default=None):
                                    webpage, 'birthday validate form', default=None):
             webpage = self._download_age_confirm_page(
             webpage = self._download_age_confirm_page(
                 url, video_id, note='Confirming age')
                 url, video_id, note='Confirming age')
@@ -130,6 +133,8 @@ class CDAIE(InfoExtractor):
             'age_limit': 18 if need_confirm_age else 0,
             'age_limit': 18 if need_confirm_age else 0,
         }
         }
 
 
+        info = self._search_json_ld(webpage, video_id, default={})
+
         # Source: https://www.cda.pl/js/player.js?t=1606154898
         # Source: https://www.cda.pl/js/player.js?t=1606154898
         def decrypt_file(a):
         def decrypt_file(a):
             for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
             for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
@@ -194,7 +199,7 @@ class CDAIE(InfoExtractor):
                 handler = self._download_webpage
                 handler = self._download_webpage
 
 
             webpage = handler(
             webpage = handler(
-                self._BASE_URL + href, video_id,
+                urljoin(self._BASE_URL, href), video_id,
                 'Downloading %s version information' % resolution, fatal=False)
                 'Downloading %s version information' % resolution, fatal=False)
             if not webpage:
             if not webpage:
                 # Manually report warning because empty page is returned when
                 # Manually report warning because empty page is returned when
@@ -206,6 +211,4 @@ class CDAIE(InfoExtractor):
 
 
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
-        info = self._search_json_ld(webpage, video_id, default={})
-
         return merge_dicts(info_dict, info)
         return merge_dicts(info_dict, info)

+ 26 - 117
youtube_dl/extractor/comedycentral.py

@@ -1,142 +1,51 @@
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
 from .mtv import MTVServicesInfoExtractor
 from .mtv import MTVServicesInfoExtractor
-from .common import InfoExtractor
 
 
 
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
-        (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
-        /(?P<title>.*)'''
+    _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
     _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
     _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
 
 
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
-        'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
+        'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
+        'md5': 'b8acb347177c680ff18a292aa2166f80',
         'info_dict': {
         'info_dict': {
-            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+            'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
-            'description': 'After a certain point, breastfeeding becomes c**kblocking.',
-            'timestamp': 1376798400,
-            'upload_date': '20130818',
+            'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
+            'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
+            'timestamp': 1598670000,
+            'upload_date': '20200829',
         },
         },
     }, {
     }, {
-        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
+        'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
         'only_matching': True,
         'only_matching': True,
-    }]
-
-
-class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
-        (?:full-episodes|shows(?=/[^/]+/full-episodes))
-        /(?P<id>[^?]+)'''
-    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
-
-    _TESTS = [{
-        'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
-        'info_dict': {
-            'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
-            'title': 'November 28, 2016 - Ryan Speedo Green',
-        },
-        'playlist_count': 4,
     }, {
     }, {
-        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-        mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1')
-        videos_info = self._get_videos_info(mgid)
-        return videos_info
-
-
-class ToshIE(MTVServicesInfoExtractor):
-    IE_DESC = 'Tosh.0'
-    _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
-    _FEED_URL = 'http://tosh.cc.com/feeds/mrss'
-
-    _TESTS = [{
-        'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
-        'info_dict': {
-            'description': 'Tosh asked fans to share their summer plans.',
-            'title': 'Twitter Users Share Summer Plans',
-        },
-        'playlist': [{
-            'md5': 'f269e88114c1805bb6d7653fecea9e06',
-            'info_dict': {
-                'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
-                'ext': 'mp4',
-                'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
-                'description': 'Tosh asked fans to share their summer plans.',
-                'thumbnail': r're:^https?://.*\.jpg',
-                # It's really reported to be published on year 2077
-                'upload_date': '20770610',
-                'timestamp': 3390510600,
-                'subtitles': {
-                    'en': 'mincount:3',
-                },
-            },
-        }]
-    }, {
-        'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
+        'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
         'only_matching': True,
         'only_matching': True,
     }]
     }]
 
 
 
 
 class ComedyCentralTVIE(MTVServicesInfoExtractor):
 class ComedyCentralTVIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
+        'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
         'info_dict': {
         'info_dict': {
-            'id': 'local_playlist-f99b626bdfe13568579a',
-            'ext': 'flv',
-            'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
-        },
-        'params': {
-            # rtmp download
-            'skip_download': True,
+            'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
+            'ext': 'mp4',
+            'title': 'Josh Investigates',
+            'description': 'Steht uns das Ende der Welt bevor?',
         },
         },
-    }, {
-        'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
-        'only_matching': True,
     }]
     }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        mrss_url = self._search_regex(
-            r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
-            webpage, 'mrss url', group='url')
-
-        return self._get_videos_info_from_url(mrss_url, video_id)
-
-
-class ComedyCentralShortnameIE(InfoExtractor):
-    _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
-    _TESTS = [{
-        'url': ':tds',
-        'only_matching': True,
-    }, {
-        'url': ':thedailyshow',
-        'only_matching': True,
-    }, {
-        'url': ':theopposition',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        shortcut_map = {
-            'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
-            'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
-            'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
+    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+    _GEO_COUNTRIES = ['DE']
+
+    def _get_feed_query(self, uri):
+        return {
+            'accountOverride': 'intl.mtvi.com',
+            'arcEp': 'web.cc.tv',
+            'ep': 'b9032c3a',
+            'imageEp': 'web.cc.tv',
+            'mgid': uri,
         }
         }
-        return self.url_result(shortcut_map[video_id])

+ 16 - 18
youtube_dl/extractor/common.py

@@ -17,7 +17,7 @@ import math
 
 
 from ..compat import (
 from ..compat import (
     compat_cookiejar_Cookie,
     compat_cookiejar_Cookie,
-    compat_cookies,
+    compat_cookies_SimpleCookie,
     compat_etree_Element,
     compat_etree_Element,
     compat_etree_fromstring,
     compat_etree_fromstring,
     compat_getpass,
     compat_getpass,
@@ -230,8 +230,10 @@ class InfoExtractor(object):
     uploader:       Full name of the video uploader.
     uploader:       Full name of the video uploader.
     license:        License name the video is licensed under.
     license:        License name the video is licensed under.
     creator:        The creator of the video.
     creator:        The creator of the video.
+    release_timestamp: UNIX timestamp of the moment the video was released.
     release_date:   The date (YYYYMMDD) when the video was released.
     release_date:   The date (YYYYMMDD) when the video was released.
-    timestamp:      UNIX timestamp of the moment the video became available.
+    timestamp:      UNIX timestamp of the moment the video became available
+                    (uploaded).
     upload_date:    Video upload date (YYYYMMDD).
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
     uploader_id:    Nickname or id of the video uploader.
@@ -1273,6 +1275,7 @@ class InfoExtractor(object):
 
 
         def extract_video_object(e):
         def extract_video_object(e):
             assert e['@type'] == 'VideoObject'
             assert e['@type'] == 'VideoObject'
+            author = e.get('author')
             info.update({
             info.update({
                 'url': url_or_none(e.get('contentUrl')),
                 'url': url_or_none(e.get('contentUrl')),
                 'title': unescapeHTML(e.get('name')),
                 'title': unescapeHTML(e.get('name')),
@@ -1280,7 +1283,11 @@ class InfoExtractor(object):
                 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                 'duration': parse_duration(e.get('duration')),
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
-                'uploader': str_or_none(e.get('author')),
+                # author can be an instance of 'Organization' or 'Person' types.
+                # both types can have 'name' property(inherited from 'Thing' type). [1]
+                # however some websites are using 'Text' type instead.
+                # 1. https://schema.org/VideoObject
+                'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
                 'filesize': float_or_none(e.get('contentSize')),
                 'filesize': float_or_none(e.get('contentSize')),
                 'tbr': int_or_none(e.get('bitrate')),
                 'tbr': int_or_none(e.get('bitrate')),
                 'width': int_or_none(e.get('width')),
                 'width': int_or_none(e.get('width')),
@@ -2064,7 +2071,7 @@ class InfoExtractor(object):
             })
             })
         return entries
         return entries
 
 
-    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}):
+    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(
         res = self._download_xml_handle(
             mpd_url, video_id,
             mpd_url, video_id,
             note=note or 'Downloading MPD manifest',
             note=note or 'Downloading MPD manifest',
@@ -2078,10 +2085,9 @@ class InfoExtractor(object):
         mpd_base_url = base_url(urlh.geturl())
         mpd_base_url = base_url(urlh.geturl())
 
 
         return self._parse_mpd_formats(
         return self._parse_mpd_formats(
-            mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
-            formats_dict=formats_dict, mpd_url=mpd_url)
+            mpd_doc, mpd_id, mpd_base_url, mpd_url)
 
 
-    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
+    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
         """
         """
         Parse formats from MPD manifest.
         Parse formats from MPD manifest.
         References:
         References:
@@ -2359,15 +2365,7 @@ class InfoExtractor(object):
                         else:
                         else:
                             # Assuming direct URL to unfragmented media.
                             # Assuming direct URL to unfragmented media.
                             f['url'] = base_url
                             f['url'] = base_url
-
-                        # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
-                        # is not necessarily unique within a Period thus formats with
-                        # the same `format_id` are quite possible. There are numerous examples
-                        # of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
-                        # https://github.com/ytdl-org/youtube-dl/issues/13919)
-                        full_info = formats_dict.get(representation_id, {}).copy()
-                        full_info.update(f)
-                        formats.append(full_info)
+                        formats.append(f)
                     else:
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
         return formats
@@ -2903,10 +2901,10 @@ class InfoExtractor(object):
         self._downloader.cookiejar.set_cookie(cookie)
         self._downloader.cookiejar.set_cookie(cookie)
 
 
     def _get_cookies(self, url):
     def _get_cookies(self, url):
-        """ Return a compat_cookies.SimpleCookie with the cookies for the url """
+        """ Return a compat_cookies_SimpleCookie with the cookies for the url """
         req = sanitized_Request(url)
         req = sanitized_Request(url)
         self._downloader.cookiejar.add_cookie_header(req)
         self._downloader.cookiejar.add_cookie_header(req)
-        return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+        return compat_cookies_SimpleCookie(req.get_header('Cookie'))
 
 
     def _apply_first_set_cookie_header(self, url_handle, cookie):
     def _apply_first_set_cookie_header(self, url_handle, cookie):
         """
         """

+ 26 - 1
youtube_dl/extractor/cspan.py

@@ -8,11 +8,14 @@ from ..utils import (
     ExtractorError,
     ExtractorError,
     extract_attributes,
     extract_attributes,
     find_xpath_attr,
     find_xpath_attr,
+    get_element_by_attribute,
     get_element_by_class,
     get_element_by_class,
     int_or_none,
     int_or_none,
     js_to_json,
     js_to_json,
     merge_dicts,
     merge_dicts,
+    parse_iso8601,
     smuggle_url,
     smuggle_url,
+    str_to_int,
     unescapeHTML,
     unescapeHTML,
 )
 )
 from .senateisvp import SenateISVPIE
 from .senateisvp import SenateISVPIE
@@ -116,8 +119,30 @@ class CSpanIE(InfoExtractor):
                 jwsetup, video_id, require_title=False, m3u8_id='hls',
                 jwsetup, video_id, require_title=False, m3u8_id='hls',
                 base_url=url)
                 base_url=url)
             add_referer(info['formats'])
             add_referer(info['formats'])
+            for subtitles in info['subtitles'].values():
+                for subtitle in subtitles:
+                    ext = determine_ext(subtitle['url'])
+                    if ext == 'php':
+                        ext = 'vtt'
+                    subtitle['ext'] = ext
             ld_info = self._search_json_ld(webpage, video_id, default={})
             ld_info = self._search_json_ld(webpage, video_id, default={})
-            return merge_dicts(info, ld_info)
+            title = get_element_by_class('video-page-title', webpage) or \
+                self._og_search_title(webpage)
+            description = get_element_by_attribute('itemprop', 'description', webpage) or \
+                self._html_search_meta(['og:description', 'description'], webpage)
+            return merge_dicts(info, ld_info, {
+                'title': title,
+                'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
+                'description': description,
+                'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
+                'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
+                'duration': int_or_none(self._search_regex(
+                    r'jwsetup\.seclength\s*=\s*(\d+);',
+                    webpage, 'duration', fatal=False)),
+                'view_count': str_to_int(self._search_regex(
+                    r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
+                    webpage, 'views', fatal=False)),
+            })
 
 
         # Obsolete
         # Obsolete
         # We first look for clipid, because clipprog always appears before
         # We first look for clipid, because clipprog always appears before

+ 62 - 46
youtube_dl/extractor/curiositystream.py

@@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):
             raise ExtractorError(
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, error), expected=True)
                 '%s said: %s' % (self.IE_NAME, error), expected=True)
 
 
-    def _call_api(self, path, video_id):
+    def _call_api(self, path, video_id, query=None):
         headers = {}
         headers = {}
         if self._auth_token:
         if self._auth_token:
             headers['X-Auth-Token'] = self._auth_token
             headers['X-Auth-Token'] = self._auth_token
         result = self._download_json(
         result = self._download_json(
-            self._API_BASE_URL + path, video_id, headers=headers)
+            self._API_BASE_URL + path, video_id, headers=headers, query=query)
         self._handle_errors(result)
         self._handle_errors(result)
         return result['data']
         return result['data']
 
 
@@ -52,62 +52,75 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
     _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
     _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
     _TEST = {
     _TEST = {
         'url': 'https://app.curiositystream.com/video/2',
         'url': 'https://app.curiositystream.com/video/2',
-        'md5': '262bb2f257ff301115f1973540de8983',
         'info_dict': {
         'info_dict': {
             'id': '2',
             'id': '2',
             'ext': 'mp4',
             'ext': 'mp4',
             'title': 'How Did You Develop The Internet?',
             'title': 'How Did You Develop The Internet?',
             'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
             'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
-        }
+        },
+        'params': {
+            'format': 'bestvideo',
+            # m3u8 download
+            'skip_download': True,
+        },
     }
     }
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_id = self._match_id(url)
-        media = self._call_api('media/' + video_id, video_id)
-        title = media['title']
 
 
         formats = []
         formats = []
-        for encoding in media.get('encodings', []):
-            m3u8_url = encoding.get('master_playlist_url')
-            if m3u8_url:
-                formats.extend(self._extract_m3u8_formats(
-                    m3u8_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
-            encoding_url = encoding.get('url')
-            file_url = encoding.get('file_url')
-            if not encoding_url and not file_url:
-                continue
-            f = {
-                'width': int_or_none(encoding.get('width')),
-                'height': int_or_none(encoding.get('height')),
-                'vbr': int_or_none(encoding.get('video_bitrate')),
-                'abr': int_or_none(encoding.get('audio_bitrate')),
-                'filesize': int_or_none(encoding.get('size_in_bytes')),
-                'vcodec': encoding.get('video_codec'),
-                'acodec': encoding.get('audio_codec'),
-                'container': encoding.get('container_type'),
-            }
-            for f_url in (encoding_url, file_url):
-                if not f_url:
+        for encoding_format in ('m3u8', 'mpd'):
+            media = self._call_api('media/' + video_id, video_id, query={
+                'encodingsNew': 'true',
+                'encodingsFormat': encoding_format,
+            })
+            for encoding in media.get('encodings', []):
+                playlist_url = encoding.get('master_playlist_url')
+                if encoding_format == 'm3u8':
+                    # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
+                    formats.extend(self._extract_m3u8_formats(
+                        playlist_url, video_id, 'mp4',
+                        m3u8_id='hls', fatal=False))
+                elif encoding_format == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        playlist_url, video_id, mpd_id='dash', fatal=False))
+                encoding_url = encoding.get('url')
+                file_url = encoding.get('file_url')
+                if not encoding_url and not file_url:
                     continue
                     continue
-                fmt = f.copy()
-                rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
-                if rtmp:
-                    fmt.update({
-                        'url': rtmp.group('url'),
-                        'play_path': rtmp.group('playpath'),
-                        'app': rtmp.group('app'),
-                        'ext': 'flv',
-                        'format_id': 'rtmp',
-                    })
-                else:
-                    fmt.update({
-                        'url': f_url,
-                        'format_id': 'http',
-                    })
-                formats.append(fmt)
+                f = {
+                    'width': int_or_none(encoding.get('width')),
+                    'height': int_or_none(encoding.get('height')),
+                    'vbr': int_or_none(encoding.get('video_bitrate')),
+                    'abr': int_or_none(encoding.get('audio_bitrate')),
+                    'filesize': int_or_none(encoding.get('size_in_bytes')),
+                    'vcodec': encoding.get('video_codec'),
+                    'acodec': encoding.get('audio_codec'),
+                    'container': encoding.get('container_type'),
+                }
+                for f_url in (encoding_url, file_url):
+                    if not f_url:
+                        continue
+                    fmt = f.copy()
+                    rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
+                    if rtmp:
+                        fmt.update({
+                            'url': rtmp.group('url'),
+                            'play_path': rtmp.group('playpath'),
+                            'app': rtmp.group('app'),
+                            'ext': 'flv',
+                            'format_id': 'rtmp',
+                        })
+                    else:
+                        fmt.update({
+                            'url': f_url,
+                            'format_id': 'http',
+                        })
+                    formats.append(fmt)
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
+        title = media['title']
+
         subtitles = {}
         subtitles = {}
         for closed_caption in media.get('closed_captions', []):
         for closed_caption in media.get('closed_captions', []):
             sub_url = closed_caption.get('file')
             sub_url = closed_caption.get('file')
@@ -132,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
 
 
 class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
 class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
     IE_NAME = 'curiositystream:collection'
     IE_NAME = 'curiositystream:collection'
-    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
     _TESTS = [{
     _TESTS = [{
         'url': 'https://app.curiositystream.com/collection/2',
         'url': 'https://app.curiositystream.com/collection/2',
         'info_dict': {
         'info_dict': {
@@ -140,10 +153,13 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
             'title': 'Curious Minds: The Internet',
             'title': 'Curious Minds: The Internet',
             'description': 'How is the internet shaping our lives in the 21st Century?',
             'description': 'How is the internet shaping our lives in the 21st Century?',
         },
         },
-        'playlist_mincount': 17,
+        'playlist_mincount': 16,
     }, {
     }, {
         'url': 'https://curiositystream.com/series/2',
         'url': 'https://curiositystream.com/series/2',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'https://curiositystream.com/collections/36',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):

+ 26 - 20
youtube_dl/extractor/dispeak.py

@@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor):
         # From http://www.gdcvault.com/play/1013700/Advanced-Material
         # From http://www.gdcvault.com/play/1013700/Advanced-Material
         'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
         'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        # From https://gdcvault.com/play/1016624, empty speakerVideo
+        'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
+        'info_dict': {
+            'id': '201210-822101_1349794556671DDDD',
+            'ext': 'flv',
+            'title': 'Pre-launch - Preparing to Take the Plunge',
+        },
+    }, {
+        # From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
+        'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
+        'only_matching': True,
     }]
     }]
 
 
     def _parse_mp4(self, metadata):
     def _parse_mp4(self, metadata):
@@ -84,26 +96,20 @@ class DigitallySpeakingIE(InfoExtractor):
                 'vcodec': 'none',
                 'vcodec': 'none',
                 'format_id': audio.get('code'),
                 'format_id': audio.get('code'),
             })
             })
-        slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
-        formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-            'play_path': remove_end(slide_video_path, '.flv'),
-            'ext': 'flv',
-            'format_note': 'slide deck video',
-            'quality': -2,
-            'preference': -2,
-            'format_id': 'slides',
-        })
-        speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
-        formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-            'play_path': remove_end(speaker_video_path, '.flv'),
-            'ext': 'flv',
-            'format_note': 'speaker video',
-            'quality': -1,
-            'preference': -1,
-            'format_id': 'speaker',
-        })
+        for video_key, format_id, preference in (
+                ('slide', 'slides', -2), ('speaker', 'speaker', -1)):
+            video_path = xpath_text(metadata, './%sVideo' % video_key)
+            if not video_path:
+                continue
+            formats.append({
+                'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+                'play_path': remove_end(video_path, '.flv'),
+                'ext': 'flv',
+                'format_note': '%s video' % video_key,
+                'quality': preference,
+                'preference': preference,
+                'format_id': format_id,
+            })
         return formats
         return formats
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):

+ 129 - 30
youtube_dl/extractor/dplay.py

@@ -1,6 +1,7 @@
 # coding: utf-8
 # coding: utf-8
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
+import json
 import re
 import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
@@ -10,11 +11,13 @@ from ..utils import (
     ExtractorError,
     ExtractorError,
     float_or_none,
     float_or_none,
     int_or_none,
     int_or_none,
+    strip_or_none,
     unified_timestamp,
     unified_timestamp,
 )
 )
 
 
 
 
 class DPlayIE(InfoExtractor):
 class DPlayIE(InfoExtractor):
+    _PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
     _VALID_URL = r'''(?x)https?://
     _VALID_URL = r'''(?x)https?://
         (?P<domain>
         (?P<domain>
             (?:www\.)?(?P<host>d
             (?:www\.)?(?P<host>d
@@ -24,7 +27,7 @@ class DPlayIE(InfoExtractor):
                 )
                 )
             )|
             )|
             (?P<subdomain_country>es|it)\.dplay\.com
             (?P<subdomain_country>es|it)\.dplay\.com
-        )/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
+        )/[^/]+''' + _PATH_REGEX
 
 
     _TESTS = [{
     _TESTS = [{
         # non geo restricted, via secure api, unsigned download hls URL
         # non geo restricted, via secure api, unsigned download hls URL
@@ -151,56 +154,79 @@ class DPlayIE(InfoExtractor):
         'only_matching': True,
         'only_matching': True,
     }]
     }]
 
 
+    def _process_errors(self, e, geo_countries):
+        info = self._parse_json(e.cause.read().decode('utf-8'), None)
+        error = info['errors'][0]
+        error_code = error.get('code')
+        if error_code == 'access.denied.geoblocked':
+            self.raise_geo_restricted(countries=geo_countries)
+        elif error_code in ('access.denied.missingpackage', 'invalid.token'):
+            raise ExtractorError(
+                'This video is only available for registered users. You may want to use --cookies.', expected=True)
+        raise ExtractorError(info['errors'][0]['detail'], expected=True)
+
+    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+        headers['Authorization'] = 'Bearer ' + self._download_json(
+            disco_base + 'token', display_id, 'Downloading token',
+            query={
+                'realm': realm,
+            })['data']['attributes']['token']
+
+    def _download_video_playback_info(self, disco_base, video_id, headers):
+        streaming = self._download_json(
+            disco_base + 'playback/videoPlaybackInfo/' + video_id,
+            video_id, headers=headers)['data']['attributes']['streaming']
+        streaming_list = []
+        for format_id, format_dict in streaming.items():
+            streaming_list.append({
+                'type': format_id,
+                'url': format_dict.get('url'),
+            })
+        return streaming_list
+
     def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
     def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
         geo_countries = [country.upper()]
         geo_countries = [country.upper()]
         self._initialize_geo_bypass({
         self._initialize_geo_bypass({
             'countries': geo_countries,
             'countries': geo_countries,
         })
         })
         disco_base = 'https://%s/' % disco_host
         disco_base = 'https://%s/' % disco_host
-        token = self._download_json(
-            disco_base + 'token', display_id, 'Downloading token',
-            query={
-                'realm': realm,
-            })['data']['attributes']['token']
         headers = {
         headers = {
             'Referer': url,
             'Referer': url,
-            'Authorization': 'Bearer ' + token,
         }
         }
-        video = self._download_json(
-            disco_base + 'content/videos/' + display_id, display_id,
-            headers=headers, query={
-                'fields[channel]': 'name',
-                'fields[image]': 'height,src,width',
-                'fields[show]': 'name',
-                'fields[tag]': 'name',
-                'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
-                'include': 'images,primaryChannel,show,tags'
-            })
+        self._update_disco_api_headers(headers, disco_base, display_id, realm)
+        try:
+            video = self._download_json(
+                disco_base + 'content/videos/' + display_id, display_id,
+                headers=headers, query={
+                    'fields[channel]': 'name',
+                    'fields[image]': 'height,src,width',
+                    'fields[show]': 'name',
+                    'fields[tag]': 'name',
+                    'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
+                    'include': 'images,primaryChannel,show,tags'
+                })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+                self._process_errors(e, geo_countries)
+            raise
         video_id = video['data']['id']
         video_id = video['data']['id']
         info = video['data']['attributes']
         info = video['data']['attributes']
         title = info['name'].strip()
         title = info['name'].strip()
         formats = []
         formats = []
         try:
         try:
-            streaming = self._download_json(
-                disco_base + 'playback/videoPlaybackInfo/' + video_id,
-                display_id, headers=headers)['data']['attributes']['streaming']
+            streaming = self._download_video_playback_info(
+                disco_base, video_id, headers)
         except ExtractorError as e:
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
-                error = info['errors'][0]
-                error_code = error.get('code')
-                if error_code == 'access.denied.geoblocked':
-                    self.raise_geo_restricted(countries=geo_countries)
-                elif error_code == 'access.denied.missingpackage':
-                    self.raise_login_required()
-                raise ExtractorError(info['errors'][0]['detail'], expected=True)
+                self._process_errors(e, geo_countries)
             raise
             raise
-        for format_id, format_dict in streaming.items():
+        for format_dict in streaming:
             if not isinstance(format_dict, dict):
             if not isinstance(format_dict, dict):
                 continue
                 continue
             format_url = format_dict.get('url')
             format_url = format_dict.get('url')
             if not format_url:
             if not format_url:
                 continue
                 continue
+            format_id = format_dict.get('type')
             ext = determine_ext(format_url)
             ext = determine_ext(format_url)
             if format_id == 'dash' or ext == 'mpd':
             if format_id == 'dash' or ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
                 formats.extend(self._extract_mpd_formats(
@@ -248,7 +274,7 @@ class DPlayIE(InfoExtractor):
             'id': video_id,
             'id': video_id,
             'display_id': display_id,
             'display_id': display_id,
             'title': title,
             'title': title,
-            'description': info.get('description'),
+            'description': strip_or_none(info.get('description')),
             'duration': float_or_none(info.get('videoDuration'), 1000),
             'duration': float_or_none(info.get('videoDuration'), 1000),
             'timestamp': unified_timestamp(info.get('publishStart')),
             'timestamp': unified_timestamp(info.get('publishStart')),
             'series': series,
             'series': series,
@@ -268,3 +294,76 @@ class DPlayIE(InfoExtractor):
         host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
         host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
         return self._get_disco_api_info(
         return self._get_disco_api_info(
             url, display_id, host, 'dplay' + country, country)
             url, display_id, host, 'dplay' + country, country)
+
+
+class DiscoveryPlusIE(DPlayIE):
+    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
+    _TESTS = [{
+        'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
+        'info_dict': {
+            'id': '1140794',
+            'display_id': 'property-brothers-forever-home/food-and-family',
+            'ext': 'mp4',
+            'title': 'Food and Family',
+            'description': 'The brothers help a Richmond family expand their single-level home.',
+            'duration': 2583.113,
+            'timestamp': 1609304400,
+            'upload_date': '20201230',
+            'creator': 'HGTV',
+            'series': 'Property Brothers: Forever Home',
+            'season_number': 1,
+            'episode_number': 1,
+        },
+        'skip': 'Available for Premium users',
+    }]
+
+    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+        headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
+
+    def _download_video_playback_info(self, disco_base, video_id, headers):
+        return self._download_json(
+            disco_base + 'playback/v3/videoPlaybackInfo',
+            video_id, headers=headers, data=json.dumps({
+                'deviceInfo': {
+                    'adBlocker': False,
+                },
+                'videoId': video_id,
+                'wisteriaProperties': {
+                    'platform': 'desktop',
+                    'product': 'dplus_us',
+                },
+            }).encode('utf-8'))['data']['attributes']['streaming']
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        return self._get_disco_api_info(
+            url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
+
+
+class HGTVDeIE(DPlayIE):
+    _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
+    _TESTS = [{
+        'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
+        'info_dict': {
+            'id': '151205',
+            'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
+            'ext': 'mp4',
+            'title': 'Wer braucht schon eine Toilette',
+            'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
+            'duration': 1177.024,
+            'timestamp': 1595705400,
+            'upload_date': '20200725',
+            'creator': 'HGTV',
+            'series': 'Tiny House - klein, aber oho',
+            'season_number': 3,
+            'episode_number': 3,
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        return self._get_disco_api_info(
+            url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')

+ 35 - 185
youtube_dl/extractor/dreisat.py

@@ -1,193 +1,43 @@
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
-import re
+from .zdf import ZDFIE
 
 
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    unified_strdate,
-    xpath_text,
-    determine_ext,
-    float_or_none,
-    ExtractorError,
-)
 
 
-
-class DreiSatIE(InfoExtractor):
+class DreiSatIE(ZDFIE):
     IE_NAME = '3sat'
     IE_NAME = '3sat'
-    _GEO_COUNTRIES = ['DE']
-    _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
-    _TESTS = [
-        {
-            'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
-            'md5': 'be37228896d30a88f315b638900a026e',
-            'info_dict': {
-                'id': '45918',
-                'ext': 'mp4',
-                'title': 'Waidmannsheil',
-                'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
-                'uploader': 'SCHWEIZWEIT',
-                'uploader_id': '100000210',
-                'upload_date': '20140913'
-            },
-            'params': {
-                'skip_download': True,  # m3u8 downloads
-            }
+    _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
+    _TESTS = [{
+        # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
+        'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
+        'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
+        'info_dict': {
+            'id': '141007_ab18_10wochensommer_film',
+            'ext': 'mp4',
+            'title': 'Ab 18! - 10 Wochen Sommer',
+            'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
+            'duration': 2660,
+            'timestamp': 1608604200,
+            'upload_date': '20201222',
         },
         },
-        {
-            'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
-            'only_matching': True,
+    }, {
+        'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
+        'info_dict': {
+            'id': '140913_sendung_schweizweit',
+            'ext': 'mp4',
+            'title': 'Waidmannsheil',
+            'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
+            'timestamp': 1410623100,
+            'upload_date': '20140913'
         },
         },
-    ]
-
-    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
-        param_groups = {}
-        for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
-            group_id = param_group.get(self._xpath_ns(
-                'id', 'http://www.w3.org/XML/1998/namespace'))
-            params = {}
-            for param in param_group:
-                params[param.get('name')] = param.get('value')
-            param_groups[group_id] = params
-
-        formats = []
-        for video in smil.findall(self._xpath_ns('.//video', namespace)):
-            src = video.get('src')
-            if not src:
-                continue
-            bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
-            group_id = video.get('paramGroup')
-            param_group = param_groups[group_id]
-            for proto in param_group['protocols'].split(','):
-                formats.append({
-                    'url': '%s://%s' % (proto, param_group['host']),
-                    'app': param_group['app'],
-                    'play_path': src,
-                    'ext': 'flv',
-                    'format_id': '%s-%d' % (proto, bitrate),
-                    'tbr': bitrate,
-                })
-        self._sort_formats(formats)
-        return formats
-
-    def extract_from_xml_url(self, video_id, xml_url):
-        doc = self._download_xml(
-            xml_url, video_id,
-            note='Downloading video info',
-            errnote='Failed to download video info')
-
-        status_code = xpath_text(doc, './status/statuscode')
-        if status_code and status_code != 'ok':
-            if status_code == 'notVisibleAnymore':
-                message = 'Video %s is not available' % video_id
-            else:
-                message = '%s returned error: %s' % (self.IE_NAME, status_code)
-            raise ExtractorError(message, expected=True)
-
-        title = xpath_text(doc, './/information/title', 'title', True)
-
-        urls = []
-        formats = []
-        for fnode in doc.findall('.//formitaeten/formitaet'):
-            video_url = xpath_text(fnode, 'url')
-            if not video_url or video_url in urls:
-                continue
-            urls.append(video_url)
-
-            is_available = 'http://www.metafilegenerator' not in video_url
-            geoloced = 'static_geoloced_online' in video_url
-            if not is_available or geoloced:
-                continue
-
-            format_id = fnode.attrib['basetype']
-            format_m = re.match(r'''(?x)
-                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
-                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
-            ''', format_id)
-
-            ext = determine_ext(video_url, None) or format_m.group('container')
-
-            if ext == 'meta':
-                continue
-            elif ext == 'smil':
-                formats.extend(self._extract_smil_formats(
-                    video_url, video_id, fatal=False))
-            elif ext == 'm3u8':
-                # the certificates are misconfigured (see
-                # https://github.com/ytdl-org/youtube-dl/issues/8665)
-                if video_url.startswith('https://'):
-                    continue
-                formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=format_id, fatal=False))
-            elif ext == 'f4m':
-                formats.extend(self._extract_f4m_formats(
-                    video_url, video_id, f4m_id=format_id, fatal=False))
-            else:
-                quality = xpath_text(fnode, './quality')
-                if quality:
-                    format_id += '-' + quality
-
-                abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
-                vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
-
-                tbr = int_or_none(self._search_regex(
-                    r'_(\d+)k', video_url, 'bitrate', None))
-                if tbr and vbr and not abr:
-                    abr = tbr - vbr
-
-                formats.append({
-                    'format_id': format_id,
-                    'url': video_url,
-                    'ext': ext,
-                    'acodec': format_m.group('acodec'),
-                    'vcodec': format_m.group('vcodec'),
-                    'abr': abr,
-                    'vbr': vbr,
-                    'tbr': tbr,
-                    'width': int_or_none(xpath_text(fnode, './width')),
-                    'height': int_or_none(xpath_text(fnode, './height')),
-                    'filesize': int_or_none(xpath_text(fnode, './filesize')),
-                    'protocol': format_m.group('proto').lower(),
-                })
-
-        geolocation = xpath_text(doc, './/details/geolocation')
-        if not formats and geolocation and geolocation != 'none':
-            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
-
-        self._sort_formats(formats)
-
-        thumbnails = []
-        for node in doc.findall('.//teaserimages/teaserimage'):
-            thumbnail_url = node.text
-            if not thumbnail_url:
-                continue
-            thumbnail = {
-                'url': thumbnail_url,
-            }
-            thumbnail_key = node.get('key')
-            if thumbnail_key:
-                m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
-                if m:
-                    thumbnail['width'] = int(m.group(1))
-                    thumbnail['height'] = int(m.group(2))
-            thumbnails.append(thumbnail)
-
-        upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': xpath_text(doc, './/information/detail'),
-            'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
-            'thumbnails': thumbnails,
-            'uploader': xpath_text(doc, './/details/originChannelTitle'),
-            'uploader_id': xpath_text(doc, './/details/originChannelId'),
-            'upload_date': upload_date,
-            'formats': formats,
+        'params': {
+            'skip_download': True,
         }
         }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
-        return self.extract_from_xml_url(video_id, details_url)
+    }, {
+        # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
+        'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
+        'only_matching': True,
+    }, {
+        # Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
+        'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
+        'only_matching': True,
+    }]

+ 28 - 17
youtube_dl/extractor/egghead.py

@@ -12,26 +12,35 @@ from ..utils import (
 )
 )
 
 
 
 
-class EggheadCourseIE(InfoExtractor):
+class EggheadBaseIE(InfoExtractor):
+    def _call_api(self, path, video_id, resource, fatal=True):
+        return self._download_json(
+            'https://app.egghead.io/api/v1/' + path,
+            video_id, 'Downloading %s JSON' % resource, fatal=fatal)
+
+
+class EggheadCourseIE(EggheadBaseIE):
     IE_DESC = 'egghead.io course'
     IE_DESC = 'egghead.io course'
     IE_NAME = 'egghead:course'
     IE_NAME = 'egghead:course'
-    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
-    _TEST = {
+    _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
+    _TESTS = [{
         'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
         'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
         'playlist_count': 29,
         'playlist_count': 29,
         'info_dict': {
         'info_dict': {
-            'id': '72',
+            'id': '432655',
             'title': 'Professor Frisby Introduces Composable Functional JavaScript',
             'title': 'Professor Frisby Introduces Composable Functional JavaScript',
             'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
             'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
         },
         },
-    }
+    }, {
+        'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
+        'only_matching': True,
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
         playlist_id = self._match_id(url)
-
-        lessons = self._download_json(
-            'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
-            playlist_id, 'Downloading course lessons JSON')
+        series_path = 'series/' + playlist_id
+        lessons = self._call_api(
+            series_path + '/lessons', playlist_id, 'course lessons')
 
 
         entries = []
         entries = []
         for lesson in lessons:
         for lesson in lessons:
@@ -44,9 +53,8 @@ class EggheadCourseIE(InfoExtractor):
             entries.append(self.url_result(
             entries.append(self.url_result(
                 lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
                 lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
 
 
-        course = self._download_json(
-            'https://egghead.io/api/v1/series/%s' % playlist_id,
-            playlist_id, 'Downloading course JSON', fatal=False) or {}
+        course = self._call_api(
+            series_path, playlist_id, 'course', False) or {}
 
 
         playlist_id = course.get('id')
         playlist_id = course.get('id')
         if playlist_id:
         if playlist_id:
@@ -57,10 +65,10 @@ class EggheadCourseIE(InfoExtractor):
             course.get('description'))
             course.get('description'))
 
 
 
 
-class EggheadLessonIE(InfoExtractor):
+class EggheadLessonIE(EggheadBaseIE):
     IE_DESC = 'egghead.io lesson'
     IE_DESC = 'egghead.io lesson'
     IE_NAME = 'egghead:lesson'
     IE_NAME = 'egghead:lesson'
-    _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
     _TESTS = [{
     _TESTS = [{
         'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
         'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
         'info_dict': {
         'info_dict': {
@@ -74,7 +82,7 @@ class EggheadLessonIE(InfoExtractor):
             'upload_date': '20161209',
             'upload_date': '20161209',
             'duration': 304,
             'duration': 304,
             'view_count': 0,
             'view_count': 0,
-            'tags': ['javascript', 'free'],
+            'tags': 'count:2',
         },
         },
         'params': {
         'params': {
             'skip_download': True,
             'skip_download': True,
@@ -83,13 +91,16 @@ class EggheadLessonIE(InfoExtractor):
     }, {
     }, {
         'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
         'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         display_id = self._match_id(url)
 
 
-        lesson = self._download_json(
-            'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
+        lesson = self._call_api(
+            'lessons/' + display_id, display_id, 'lesson')
 
 
         lesson_id = compat_str(lesson['id'])
         lesson_id = compat_str(lesson['id'])
         title = lesson['title']
         title = lesson['title']

+ 9 - 12
youtube_dl/extractor/eroprofile.py

@@ -6,7 +6,7 @@ from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlencode
 from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
 from ..utils import (
     ExtractorError,
     ExtractorError,
-    unescapeHTML
+    merge_dicts,
 )
 )
 
 
 
 
@@ -24,7 +24,8 @@ class EroProfileIE(InfoExtractor):
             'title': 'sexy babe softcore',
             'title': 'sexy babe softcore',
             'thumbnail': r're:https?://.*\.jpg',
             'thumbnail': r're:https?://.*\.jpg',
             'age_limit': 18,
             'age_limit': 18,
-        }
+        },
+        'skip': 'Video not found',
     }, {
     }, {
         'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
         'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
         'md5': '1baa9602ede46ce904c431f5418d8916',
         'md5': '1baa9602ede46ce904c431f5418d8916',
@@ -77,19 +78,15 @@ class EroProfileIE(InfoExtractor):
             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
             webpage, 'video id', default=None)
             webpage, 'video id', default=None)
 
 
-        video_url = unescapeHTML(self._search_regex(
-            r'<source src="([^"]+)', webpage, 'video url'))
         title = self._html_search_regex(
         title = self._html_search_regex(
-            r'Title:</th><td>([^<]+)</td>', webpage, 'title')
-        thumbnail = self._search_regex(
-            r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
-            webpage, 'thumbnail', fatal=False)
+            (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
+            webpage, 'title')
+
+        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
 
 
-        return {
+        return merge_dicts(info, {
             'id': video_id,
             'id': video_id,
             'display_id': display_id,
             'display_id': display_id,
-            'url': video_url,
             'title': title,
             'title': title,
-            'thumbnail': thumbnail,
             'age_limit': 18,
             'age_limit': 18,
-        }
+        })

+ 74 - 19
youtube_dl/extractor/extractors.py

@@ -42,7 +42,10 @@ from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
 from .alphaporno import AlphaPornoIE
 from .amara import AmaraIE
 from .amara import AmaraIE
 from .amcnetworks import AMCNetworksIE
 from .amcnetworks import AMCNetworksIE
-from .americastestkitchen import AmericasTestKitchenIE
+from .americastestkitchen import (
+    AmericasTestKitchenIE,
+    AmericasTestKitchenSeasonIE,
+)
 from .animeondemand import AnimeOnDemandIE
 from .animeondemand import AnimeOnDemandIE
 from .anvato import AnvatoIE
 from .anvato import AnvatoIE
 from .aol import AolIE
 from .aol import AolIE
@@ -69,6 +72,7 @@ from .arte import (
     ArteTVEmbedIE,
     ArteTVEmbedIE,
     ArteTVPlaylistIE,
     ArteTVPlaylistIE,
 )
 )
+from .arnes import ArnesIE
 from .asiancrush import (
 from .asiancrush import (
     AsianCrushIE,
     AsianCrushIE,
     AsianCrushPlaylistIE,
     AsianCrushPlaylistIE,
@@ -87,11 +91,13 @@ from .awaan import (
 )
 )
 from .azmedien import AZMedienIE
 from .azmedien import AZMedienIE
 from .baidu import BaiduVideoIE
 from .baidu import BaiduVideoIE
+from .bandaichannel import BandaiChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
 from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
 from .bbc import (
 from .bbc import (
     BBCCoUkIE,
     BBCCoUkIE,
     BBCCoUkArticleIE,
     BBCCoUkArticleIE,
-    BBCCoUkIPlayerPlaylistIE,
+    BBCCoUkIPlayerEpisodesIE,
+    BBCCoUkIPlayerGroupIE,
     BBCCoUkPlaylistIE,
     BBCCoUkPlaylistIE,
     BBCIE,
     BBCIE,
 )
 )
@@ -126,7 +132,6 @@ from .bleacherreport import (
     BleacherReportIE,
     BleacherReportIE,
     BleacherReportCMSIE,
     BleacherReportCMSIE,
 )
 )
-from .blinkx import BlinkxIE
 from .bloomberg import BloombergIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bokecc import BokeCCIE
 from .bongacams import BongaCamsIE
 from .bongacams import BongaCamsIE
@@ -160,6 +165,7 @@ from .canvas import (
     CanvasIE,
     CanvasIE,
     CanvasEenIE,
     CanvasEenIE,
     VrtNUIE,
     VrtNUIE,
+    DagelijkseKostIE,
 )
 )
 from .carambatv import (
 from .carambatv import (
     CarambaTVIE,
     CarambaTVIE,
@@ -184,7 +190,11 @@ from .cbsnews import (
     CBSNewsIE,
     CBSNewsIE,
     CBSNewsLiveVideoIE,
     CBSNewsLiveVideoIE,
 )
 )
-from .cbssports import CBSSportsIE
+from .cbssports import (
+    CBSSportsEmbedIE,
+    CBSSportsIE,
+    TwentyFourSevenSportsIE,
+)
 from .ccc import (
 from .ccc import (
     CCCIE,
     CCCIE,
     CCCPlaylistIE,
     CCCPlaylistIE,
@@ -232,11 +242,8 @@ from .cnn import (
 )
 )
 from .coub import CoubIE
 from .coub import CoubIE
 from .comedycentral import (
 from .comedycentral import (
-    ComedyCentralFullEpisodesIE,
     ComedyCentralIE,
     ComedyCentralIE,
-    ComedyCentralShortnameIE,
     ComedyCentralTVIE,
     ComedyCentralTVIE,
-    ToshIE,
 )
 )
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .commonprotocols import (
 from .commonprotocols import (
@@ -287,7 +294,11 @@ from .douyutv import (
     DouyuShowIE,
     DouyuShowIE,
     DouyuTVIE,
     DouyuTVIE,
 )
 )
-from .dplay import DPlayIE
+from .dplay import (
+    DPlayIE,
+    DiscoveryPlusIE,
+    HGTVDeIE,
+)
 from .dreisat import DreiSatIE
 from .dreisat import DreiSatIE
 from .drbonanza import DRBonanzaIE
 from .drbonanza import DRBonanzaIE
 from .drtuber import DrTuberIE
 from .drtuber import DrTuberIE
@@ -416,6 +427,7 @@ from .gamestar import GameStarIE
 from .gaskrank import GaskrankIE
 from .gaskrank import GaskrankIE
 from .gazeta import GazetaIE
 from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
 from .gdcvault import GDCVaultIE
+from .gedidigital import GediDigitalIE
 from .generic import GenericIE
 from .generic import GenericIE
 from .gfycat import GfycatIE
 from .gfycat import GfycatIE
 from .giantbomb import GiantBombIE
 from .giantbomb import GiantBombIE
@@ -470,8 +482,8 @@ from .hungama import (
 from .hypem import HypemIE
 from .hypem import HypemIE
 from .ign import (
 from .ign import (
     IGNIE,
     IGNIE,
-    OneUPIE,
-    PCMagIE,
+    IGNVideoIE,
+    IGNArticleIE,
 )
 )
 from .iheart import (
 from .iheart import (
     IHeartRadioIE,
     IHeartRadioIE,
@@ -526,7 +538,10 @@ from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
 from .keezmovies import KeezMoviesIE
 from .ketnet import KetnetIE
 from .ketnet import KetnetIE
-from .khanacademy import KhanAcademyIE
+from .khanacademy import (
+    KhanAcademyIE,
+    KhanAcademyUnitIE,
+)
 from .kickstarter import KickStarterIE
 from .kickstarter import KickStarterIE
 from .kinja import KinjaEmbedIE
 from .kinja import KinjaEmbedIE
 from .kinopoisk import KinoPoiskIE
 from .kinopoisk import KinoPoiskIE
@@ -583,7 +598,11 @@ from .limelight import (
     LimelightChannelIE,
     LimelightChannelIE,
     LimelightChannelListIE,
     LimelightChannelListIE,
 )
 )
-from .line import LineTVIE
+from .line import (
+    LineTVIE,
+    LineLiveIE,
+    LineLiveChannelIE,
+)
 from .linkedin import (
 from .linkedin import (
     LinkedInLearningIE,
     LinkedInLearningIE,
     LinkedInLearningCourseIE,
     LinkedInLearningCourseIE,
@@ -591,10 +610,6 @@ from .linkedin import (
 from .linuxacademy import LinuxAcademyIE
 from .linuxacademy import LinuxAcademyIE
 from .litv import LiTVIE
 from .litv import LiTVIE
 from .livejournal import LiveJournalIE
 from .livejournal import LiveJournalIE
-from .liveleak import (
-    LiveLeakIE,
-    LiveLeakEmbedIE,
-)
 from .livestream import (
 from .livestream import (
     LivestreamIE,
     LivestreamIE,
     LivestreamOriginalIE,
     LivestreamOriginalIE,
@@ -620,6 +635,7 @@ from .mangomolo import (
     MangomoloLiveIE,
     MangomoloLiveIE,
 )
 )
 from .manyvids import ManyVidsIE
 from .manyvids import ManyVidsIE
+from .maoritv import MaoriTVIE
 from .markiza import (
 from .markiza import (
     MarkizaIE,
     MarkizaIE,
     MarkizaPageIE,
     MarkizaPageIE,
@@ -648,6 +664,11 @@ from .microsoftvirtualacademy import (
     MicrosoftVirtualAcademyIE,
     MicrosoftVirtualAcademyIE,
     MicrosoftVirtualAcademyCourseIE,
     MicrosoftVirtualAcademyCourseIE,
 )
 )
+from .minds import (
+    MindsIE,
+    MindsChannelIE,
+    MindsGroupIE,
+)
 from .ministrygrid import MinistryGridIE
 from .ministrygrid import MinistryGridIE
 from .minoto import MinotoIE
 from .minoto import MinotoIE
 from .miomio import MioMioIE
 from .miomio import MioMioIE
@@ -658,7 +679,10 @@ from .mixcloud import (
     MixcloudUserIE,
     MixcloudUserIE,
     MixcloudPlaylistIE,
     MixcloudPlaylistIE,
 )
 )
-from .mlb import MLBIE
+from .mlb import (
+    MLBIE,
+    MLBVideoIE,
+)
 from .mnet import MnetIE
 from .mnet import MnetIE
 from .moevideo import MoeVideoIE
 from .moevideo import MoeVideoIE
 from .mofosex import (
 from .mofosex import (
@@ -859,6 +883,11 @@ from .packtpub import (
     PacktPubIE,
     PacktPubIE,
     PacktPubCourseIE,
     PacktPubCourseIE,
 )
 )
+from .palcomp3 import (
+    PalcoMP3IE,
+    PalcoMP3ArtistIE,
+    PalcoMP3VideoIE,
+)
 from .pandoratv import PandoraTVIE
 from .pandoratv import PandoraTVIE
 from .parliamentliveuk import ParliamentLiveUKIE
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .patreon import PatreonIE
@@ -892,6 +921,7 @@ from .platzi import (
 from .playfm import PlayFMIE
 from .playfm import PlayFMIE
 from .playplustv import PlayPlusTVIE
 from .playplustv import PlayPlusTVIE
 from .plays import PlaysTVIE
 from .plays import PlaysTVIE
+from .playstuff import PlayStuffIE
 from .playtvak import PlaytvakIE
 from .playtvak import PlaytvakIE
 from .playvid import PlayvidIE
 from .playvid import PlayvidIE
 from .playwire import PlaywireIE
 from .playwire import PlaywireIE
@@ -1016,6 +1046,7 @@ from .safari import (
     SafariApiIE,
     SafariApiIE,
     SafariCourseIE,
     SafariCourseIE,
 )
 )
+from .samplefocus import SampleFocusIE
 from .sapo import SapoIE
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
 from .sbs import SBSIE
@@ -1048,6 +1079,11 @@ from .shared import (
     VivoIE,
     VivoIE,
 )
 )
 from .showroomlive import ShowRoomLiveIE
 from .showroomlive import ShowRoomLiveIE
+from .simplecast import (
+    SimplecastIE,
+    SimplecastEpisodeIE,
+    SimplecastPodcastIE,
+)
 from .sina import SinaIE
 from .sina import SinaIE
 from .sixplay import SixPlayIE
 from .sixplay import SixPlayIE
 from .skyit import (
 from .skyit import (
@@ -1113,6 +1149,10 @@ from .stitcher import (
 from .sport5 import Sport5IE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
 from .sportdeutschland import SportDeutschlandIE
+from .spotify import (
+    SpotifyIE,
+    SpotifyShowIE,
+)
 from .spreaker import (
 from .spreaker import (
     SpreakerIE,
     SpreakerIE,
     SpreakerPageIE,
     SpreakerPageIE,
@@ -1128,6 +1168,11 @@ from .srgssr import (
 from .srmediathek import SRMediathekIE
 from .srmediathek import SRMediathekIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .steam import SteamIE
+from .storyfire import (
+    StoryFireIE,
+    StoryFireUserIE,
+    StoryFireSeriesIE,
+)
 from .streamable import StreamableIE
 from .streamable import StreamableIE
 from .streamcloud import StreamcloudIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streamcz import StreamCZIE
@@ -1226,6 +1271,10 @@ from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .trilulilu import TriluliluIE
+from .trovo import (
+    TrovoIE,
+    TrovoVodIE,
+)
 from .trunews import TruNewsIE
 from .trunews import TruNewsIE
 from .trutv import TruTVIE
 from .trutv import TruTVIE
 from .tube8 import Tube8IE
 from .tube8 import Tube8IE
@@ -1244,6 +1293,7 @@ from .tv2 import (
     TV2IE,
     TV2IE,
     TV2ArticleIE,
     TV2ArticleIE,
     KatsomoIE,
     KatsomoIE,
+    MTVUutisetArticleIE,
 )
 )
 from .tv2dk import (
 from .tv2dk import (
     TV2DKIE,
     TV2DKIE,
@@ -1382,7 +1432,6 @@ from .vidme import (
     VidmeUserIE,
     VidmeUserIE,
     VidmeUserLikesIE,
     VidmeUserLikesIE,
 )
 )
-from .vidzi import VidziIE
 from .vier import VierIE, VierVideosIE
 from .vier import VierIE, VierVideosIE
 from .viewlift import (
 from .viewlift import (
     ViewLiftIE,
     ViewLiftIE,
@@ -1442,6 +1491,7 @@ from .vrv import (
     VRVSeriesIE,
     VRVSeriesIE,
 )
 )
 from .vshare import VShareIE
 from .vshare import VShareIE
+from .vtm import VTMIE
 from .medialaan import MedialaanIE
 from .medialaan import MedialaanIE
 from .vube import VubeIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
 from .vuclip import VuClipIE
@@ -1585,5 +1635,10 @@ from .zattoo import (
     ZattooLiveIE,
     ZattooLiveIE,
 )
 )
 from .zdf import ZDFIE, ZDFChannelIE
 from .zdf import ZDFIE, ZDFChannelIE
-from .zingmp3 import ZingMp3IE
+from .zhihu import ZhihuIE
+from .zingmp3 import (
+    ZingMp3IE,
+    ZingMp3AlbumIE,
+)
+from .zoom import ZoomIE
 from .zype import ZypeIE
 from .zype import ZypeIE

+ 4 - 1
youtube_dl/extractor/facebook.py

@@ -521,7 +521,10 @@ class FacebookIE(InfoExtractor):
                 raise ExtractorError(
                 raise ExtractorError(
                     'The video is not available, Facebook said: "%s"' % m_msg.group(1),
                     'The video is not available, Facebook said: "%s"' % m_msg.group(1),
                     expected=True)
                     expected=True)
-            elif '>You must log in to continue' in webpage:
+            elif any(p in webpage for p in (
+                    '>You must log in to continue',
+                    'id="login_form"',
+                    'id="loginbutton"')):
                 self.raise_login_required()
                 self.raise_login_required()
 
 
         if not video_data and '/watchparty/' in url:
         if not video_data and '/watchparty/' in url:

+ 13 - 19
youtube_dl/extractor/formula1.py

@@ -5,29 +5,23 @@ from .common import InfoExtractor
 
 
 
 
 class Formula1IE(InfoExtractor):
 class Formula1IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
-    _TESTS = [{
-        'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
-        'md5': '8c79e54be72078b26b89e0e111c0502b',
+    _VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
+    _TEST = {
+        'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
+        'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
         'info_dict': {
         'info_dict': {
-            'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
+            'id': '6060988138001',
             'ext': 'mp4',
             'ext': 'mp4',
             'title': 'Race highlights - Spain 2016',
             'title': 'Race highlights - Spain 2016',
+            'timestamp': 1463332814,
+            'upload_date': '20160515',
+            'uploader_id': '6057949432001',
         },
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'add_ie': ['Ooyala'],
-    }, {
-        'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
-        'only_matching': True,
-    }]
+        'add_ie': ['BrightcoveNew'],
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        ooyala_embed_code = self._search_regex(
-            r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
+        bc_id = self._match_id(url)
         return self.url_result(
         return self.url_result(
-            'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
+            self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)

+ 12 - 8
youtube_dl/extractor/franceculture.py

@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class FranceCultureIE(InfoExtractor):
 class FranceCultureIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
         'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
         'info_dict': {
         'info_dict': {
             'id': 'rendez-vous-au-pays-des-geeks',
             'id': 'rendez-vous-au-pays-des-geeks',
@@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
             'title': 'Rendez-vous au pays des geeks',
             'title': 'Rendez-vous au pays des geeks',
             'thumbnail': r're:^https?://.*\.jpg$',
             'thumbnail': r're:^https?://.*\.jpg$',
             'upload_date': '20140301',
             'upload_date': '20140301',
-            'timestamp': 1393642916,
+            'timestamp': 1393700400,
             'vcodec': 'none',
             'vcodec': 'none',
         }
         }
-    }
+    }, {
+        # no thumbnail
+        'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
+        'only_matching': True,
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         display_id = self._match_id(url)
@@ -36,19 +40,19 @@ class FranceCultureIE(InfoExtractor):
                     </h1>|
                     </h1>|
                     <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
                     <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
                 ).*?
                 ).*?
-                (<button[^>]+data-asset-source="[^"]+"[^>]+>)
+                (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
             ''',
             ''',
             webpage, 'video data'))
             webpage, 'video data'))
 
 
-        video_url = video_data['data-asset-source']
-        title = video_data.get('data-asset-title') or self._og_search_title(webpage)
+        video_url = video_data.get('data-url') or video_data['data-asset-source']
+        title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
 
 
         description = self._html_search_regex(
         description = self._html_search_regex(
             r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
             r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
             webpage, 'description', default=None)
             webpage, 'description', default=None)
         thumbnail = self._search_regex(
         thumbnail = self._search_regex(
             r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
             r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+            webpage, 'thumbnail', default=None)
         uploader = self._html_search_regex(
         uploader = self._html_search_regex(
             r'(?s)<span class="author">(.*?)</span>',
             r'(?s)<span class="author">(.*?)</span>',
             webpage, 'uploader', default=None)
             webpage, 'uploader', default=None)
@@ -64,6 +68,6 @@ class FranceCultureIE(InfoExtractor):
             'ext': ext,
             'ext': ext,
             'vcodec': 'none' if ext == 'mp3' else None,
             'vcodec': 'none' if ext == 'mp3' else None,
             'uploader': uploader,
             'uploader': uploader,
-            'timestamp': int_or_none(video_data.get('data-asset-created-date')),
+            'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
             'duration': int_or_none(video_data.get('data-duration')),
             'duration': int_or_none(video_data.get('data-duration')),
         }
         }

+ 6 - 1
youtube_dl/extractor/francetv.py

@@ -383,6 +383,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
     }, {
     }, {
         'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
         'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        # "<figure id=" pattern (#28792)
+        'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
@@ -399,7 +403,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
         video_id = self._search_regex(
         video_id = self._search_regex(
             (r'player\.load[^;]+src:\s*["\']([^"\']+)',
             (r'player\.load[^;]+src:\s*["\']([^"\']+)',
              r'id-video=([^@]+@[^"]+)',
              r'id-video=([^@]+@[^"]+)',
-             r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
+             r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
+             r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
             webpage, 'video id')
             webpage, 'video id')
 
 
         return self._make_url_result(video_id)
         return self._make_url_result(video_id)

+ 1 - 1
youtube_dl/extractor/fujitv.py

@@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
     def _real_extract(self, url):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_id = self._match_id(url)
         formats = self._extract_m3u8_formats(
         formats = self._extract_m3u8_formats(
-            self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
+            self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
         for f in formats:
         for f in formats:
             wh = self._BITRATE_MAP.get(f.get('tbr'))
             wh = self._BITRATE_MAP.get(f.get('tbr'))
             if wh:
             if wh:

+ 5 - 1
youtube_dl/extractor/funimation.py

@@ -16,7 +16,7 @@ from ..utils import (
 
 
 
 
 class FunimationIE(InfoExtractor):
 class FunimationIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
 
 
     _NETRC_MACHINE = 'funimation'
     _NETRC_MACHINE = 'funimation'
     _TOKEN = None
     _TOKEN = None
@@ -51,6 +51,10 @@ class FunimationIE(InfoExtractor):
     }, {
     }, {
         'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
         'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        # with lang code
+        'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
+        'only_matching': True,
     }]
     }]
 
 
     def _login(self):
     def _login(self):

+ 33 - 1
youtube_dl/extractor/gdcvault.py

@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from .kaltura import KalturaIE
 from .kaltura import KalturaIE
 from ..utils import (
 from ..utils import (
     HEADRequest,
     HEADRequest,
+    remove_start,
     sanitized_Request,
     sanitized_Request,
     smuggle_url,
     smuggle_url,
     urlencode_postdata,
     urlencode_postdata,
@@ -102,6 +103,26 @@ class GDCVaultIE(InfoExtractor):
                 'format': 'mp4-408',
                 'format': 'mp4-408',
             },
             },
         },
         },
+        {
+            # Kaltura embed, whitespace between quote and embedded URL in iframe's src
+            'url': 'https://www.gdcvault.com/play/1025699',
+            'info_dict': {
+                'id': '0_zagynv0a',
+                'ext': 'mp4',
+                'title': 'Tech Toolbox',
+                'upload_date': '20190408',
+                'uploader_id': 'joe@blazestreaming.com',
+                'timestamp': 1554764629,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # HTML5 video
+            'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
+            'only_matching': True,
+        },
     ]
     ]
 
 
     def _login(self, webpage_url, display_id):
     def _login(self, webpage_url, display_id):
@@ -175,7 +196,18 @@ class GDCVaultIE(InfoExtractor):
 
 
             xml_name = self._html_search_regex(
             xml_name = self._html_search_regex(
                 r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
                 r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
-                start_page, 'xml filename')
+                start_page, 'xml filename', default=None)
+            if not xml_name:
+                info = self._parse_html5_media_entries(url, start_page, video_id)[0]
+                info.update({
+                    'title': remove_start(self._search_regex(
+                        r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
+                        'title', default=None) or self._og_search_title(
+                        start_page, default=None), 'GDC Vault - '),
+                    'id': video_id,
+                    'display_id': display_id,
+                })
+                return info
             embed_url = '%s/xml/%s' % (xml_root, xml_name)
             embed_url = '%s/xml/%s' % (xml_root, xml_name)
             ie_key = 'DigitallySpeaking'
             ie_key = 'DigitallySpeaking'
 
 

+ 161 - 0
youtube_dl/extractor/gedidigital.py

@@ -0,0 +1,161 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+)
+
+
+class GediDigitalIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://video\.
+        (?:
+            (?:
+                (?:espresso\.)?repubblica
+                |lastampa
+                |ilsecoloxix
+            )|
+            (?:
+                iltirreno
+                |messaggeroveneto
+                |ilpiccolo
+                |gazzettadimantova
+                |mattinopadova
+                |laprovinciapavese
+                |tribunatreviso
+                |nuovavenezia
+                |gazzettadimodena
+                |lanuovaferrara
+                |corrierealpi
+                |lasentinella
+            )\.gelocal
+        )\.it(?:/[^/]+){2,3}?/(?P<id>\d+)(?:[/?&#]|$)'''
+    _TESTS = [{
+        'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
+        'md5': '84658d7fb9e55a6e57ecc77b73137494',
+        'info_dict': {
+            'id': '121559',
+            'ext': 'mp4',
+            'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
+            'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
+            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$',
+            'duration': 125,
+        },
+    }, {
+        'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_meta(
+            ['twitter:title', 'og:title'], webpage, fatal=True)
+        player_data = re.findall(
+            r"PlayerFactory\.setParam\('(?P<type>format|param)',\s*'(?P<name>[^']+)',\s*'(?P<val>[^']+)'\);",
+            webpage)
+
+        formats = []
+        duration = thumb = None
+        for t, n, v in player_data:
+            if t == 'format':
+                if n in ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp'):
+                    continue
+                elif n.endswith('-vod-ak'):
+                    formats.extend(self._extract_akamai_formats(
+                        v, video_id, {'http': 'media.gedidigital.it'}))
+                else:
+                    ext = determine_ext(v)
+                    if ext == 'm3u8':
+                        formats.extend(self._extract_m3u8_formats(
+                            v, video_id, 'mp4', 'm3u8_native', m3u8_id=n, fatal=False))
+                        continue
+                    f = {
+                        'format_id': n,
+                        'url': v,
+                    }
+                    if ext == 'mp3':
+                        abr = int_or_none(self._search_regex(
+                            r'-mp3-audio-(\d+)', v, 'abr', default=None))
+                        f.update({
+                            'abr': abr,
+                            'tbr': abr,
+                            'vcodec': 'none'
+                        })
+                    else:
+                        mobj = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', n)
+                        if mobj:
+                            f.update({
+                                'height': int(mobj.group(1)),
+                                'vbr': int_or_none(mobj.group(2)),
+                            })
+                        if not f.get('vbr'):
+                            f['vbr'] = int_or_none(self._search_regex(
+                                r'-video-rrtv-(\d+)', v, 'abr', default=None))
+                    formats.append(f)
+            elif t == 'param':
+                if n in ['image_full', 'image']:
+                    thumb = v
+                elif n == 'videoDuration':
+                    duration = int_or_none(v)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._html_search_meta(
+                ['twitter:description', 'og:description', 'description'], webpage),
+            'thumbnail': thumb or self._og_search_thumbnail(webpage),
+            'formats': formats,
+            'duration': duration,
+        }

+ 55 - 33
youtube_dl/extractor/generic.py

@@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
 from .digiteka import DigitekaIE
 from .digiteka import DigitekaIE
 from .arkena import ArkenaIE
 from .arkena import ArkenaIE
 from .instagram import InstagramIE
 from .instagram import InstagramIE
-from .liveleak import LiveLeakIE
 from .threeqsdn import ThreeQSDNIE
 from .threeqsdn import ThreeQSDNIE
 from .theplatform import ThePlatformIE
 from .theplatform import ThePlatformIE
 from .kaltura import KalturaIE
 from .kaltura import KalturaIE
@@ -126,8 +125,11 @@ from .viqeo import ViqeoIE
 from .expressen import ExpressenIE
 from .expressen import ExpressenIE
 from .zype import ZypeIE
 from .zype import ZypeIE
 from .odnoklassniki import OdnoklassnikiIE
 from .odnoklassniki import OdnoklassnikiIE
+from .vk import VKIE
 from .kinja import KinjaEmbedIE
 from .kinja import KinjaEmbedIE
 from .arcpublishing import ArcPublishingIE
 from .arcpublishing import ArcPublishingIE
+from .medialaan import MedialaanIE
+from .simplecast import SimplecastIE
 
 
 
 
 class GenericIE(InfoExtractor):
 class GenericIE(InfoExtractor):
@@ -1626,31 +1628,6 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20160409',
                 'upload_date': '20160409',
             },
             },
         },
         },
-        # LiveLeak embed
-        {
-            'url': 'http://www.wykop.pl/link/3088787/',
-            'md5': '7619da8c820e835bef21a1efa2a0fc71',
-            'info_dict': {
-                'id': '874_1459135191',
-                'ext': 'mp4',
-                'title': 'Man shows poor quality of new apartment building',
-                'description': 'The wall is like a sand pile.',
-                'uploader': 'Lake8737',
-            },
-            'add_ie': [LiveLeakIE.ie_key()],
-        },
-        # Another LiveLeak embed pattern (#13336)
-        {
-            'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
-            'info_dict': {
-                'id': '2eb_1496309988',
-                'ext': 'mp4',
-                'title': 'Thief robs place where everyone was armed',
-                'description': 'md5:694d73ee79e535953cf2488562288eee',
-                'uploader': 'brazilwtf',
-            },
-            'add_ie': [LiveLeakIE.ie_key()],
-        },
         # Duplicated embedded video URLs
         # Duplicated embedded video URLs
         {
         {
             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
@@ -2223,6 +2200,34 @@ class GenericIE(InfoExtractor):
                 'duration': 1581,
                 'duration': 1581,
             },
             },
         },
         },
+        {
+            # MyChannels SDK embed
+            # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
+            'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
+            'md5': '90c0699c37006ef18e198c032d81739c',
+            'info_dict': {
+                'id': '194165',
+                'ext': 'mp4',
+                'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
+                'timestamp': 1611740340,
+                'upload_date': '20210127',
+                'duration': 159,
+            },
+        },
+        {
+            # Simplecast player embed
+            'url': 'https://www.bio.org/podcast',
+            'info_dict': {
+                'id': 'podcast',
+                'title': 'I AM BIO Podcast | BIO',
+            },
+            'playlist_mincount': 52,
+        },
+        {
+            # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
+            'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
+            'only_matching': True,
+        },
     ]
     ]
 
 
     def report_following_redirect(self, new_url):
     def report_following_redirect(self, new_url):
@@ -2462,6 +2467,9 @@ class GenericIE(InfoExtractor):
         webpage = self._webpage_read_content(
         webpage = self._webpage_read_content(
             full_response, url, video_id, prefix=first_bytes)
             full_response, url, video_id, prefix=first_bytes)
 
 
+        if '<title>DPG Media Privacy Gate</title>' in webpage:
+            webpage = self._download_webpage(url, video_id)
+
         self.report_extraction(video_id)
         self.report_extraction(video_id)
 
 
         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
@@ -2593,6 +2601,11 @@ class GenericIE(InfoExtractor):
         if arc_urls:
         if arc_urls:
             return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
             return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
 
 
+        mychannels_urls = MedialaanIE._extract_urls(webpage)
+        if mychannels_urls:
+            return self.playlist_from_matches(
+                mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
+
         # Look for embedded rtl.nl player
         # Look for embedded rtl.nl player
         matches = re.findall(
         matches = re.findall(
             r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
             r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
@@ -2744,6 +2757,11 @@ class GenericIE(InfoExtractor):
         if odnoklassniki_url:
         if odnoklassniki_url:
             return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
             return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
 
 
+        # Look for sibnet embedded player
+        sibnet_urls = VKIE._extract_sibnet_urls(webpage)
+        if sibnet_urls:
+            return self.playlist_from_matches(sibnet_urls, video_id, video_title)
+
         # Look for embedded ivi player
         # Look for embedded ivi player
         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
         if mobj is not None:
         if mobj is not None:
@@ -2769,6 +2787,12 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
             return self.playlist_from_matches(
                 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
                 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
 
 
+        # Look for Simplecast embeds
+        simplecast_urls = SimplecastIE._extract_urls(webpage)
+        if simplecast_urls:
+            return self.playlist_from_matches(
+                simplecast_urls, video_id, video_title)
+
         # Look for BBC iPlayer embed
         # Look for BBC iPlayer embed
         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
         if matches:
         if matches:
@@ -2914,7 +2938,7 @@ class GenericIE(InfoExtractor):
             webpage)
             webpage)
         if not mobj:
         if not mobj:
             mobj = re.search(
             mobj = re.search(
-                r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
+                r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
                 webpage)
                 webpage)
         if mobj is not None:
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'MLB')
             return self.url_result(mobj.group('url'), 'MLB')
@@ -3129,11 +3153,6 @@ class GenericIE(InfoExtractor):
             return self.url_result(
             return self.url_result(
                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
                 self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
 
 
-        # Look for LiveLeak embeds
-        liveleak_urls = LiveLeakIE._extract_urls(webpage)
-        if liveleak_urls:
-            return self.playlist_from_matches(liveleak_urls, video_id, video_title)
-
         # Look for 3Q SDN embeds
         # Look for 3Q SDN embeds
         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
         threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
         if threeqsdn_url:
         if threeqsdn_url:
@@ -3361,6 +3380,9 @@ class GenericIE(InfoExtractor):
                         'url': src,
                         'url': src,
                         'ext': (mimetype2ext(src_type)
                         'ext': (mimetype2ext(src_type)
                                 or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
                                 or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+                        'http_headers': {
+                            'Referer': full_response.geturl(),
+                        },
                     })
                     })
             if formats:
             if formats:
                 self._sort_formats(formats)
                 self._sort_formats(formats)
@@ -3429,7 +3451,7 @@ class GenericIE(InfoExtractor):
             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
             m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
             if m_video_type is not None:
             if m_video_type is not None:
-                found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
+                found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
         if not found:
         if not found:
             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
             found = re.search(
             found = re.search(

+ 38 - 8
youtube_dl/extractor/go.py

@@ -4,10 +4,12 @@ from __future__ import unicode_literals
 import re
 import re
 
 
 from .adobepass import AdobePassIE
 from .adobepass import AdobePassIE
+from ..compat import compat_str
 from ..utils import (
 from ..utils import (
     int_or_none,
     int_or_none,
     determine_ext,
     determine_ext,
     parse_age_limit,
     parse_age_limit,
+    try_get,
     urlencode_postdata,
     urlencode_postdata,
     ExtractorError,
     ExtractorError,
 )
 )
@@ -116,6 +118,18 @@ class GoIE(AdobePassIE):
             # m3u8 download
             # m3u8 download
             'skip_download': True,
             'skip_download': True,
         },
         },
+    }, {
+        'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
+        'info_dict': {
+            'id': 'VDKA22600213',
+            'ext': 'mp4',
+            'title': 'Pilot',
+            'description': 'md5:74306df917cfc199d76d061d66bebdb4',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }, {
     }, {
         'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
         'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
         'only_matching': True,
         'only_matching': True,
@@ -149,14 +163,30 @@ class GoIE(AdobePassIE):
         brand = site_info.get('brand')
         brand = site_info.get('brand')
         if not video_id or not site_info:
         if not video_id or not site_info:
             webpage = self._download_webpage(url, display_id or video_id)
             webpage = self._download_webpage(url, display_id or video_id)
-            video_id = self._search_regex(
-                (
-                    # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
-                    # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
-                    r'data-video-id=["\']*(VDKA\w+)',
-                    # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
-                    r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
-                ), webpage, 'video id', default=video_id)
+            data = self._parse_json(
+                self._search_regex(
+                    r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
+                    'data', default='{}'),
+                display_id or video_id, fatal=False)
+            # https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
+            layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
+            video_id = None
+            if layout:
+                video_id = try_get(
+                    layout,
+                    (lambda x: x['videoid'], lambda x: x['video']['id']),
+                    compat_str)
+            if not video_id:
+                video_id = self._search_regex(
+                    (
+                        # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
+                        # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
+                        r'data-video-id=["\']*(VDKA\w+)',
+                        # page.analytics.videoIdCode
+                        r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
+                        # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
+                        r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
+                    ), webpage, 'video id', default=video_id)
             if not site_info:
             if not site_info:
                 brand = self._search_regex(
                 brand = self._search_regex(
                     (r'data-brand=\s*["\']\s*(\d+)',
                     (r'data-brand=\s*["\']\s*(\d+)',

+ 7 - 1
youtube_dl/extractor/googledrive.py

@@ -7,6 +7,7 @@ from ..compat import compat_parse_qs
 from ..utils import (
 from ..utils import (
     determine_ext,
     determine_ext,
     ExtractorError,
     ExtractorError,
+    get_element_by_class,
     int_or_none,
     int_or_none,
     lowercase_escape,
     lowercase_escape,
     try_get,
     try_get,
@@ -237,7 +238,7 @@ class GoogleDriveIE(InfoExtractor):
                 if confirmation_webpage:
                 if confirmation_webpage:
                     confirm = self._search_regex(
                     confirm = self._search_regex(
                         r'confirm=([^&"\']+)', confirmation_webpage,
                         r'confirm=([^&"\']+)', confirmation_webpage,
-                        'confirmation code', fatal=False)
+                        'confirmation code', default=None)
                     if confirm:
                     if confirm:
                         confirmed_source_url = update_url_query(source_url, {
                         confirmed_source_url = update_url_query(source_url, {
                             'confirm': confirm,
                             'confirm': confirm,
@@ -245,6 +246,11 @@ class GoogleDriveIE(InfoExtractor):
                         urlh = request_source_file(confirmed_source_url, 'confirmed source')
                         urlh = request_source_file(confirmed_source_url, 'confirmed source')
                         if urlh and urlh.headers.get('Content-Disposition'):
                         if urlh and urlh.headers.get('Content-Disposition'):
                             add_source_format(urlh)
                             add_source_format(urlh)
+                    else:
+                        self.report_warning(
+                            get_element_by_class('uc-error-subcaption', confirmation_webpage)
+                            or get_element_by_class('uc-error-caption', confirmation_webpage)
+                            or 'unable to extract confirmation code')
 
 
         if not formats and reason:
         if not formats and reason:
             raise ExtractorError(reason, expected=True)
             raise ExtractorError(reason, expected=True)

+ 198 - 173
youtube_dl/extractor/ign.py

@@ -3,230 +3,255 @@ from __future__ import unicode_literals
 import re
 import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
 from ..utils import (
+    HEADRequest,
+    determine_ext,
     int_or_none,
     int_or_none,
     parse_iso8601,
     parse_iso8601,
+    strip_or_none,
+    try_get,
 )
 )
 
 
 
 
-class IGNIE(InfoExtractor):
+class IGNBaseIE(InfoExtractor):
+    def _call_api(self, slug):
+        return self._download_json(
+            'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
+
+
+class IGNIE(IGNBaseIE):
     """
     """
     Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
     Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
     Some videos of it.ign.com are also supported
     Some videos of it.ign.com are also supported
     """
     """
 
 
-    _VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
     IE_NAME = 'ign.com'
     IE_NAME = 'ign.com'
+    _PAGE_TYPE = 'video'
 
 
-    _API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s'
-    _EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']'
-
-    _TESTS = [
-        {
-            'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
-            'md5': 'febda82c4bafecd2d44b6e1a18a595f8',
-            'info_dict': {
-                'id': '8f862beef863986b2785559b9e1aa599',
-                'ext': 'mp4',
-                'title': 'The Last of Us Review',
-                'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
-                'timestamp': 1370440800,
-                'upload_date': '20130605',
-                'uploader_id': 'cberidon@ign.com',
-            }
-        },
-        {
-            'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
-            'info_dict': {
-                'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
-            },
-            'playlist': [
-                {
-                    'info_dict': {
-                        'id': '5ebbd138523268b93c9141af17bec937',
-                        'ext': 'mp4',
-                        'title': 'GTA 5 Video Review',
-                        'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
-                        'timestamp': 1379339880,
-                        'upload_date': '20130916',
-                        'uploader_id': 'danieljkrupa@gmail.com',
-                    },
-                },
-                {
-                    'info_dict': {
-                        'id': '638672ee848ae4ff108df2a296418ee2',
-                        'ext': 'mp4',
-                        'title': '26 Twisted Moments from GTA 5 in Slow Motion',
-                        'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
-                        'timestamp': 1386878820,
-                        'upload_date': '20131212',
-                        'uploader_id': 'togilvie@ign.com',
-                    },
-                },
-            ],
-            'params': {
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
-            'md5': '618fedb9c901fd086f6f093564ef8558',
-            'info_dict': {
-                'id': '078fdd005f6d3c02f63d795faa1b984f',
-                'ext': 'mp4',
-                'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
-                'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
-                'timestamp': 1408047180,
-                'upload_date': '20140814',
-                'uploader_id': 'jamesduggan1990@gmail.com',
-            },
-        },
-        {
-            'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
-            'only_matching': True,
-        },
-        {
-            'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
-            'only_matching': True,
-        },
-        {
-            # videoId pattern
-            'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
-            'only_matching': True,
-        },
-    ]
-
-    def _find_video_id(self, webpage):
-        res_id = [
-            r'"video_id"\s*:\s*"(.*?)"',
-            r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
-            r'data-video-id="(.+?)"',
-            r'<object id="vid_(.+?)"',
-            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
-            r'videoId&quot;\s*:\s*&quot;(.+?)&quot;',
-            r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
-        ]
-        return self._search_regex(res_id, webpage, 'video id', default=None)
+    _TESTS = [{
+        'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
+        'md5': 'd2e1586d9987d40fad7867bf96a018ea',
+        'info_dict': {
+            'id': '8f862beef863986b2785559b9e1aa599',
+            'ext': 'mp4',
+            'title': 'The Last of Us Review',
+            'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
+            'timestamp': 1370440800,
+            'upload_date': '20130605',
+            'tags': 'count:9',
+        }
+    }, {
+        'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
+        'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
+        'info_dict': {
+            'id': 'ee10d774b508c9b8ec07e763b9125b91',
+            'ext': 'mp4',
+            'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
+            'description': 'md5:817a20299de610bd56f13175386da6fa',
+            'timestamp': 1420571160,
+            'upload_date': '20150106',
+            'tags': 'count:4',
+        }
+    }, {
+        'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
+        'only_matching': True,
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name_or_id = mobj.group('name_or_id')
-        page_type = mobj.group('type')
-        webpage = self._download_webpage(url, name_or_id)
-        if page_type != 'video':
-            multiple_urls = re.findall(
-                r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
-                webpage)
-            if multiple_urls:
-                entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
-                return {
-                    '_type': 'playlist',
-                    'id': name_or_id,
-                    'entries': entries,
-                }
-
-        video_id = self._find_video_id(webpage)
-        if not video_id:
-            return self.url_result(self._search_regex(
-                self._EMBED_RE, webpage, 'embed url'))
-        return self._get_video_info(video_id)
-
-    def _get_video_info(self, video_id):
-        api_data = self._download_json(
-            self._API_URL_TEMPLATE % video_id, video_id)
+        display_id = self._match_id(url)
+        video = self._call_api(display_id)
+        video_id = video['videoId']
+        metadata = video['metadata']
+        title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
 
 
         formats = []
         formats = []
-        m3u8_url = api_data['refs'].get('m3uUrl')
+        refs = video.get('refs') or {}
+
+        m3u8_url = refs.get('m3uUrl')
         if m3u8_url:
         if m3u8_url:
             formats.extend(self._extract_m3u8_formats(
             formats.extend(self._extract_m3u8_formats(
                 m3u8_url, video_id, 'mp4', 'm3u8_native',
                 m3u8_url, video_id, 'mp4', 'm3u8_native',
                 m3u8_id='hls', fatal=False))
                 m3u8_id='hls', fatal=False))
-        f4m_url = api_data['refs'].get('f4mUrl')
+
+        f4m_url = refs.get('f4mUrl')
         if f4m_url:
         if f4m_url:
             formats.extend(self._extract_f4m_formats(
             formats.extend(self._extract_f4m_formats(
                 f4m_url, video_id, f4m_id='hds', fatal=False))
                 f4m_url, video_id, f4m_id='hds', fatal=False))
-        for asset in api_data['assets']:
+
+        for asset in (video.get('assets') or []):
+            asset_url = asset.get('url')
+            if not asset_url:
+                continue
             formats.append({
             formats.append({
-                'url': asset['url'],
-                'tbr': asset.get('actual_bitrate_kbps'),
-                'fps': asset.get('frame_rate'),
+                'url': asset_url,
+                'tbr': int_or_none(asset.get('bitrate'), 1000),
+                'fps': int_or_none(asset.get('frame_rate')),
                 'height': int_or_none(asset.get('height')),
                 'height': int_or_none(asset.get('height')),
                 'width': int_or_none(asset.get('width')),
                 'width': int_or_none(asset.get('width')),
             })
             })
+
+        mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
+        if mezzanine_url:
+            formats.append({
+                'ext': determine_ext(mezzanine_url, 'mp4'),
+                'format_id': 'mezzanine',
+                'preference': 1,
+                'url': mezzanine_url,
+            })
+
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
-        thumbnails = [{
-            'url': thumbnail['url']
-        } for thumbnail in api_data.get('thumbnails', [])]
+        thumbnails = []
+        for thumbnail in (video.get('thumbnails') or []):
+            thumbnail_url = thumbnail.get('url')
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+            })
 
 
-        metadata = api_data['metadata']
+        tags = []
+        for tag in (video.get('tags') or []):
+            display_name = tag.get('displayName')
+            if not display_name:
+                continue
+            tags.append(display_name)
 
 
         return {
         return {
-            'id': api_data.get('videoId') or video_id,
-            'title': metadata.get('longTitle') or metadata.get('name') or metadata.get['title'],
-            'description': metadata.get('description'),
+            'id': video_id,
+            'title': title,
+            'description': strip_or_none(metadata.get('description')),
             'timestamp': parse_iso8601(metadata.get('publishDate')),
             'timestamp': parse_iso8601(metadata.get('publishDate')),
             'duration': int_or_none(metadata.get('duration')),
             'duration': int_or_none(metadata.get('duration')),
-            'display_id': metadata.get('slug') or video_id,
-            'uploader_id': metadata.get('creator'),
+            'display_id': display_id,
             'thumbnails': thumbnails,
             'thumbnails': thumbnails,
             'formats': formats,
             'formats': formats,
+            'tags': tags,
         }
         }
 
 
 
 
-class OneUPIE(IGNIE):
-    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
-    IE_NAME = '1up.com'
-
+class IGNVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://gamevideos.1up.com/video/id/34976.html',
-        'md5': 'c9cc69e07acb675c31a16719f909e347',
+        'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
+        'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
         'info_dict': {
         'info_dict': {
-            'id': '34976',
+            'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Sniper Elite V2 - Trailer',
-            'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826',
-            'timestamp': 1313099220,
-            'upload_date': '20110811',
-            'uploader_id': 'IGN',
+            'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
+            'description': 'Taking out assassination targets in Hitman has never been more stylish.',
+            'timestamp': 1444665600,
+            'upload_date': '20151012',
         }
         }
+    }, {
+        'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
+        'only_matching': True,
+    }, {
+        # Youtube embed
+        'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
+        'only_matching': True,
+    }, {
+        # Twitter embed
+        'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
+        'only_matching': True,
+    }, {
+        # Vimeo embed
+        'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        result = super(OneUPIE, self)._real_extract(url)
-        result['id'] = mobj.group('name_or_id')
-        return result
-
-
-class PCMagIE(IGNIE):
-    _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
-    IE_NAME = 'pcmag'
+        video_id = self._match_id(url)
+        req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
+        url = self._request_webpage(req, video_id).geturl()
+        ign_url = compat_parse_qs(
+            compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
+        if ign_url:
+            return self.url_result(ign_url, IGNIE.ie_key())
+        return self.url_result(url)
 
 
-    _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
 
 
+class IGNArticleIE(IGNBaseIE):
+    _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
+    _PAGE_TYPE = 'article'
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
-        'md5': '212d6154fd0361a2781075f1febbe9ad',
+        'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
         'info_dict': {
         'info_dict': {
-            'id': 'ee10d774b508c9b8ec07e763b9125b91',
-            'ext': 'mp4',
-            'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?',
-            'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3',
-            'timestamp': 1420571160,
-            'upload_date': '20150106',
-            'uploader_id': 'cozzipix@gmail.com',
-        }
+            'id': '524497489e4e8ff5848ece34',
+            'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
+        },
+        'playlist': [
+            {
+                'info_dict': {
+                    'id': '5ebbd138523268b93c9141af17bec937',
+                    'ext': 'mp4',
+                    'title': 'GTA 5 Video Review',
+                    'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
+                    'timestamp': 1379339880,
+                    'upload_date': '20130916',
+                },
+            },
+            {
+                'info_dict': {
+                    'id': '638672ee848ae4ff108df2a296418ee2',
+                    'ext': 'mp4',
+                    'title': '26 Twisted Moments from GTA 5 in Slow Motion',
+                    'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
+                    'timestamp': 1386878820,
+                    'upload_date': '20131212',
+                },
+            },
+        ],
+        'params': {
+            'playlist_items': '2-3',
+            'skip_download': True,
+        },
     }, {
     }, {
-        'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp',
-        'md5': '94130c1ca07ba0adb6088350681f16c1',
+        'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
         'info_dict': {
         'info_dict': {
-            'id': '042e560ba94823d43afcb12ddf7142ca',
-            'ext': 'mp4',
-            'title': 'HTC\'s Weird New Re Camera - What\'s New Now',
-            'description': 'md5:53433c45df96d2ea5d0fda18be2ca908',
-            'timestamp': 1412953920,
-            'upload_date': '20141010',
-            'uploader_id': 'chris_snyder@pcmag.com',
-        }
+            'id': '53ee806780a81ec46e0790f8',
+            'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
+        },
+        'playlist_count': 2,
+    }, {
+        # videoId pattern
+        'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
+        'only_matching': True,
+    }, {
+        # Youtube embed
+        'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
+        'only_matching': True,
+    }, {
+        # IMDB embed
+        'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
+        'only_matching': True,
+    }, {
+        # Facebook embed
+        'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
+        'only_matching': True,
+    }, {
+        # Brightcove embed
+        'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
+        'only_matching': True,
     }]
     }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        article = self._call_api(display_id)
+
+        def entries():
+            media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
+            if media_url:
+                yield self.url_result(media_url, IGNIE.ie_key())
+            for content in (article.get('content') or []):
+                for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
+                    yield self.url_result(video_url)
+
+        return self.playlist_result(
+            entries(), article.get('articleId'),
+            strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))

+ 27 - 2
youtube_dl/extractor/instagram.py

@@ -12,6 +12,7 @@ from ..compat import (
 )
 )
 from ..utils import (
 from ..utils import (
     ExtractorError,
     ExtractorError,
+    float_or_none,
     get_element_by_attribute,
     get_element_by_attribute,
     int_or_none,
     int_or_none,
     lowercase_escape,
     lowercase_escape,
@@ -32,6 +33,7 @@ class InstagramIE(InfoExtractor):
             'title': 'Video by naomipq',
             'title': 'Video by naomipq',
             'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
             'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
             'thumbnail': r're:^https?://.*\.jpg',
             'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 0,
             'timestamp': 1371748545,
             'timestamp': 1371748545,
             'upload_date': '20130620',
             'upload_date': '20130620',
             'uploader_id': 'naomipq',
             'uploader_id': 'naomipq',
@@ -48,6 +50,7 @@ class InstagramIE(InfoExtractor):
             'ext': 'mp4',
             'ext': 'mp4',
             'title': 'Video by britneyspears',
             'title': 'Video by britneyspears',
             'thumbnail': r're:^https?://.*\.jpg',
             'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 0,
             'timestamp': 1453760977,
             'timestamp': 1453760977,
             'upload_date': '20160125',
             'upload_date': '20160125',
             'uploader_id': 'britneyspears',
             'uploader_id': 'britneyspears',
@@ -86,6 +89,24 @@ class InstagramIE(InfoExtractor):
             'title': 'Post by instagram',
             'title': 'Post by instagram',
             'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
             'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
         },
         },
+    }, {
+        # IGTV
+        'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
+        'info_dict': {
+            'id': 'BkfuX9UB-eK',
+            'ext': 'mp4',
+            'title': 'Fingerboarding Tricks with @cass.fb',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 53.83,
+            'timestamp': 1530032919,
+            'upload_date': '20180626',
+            'uploader_id': 'instagram',
+            'uploader': 'Instagram',
+            'like_count': int,
+            'comment_count': int,
+            'comments': list,
+            'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
+        }
     }, {
     }, {
         'url': 'https://instagram.com/p/-Cmh1cukG2/',
         'url': 'https://instagram.com/p/-Cmh1cukG2/',
         'only_matching': True,
         'only_matching': True,
@@ -159,7 +180,9 @@ class InstagramIE(InfoExtractor):
             description = try_get(
             description = try_get(
                 media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
                 media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
                 compat_str) or media.get('caption')
                 compat_str) or media.get('caption')
+            title = media.get('title')
             thumbnail = media.get('display_src') or media.get('display_url')
             thumbnail = media.get('display_src') or media.get('display_url')
+            duration = float_or_none(media.get('video_duration'))
             timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
             timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
             uploader = media.get('owner', {}).get('full_name')
             uploader = media.get('owner', {}).get('full_name')
             uploader_id = media.get('owner', {}).get('username')
             uploader_id = media.get('owner', {}).get('username')
@@ -200,9 +223,10 @@ class InstagramIE(InfoExtractor):
                             continue
                             continue
                         entries.append({
                         entries.append({
                             'id': node.get('shortcode') or node['id'],
                             'id': node.get('shortcode') or node['id'],
-                            'title': 'Video %d' % edge_num,
+                            'title': node.get('title') or 'Video %d' % edge_num,
                             'url': node_video_url,
                             'url': node_video_url,
                             'thumbnail': node.get('display_url'),
                             'thumbnail': node.get('display_url'),
+                            'duration': float_or_none(node.get('video_duration')),
                             'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
                             'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
                             'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
                             'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
                             'view_count': int_or_none(node.get('video_view_count')),
                             'view_count': int_or_none(node.get('video_view_count')),
@@ -239,8 +263,9 @@ class InstagramIE(InfoExtractor):
             'id': video_id,
             'id': video_id,
             'formats': formats,
             'formats': formats,
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Video by %s' % uploader_id,
+            'title': title or 'Video by %s' % uploader_id,
             'description': description,
             'description': description,
+            'duration': duration,
             'thumbnail': thumbnail,
             'thumbnail': thumbnail,
             'timestamp': timestamp,
             'timestamp': timestamp,
             'uploader_id': uploader_id,
             'uploader_id': uploader_id,

+ 41 - 33
youtube_dl/extractor/jamendo.py

@@ -29,34 +29,51 @@ class JamendoIE(InfoExtractor):
             'id': '196219',
             'id': '196219',
             'display_id': 'stories-from-emona-i',
             'display_id': 'stories-from-emona-i',
             'ext': 'flac',
             'ext': 'flac',
-            'title': 'Maya Filipič - Stories from Emona I',
-            'artist': 'Maya Filipič',
+            # 'title': 'Maya Filipič - Stories from Emona I',
+            'title': 'Stories from Emona I',
+            # 'artist': 'Maya Filipič',
             'track': 'Stories from Emona I',
             'track': 'Stories from Emona I',
             'duration': 210,
             'duration': 210,
             'thumbnail': r're:^https?://.*\.jpg',
             'thumbnail': r're:^https?://.*\.jpg',
             'timestamp': 1217438117,
             'timestamp': 1217438117,
             'upload_date': '20080730',
             'upload_date': '20080730',
+            'license': 'by-nc-nd',
+            'view_count': int,
+            'like_count': int,
+            'average_rating': int,
+            'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
         }
         }
     }, {
     }, {
         'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
         'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
         'only_matching': True,
         'only_matching': True,
     }]
     }]
 
 
+    def _call_api(self, resource, resource_id):
+        path = '/api/%ss' % resource
+        rand = compat_str(random.random())
+        return self._download_json(
+            'https://www.jamendo.com' + path, resource_id, query={
+                'id[]': resource_id,
+            }, headers={
+                'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
+            })[0]
+
     def _real_extract(self, url):
     def _real_extract(self, url):
         track_id, display_id = self._VALID_URL_RE.match(url).groups()
         track_id, display_id = self._VALID_URL_RE.match(url).groups()
-        webpage = self._download_webpage(
-            'https://www.jamendo.com/track/' + track_id, track_id)
-        models = self._parse_json(self._html_search_regex(
-            r"data-bundled-models='([^']+)",
-            webpage, 'bundled models'), track_id)
-        track = models['track']['models'][0]
+        # webpage = self._download_webpage(
+        #     'https://www.jamendo.com/track/' + track_id, track_id)
+        # models = self._parse_json(self._html_search_regex(
+        #     r"data-bundled-models='([^']+)",
+        #     webpage, 'bundled models'), track_id)
+        # track = models['track']['models'][0]
+        track = self._call_api('track', track_id)
         title = track_name = track['name']
         title = track_name = track['name']
-        get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
-        artist = get_model('artist')
-        artist_name = artist.get('name')
-        if artist_name:
-            title = '%s - %s' % (artist_name, title)
-        album = get_model('album')
+        # get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
+        # artist = get_model('artist')
+        # artist_name = artist.get('name')
+        # if artist_name:
+        #     title = '%s - %s' % (artist_name, title)
+        # album = get_model('album')
 
 
         formats = [{
         formats = [{
             'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
             'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
@@ -74,7 +91,7 @@ class JamendoIE(InfoExtractor):
 
 
         urls = []
         urls = []
         thumbnails = []
         thumbnails = []
-        for _, covers in track.get('cover', {}).items():
+        for covers in (track.get('cover') or {}).values():
             for cover_id, cover_url in covers.items():
             for cover_id, cover_url in covers.items():
                 if not cover_url or cover_url in urls:
                 if not cover_url or cover_url in urls:
                     continue
                     continue
@@ -88,13 +105,14 @@ class JamendoIE(InfoExtractor):
                 })
                 })
 
 
         tags = []
         tags = []
-        for tag in track.get('tags', []):
+        for tag in (track.get('tags') or []):
             tag_name = tag.get('name')
             tag_name = tag.get('name')
             if not tag_name:
             if not tag_name:
                 continue
                 continue
             tags.append(tag_name)
             tags.append(tag_name)
 
 
         stats = track.get('stats') or {}
         stats = track.get('stats') or {}
+        license = track.get('licenseCC') or []
 
 
         return {
         return {
             'id': track_id,
             'id': track_id,
@@ -103,11 +121,11 @@ class JamendoIE(InfoExtractor):
             'title': title,
             'title': title,
             'description': track.get('description'),
             'description': track.get('description'),
             'duration': int_or_none(track.get('duration')),
             'duration': int_or_none(track.get('duration')),
-            'artist': artist_name,
+            # 'artist': artist_name,
             'track': track_name,
             'track': track_name,
-            'album': album.get('name'),
+            # 'album': album.get('name'),
             'formats': formats,
             'formats': formats,
-            'license': '-'.join(track.get('licenseCC', [])) or None,
+            'license': '-'.join(license) if license else None,
             'timestamp': int_or_none(track.get('dateCreated')),
             'timestamp': int_or_none(track.get('dateCreated')),
             'view_count': int_or_none(stats.get('listenedAll')),
             'view_count': int_or_none(stats.get('listenedAll')),
             'like_count': int_or_none(stats.get('favorited')),
             'like_count': int_or_none(stats.get('favorited')),
@@ -116,9 +134,9 @@ class JamendoIE(InfoExtractor):
         }
         }
 
 
 
 
-class JamendoAlbumIE(InfoExtractor):
+class JamendoAlbumIE(JamendoIE):
     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
         'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
         'info_dict': {
         'info_dict': {
             'id': '121486',
             'id': '121486',
@@ -151,17 +169,7 @@ class JamendoAlbumIE(InfoExtractor):
         'params': {
         'params': {
             'playlistend': 2
             'playlistend': 2
         }
         }
-    }
-
-    def _call_api(self, resource, resource_id):
-        path = '/api/%ss' % resource
-        rand = compat_str(random.random())
-        return self._download_json(
-            'https://www.jamendo.com' + path, resource_id, query={
-                'id[]': resource_id,
-            }, headers={
-                'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
-            })[0]
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         album_id = self._match_id(url)
         album_id = self._match_id(url)
@@ -169,7 +177,7 @@ class JamendoAlbumIE(InfoExtractor):
         album_name = album.get('name')
         album_name = album.get('name')
 
 
         entries = []
         entries = []
-        for track in album.get('tracks', []):
+        for track in (album.get('tracks') or []):
             track_id = track.get('id')
             track_id = track.get('id')
             if not track_id:
             if not track_id:
                 continue
                 continue

+ 30 - 34
youtube_dl/extractor/kakao.py

@@ -3,10 +3,13 @@
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError
 from ..utils import (
 from ..utils import (
+    ExtractorError,
     int_or_none,
     int_or_none,
+    str_or_none,
     strip_or_none,
     strip_or_none,
+    try_get,
     unified_timestamp,
     unified_timestamp,
     update_url_query,
     update_url_query,
 )
 )
@@ -23,7 +26,7 @@ class KakaoIE(InfoExtractor):
             'id': '301965083',
             'id': '301965083',
             'ext': 'mp4',
             'ext': 'mp4',
             'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
             'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
-            'uploader_id': 2671005,
+            'uploader_id': '2671005',
             'uploader': '그랑그랑이',
             'uploader': '그랑그랑이',
             'timestamp': 1488160199,
             'timestamp': 1488160199,
             'upload_date': '20170227',
             'upload_date': '20170227',
@@ -36,11 +39,15 @@ class KakaoIE(InfoExtractor):
             'ext': 'mp4',
             'ext': 'mp4',
             'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
             'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
             'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
             'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
-            'uploader_id': 2653210,
+            'uploader_id': '2653210',
             'uploader': '쇼! 음악중심',
             'uploader': '쇼! 음악중심',
             'timestamp': 1485684628,
             'timestamp': 1485684628,
             'upload_date': '20170129',
             'upload_date': '20170129',
         }
         }
+    }, {
+        # geo restricted
+        'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
@@ -68,8 +75,7 @@ class KakaoIE(InfoExtractor):
             'fields': ','.join([
             'fields': ','.join([
                 '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
                 '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
                 'description', 'channelId', 'createTime', 'duration', 'playCount',
                 'description', 'channelId', 'createTime', 'duration', 'playCount',
-                'likeCount', 'commentCount', 'tagList', 'channel', 'name',
-                'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
+                'likeCount', 'commentCount', 'tagList', 'channel', 'name', 'thumbnailUrl',
                 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
                 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
         }
         }
 
 
@@ -82,24 +88,28 @@ class KakaoIE(InfoExtractor):
 
 
         title = clip.get('title') or clip_link.get('displayTitle')
         title = clip.get('title') or clip_link.get('displayTitle')
 
 
-        query['tid'] = impress.get('tid', '')
+        query.update({
+            'fields': '-*,code,message,url',
+            'tid': impress.get('tid') or '',
+        })
 
 
         formats = []
         formats = []
-        for fmt in clip.get('videoOutputList', []):
+        for fmt in (clip.get('videoOutputList') or []):
             try:
             try:
                 profile_name = fmt['profile']
                 profile_name = fmt['profile']
                 if profile_name == 'AUDIO':
                 if profile_name == 'AUDIO':
                     continue
                     continue
-                query.update({
-                    'profile': profile_name,
-                    'fields': '-*,url',
-                })
-                fmt_url_json = self._download_json(
-                    api_base + 'raw/videolocation', display_id,
-                    'Downloading video URL for profile %s' % profile_name,
-                    query=query, headers=player_header, fatal=False)
-
-                if fmt_url_json is None:
+                query['profile'] = profile_name
+                try:
+                    fmt_url_json = self._download_json(
+                        api_base + 'raw/videolocation', display_id,
+                        'Downloading video URL for profile %s' % profile_name,
+                        query=query, headers=player_header)
+                except ExtractorError as e:
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                        resp = self._parse_json(e.cause.read().decode(), video_id)
+                        if resp.get('code') == 'GeoBlocked':
+                            self.raise_geo_restricted()
                     continue
                     continue
 
 
                 fmt_url = fmt_url_json['url']
                 fmt_url = fmt_url_json['url']
@@ -116,27 +126,13 @@ class KakaoIE(InfoExtractor):
                 pass
                 pass
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
-        thumbs = []
-        for thumb in clip.get('clipChapterThumbnailList', []):
-            thumbs.append({
-                'url': thumb.get('thumbnailUrl'),
-                'id': compat_str(thumb.get('timeInSec')),
-                'preference': -1 if thumb.get('isDefault') else 0
-            })
-        top_thumbnail = clip.get('thumbnailUrl')
-        if top_thumbnail:
-            thumbs.append({
-                'url': top_thumbnail,
-                'preference': 10,
-            })
-
         return {
         return {
             'id': display_id,
             'id': display_id,
             'title': title,
             'title': title,
             'description': strip_or_none(clip.get('description')),
             'description': strip_or_none(clip.get('description')),
-            'uploader': clip_link.get('channel', {}).get('name'),
-            'uploader_id': clip_link.get('channelId'),
-            'thumbnails': thumbs,
+            'uploader': try_get(clip_link, lambda x: x['channel']['name']),
+            'uploader_id': str_or_none(clip_link.get('channelId')),
+            'thumbnail': clip.get('thumbnailUrl'),
             'timestamp': unified_timestamp(clip_link.get('createTime')),
             'timestamp': unified_timestamp(clip_link.get('createTime')),
             'duration': int_or_none(clip.get('duration')),
             'duration': int_or_none(clip.get('duration')),
             'view_count': int_or_none(clip.get('playCount')),
             'view_count': int_or_none(clip.get('playCount')),

+ 7 - 7
youtube_dl/extractor/kaltura.py

@@ -120,7 +120,7 @@ class KalturaIE(InfoExtractor):
     def _extract_urls(webpage):
     def _extract_urls(webpage):
         # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
         # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
         finditer = (
         finditer = (
-            re.finditer(
+            list(re.finditer(
                 r"""(?xs)
                 r"""(?xs)
                     kWidget\.(?:thumb)?[Ee]mbed\(
                     kWidget\.(?:thumb)?[Ee]mbed\(
                     \{.*?
                     \{.*?
@@ -128,8 +128,8 @@ class KalturaIE(InfoExtractor):
                         (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
                         (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
                         (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
                         (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
                         (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
                         (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
-                """, webpage)
-            or re.finditer(
+                """, webpage))
+            or list(re.finditer(
                 r'''(?xs)
                 r'''(?xs)
                     (?P<q1>["'])
                     (?P<q1>["'])
                         (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
                         (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@@ -142,16 +142,16 @@ class KalturaIE(InfoExtractor):
                         \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
                         \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
                     )
                     )
                     (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
                     (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
-                ''', webpage)
-            or re.finditer(
+                ''', webpage))
+            or list(re.finditer(
                 r'''(?xs)
                 r'''(?xs)
-                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
                       (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
                       (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
                       (?:(?!(?P=q1)).)*
                       (?:(?!(?P=q1)).)*
                       [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
                       [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
                       (?:(?!(?P=q1)).)*
                       (?:(?!(?P=q1)).)*
                     (?P=q1)
                     (?P=q1)
-                ''', webpage)
+                ''', webpage))
         )
         )
         urls = []
         urls = []
         for mobj in finditer:
         for mobj in finditer:

+ 81 - 56
youtube_dl/extractor/khanacademy.py

@@ -1,82 +1,107 @@
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
-import re
+import json
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
-    unified_strdate,
+    int_or_none,
+    parse_iso8601,
+    try_get,
 )
 )
 
 
 
 
-class KhanAcademyIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
-    IE_NAME = 'KhanAcademy'
+class KhanAcademyBaseIE(InfoExtractor):
+    _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
 
 
-    _TESTS = [{
-        'url': 'http://www.khanacademy.org/video/one-time-pad',
-        'md5': '7b391cce85e758fb94f763ddc1bbb979',
+    def _parse_video(self, video):
+        return {
+            '_type': 'url_transparent',
+            'url': video['youtubeId'],
+            'id': video.get('slug'),
+            'title': video.get('title'),
+            'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
+            'duration': int_or_none(video.get('duration')),
+            'description': video.get('description'),
+            'ie_key': 'Youtube',
+        }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        component_props = self._parse_json(self._download_json(
+            'https://www.khanacademy.org/api/internal/graphql',
+            display_id, query={
+                'hash': 1604303425,
+                'variables': json.dumps({
+                    'path': display_id,
+                    'queryParams': '',
+                }),
+            })['data']['contentJson'], display_id)['componentProps']
+        return self._parse_component_props(component_props)
+
+
+class KhanAcademyIE(KhanAcademyBaseIE):
+    IE_NAME = 'khanacademy'
+    _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
+    _TEST = {
+        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
+        'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
         'info_dict': {
         'info_dict': {
-            'id': 'one-time-pad',
-            'ext': 'webm',
+            'id': 'FlIG3TvQCBQ',
+            'ext': 'mp4',
             'title': 'The one-time pad',
             'title': 'The one-time pad',
             'description': 'The perfect cipher',
             'description': 'The perfect cipher',
             'duration': 176,
             'duration': 176,
             'uploader': 'Brit Cruise',
             'uploader': 'Brit Cruise',
             'uploader_id': 'khanacademy',
             'uploader_id': 'khanacademy',
             'upload_date': '20120411',
             'upload_date': '20120411',
+            'timestamp': 1334170113,
+            'license': 'cc-by-nc-sa',
         },
         },
         'add_ie': ['Youtube'],
         'add_ie': ['Youtube'],
-    }, {
-        'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
+    }
+
+    def _parse_component_props(self, component_props):
+        video = component_props['tutorialPageData']['contentModel']
+        info = self._parse_video(video)
+        author_names = video.get('authorNames')
+        info.update({
+            'uploader': ', '.join(author_names) if author_names else None,
+            'timestamp': parse_iso8601(video.get('dateAdded')),
+            'license': video.get('kaUserLicense'),
+        })
+        return info
+
+
+class KhanAcademyUnitIE(KhanAcademyBaseIE):
+    IE_NAME = 'khanacademy:unit'
+    _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
+    _TEST = {
+        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
         'info_dict': {
         'info_dict': {
             'id': 'cryptography',
             'id': 'cryptography',
-            'title': 'Journey into cryptography',
+            'title': 'Cryptography',
             'description': 'How have humans protected their secret messages through history? What has changed today?',
             'description': 'How have humans protected their secret messages through history? What has changed today?',
         },
         },
-        'playlist_mincount': 3,
-    }]
-
-    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
+        'playlist_mincount': 31,
+    }
 
 
-        if m.group('key') == 'video':
-            data = self._download_json(
-                'http://api.khanacademy.org/api/v1/videos/' + video_id,
-                video_id, 'Downloading video info')
+    def _parse_component_props(self, component_props):
+        curation = component_props['curation']
 
 
-            upload_date = unified_strdate(data['date_added'])
-            uploader = ', '.join(data['author_names'])
-            return {
-                '_type': 'url_transparent',
-                'url': data['url'],
-                'id': video_id,
-                'title': data['title'],
-                'thumbnail': data['image_url'],
-                'duration': data['duration'],
-                'description': data['description'],
-                'uploader': uploader,
-                'upload_date': upload_date,
+        entries = []
+        tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
+        for tutorial_number, tutorial in enumerate(tutorials, 1):
+            chapter_info = {
+                'chapter': tutorial.get('title'),
+                'chapter_number': tutorial_number,
+                'chapter_id': tutorial.get('id'),
             }
             }
-        else:
-            # topic
-            data = self._download_json(
-                'http://api.khanacademy.org/api/v1/topic/' + video_id,
-                video_id, 'Downloading topic info')
+            for content_item in (tutorial.get('contentItems') or []):
+                if content_item.get('kind') == 'Video':
+                    info = self._parse_video(content_item)
+                    info.update(chapter_info)
+                    entries.append(info)
 
 
-            entries = [
-                {
-                    '_type': 'url',
-                    'url': c['url'],
-                    'id': c['id'],
-                    'title': c['title'],
-                }
-                for c in data['children'] if c['kind'] in ('Video', 'Topic')]
-
-            return {
-                '_type': 'playlist',
-                'id': video_id,
-                'title': data['title'],
-                'description': data['description'],
-                'entries': entries,
-            }
+        return self.playlist_result(
+            entries, curation.get('unit'), curation.get('title'),
+            curation.get('description'))

+ 78 - 12
youtube_dl/extractor/lbry.py

@@ -5,7 +5,12 @@ import functools
 import json
 import json
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_parse_qs,
+    compat_str,
+    compat_urllib_parse_unquote,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
 from ..utils import (
     determine_ext,
     determine_ext,
     ExtractorError,
     ExtractorError,
@@ -57,6 +62,7 @@ class LBRYBaseIE(InfoExtractor):
             'description': stream_value.get('description'),
             'description': stream_value.get('description'),
             'license': stream_value.get('license'),
             'license': stream_value.get('license'),
             'timestamp': int_or_none(stream.get('timestamp')),
             'timestamp': int_or_none(stream.get('timestamp')),
+            'release_timestamp': int_or_none(stream_value.get('release_time')),
             'tags': stream_value.get('tags'),
             'tags': stream_value.get('tags'),
             'duration': int_or_none(media.get('duration')),
             'duration': int_or_none(media.get('duration')),
             'channel': try_get(signing_channel, lambda x: x['value']['title']),
             'channel': try_get(signing_channel, lambda x: x['value']['title']),
@@ -89,6 +95,8 @@ class LBRYIE(LBRYBaseIE):
             'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
             'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
             'timestamp': 1595694354,
             'timestamp': 1595694354,
             'upload_date': '20200725',
             'upload_date': '20200725',
+            'release_timestamp': 1595340697,
+            'release_date': '20200721',
             'width': 1280,
             'width': 1280,
             'height': 720,
             'height': 720,
         }
         }
@@ -103,6 +111,8 @@ class LBRYIE(LBRYBaseIE):
             'description': 'md5:661ac4f1db09f31728931d7b88807a61',
             'description': 'md5:661ac4f1db09f31728931d7b88807a61',
             'timestamp': 1591312601,
             'timestamp': 1591312601,
             'upload_date': '20200604',
             'upload_date': '20200604',
+            'release_timestamp': 1591312421,
+            'release_date': '20200604',
             'tags': list,
             'tags': list,
             'duration': 2570,
             'duration': 2570,
             'channel': 'The LBRY Foundation',
             'channel': 'The LBRY Foundation',
@@ -110,6 +120,26 @@ class LBRYIE(LBRYBaseIE):
             'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
             'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
             'vcodec': 'none',
             'vcodec': 'none',
         }
         }
+    }, {
+        # HLS
+        'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
+        'md5': 'fc82f45ea54915b1495dd7cb5cc1289f',
+        'info_dict': {
+            'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
+            'ext': 'mp4',
+            'title': 'PLANTS I WILL NEVER GROW AGAIN. THE BLACK LIST PLANTS FOR A CANADIAN GARDEN | Gardening in Canada 🍁',
+            'description': 'md5:9c539c6a03fb843956de61a4d5288d5e',
+            'timestamp': 1618254123,
+            'upload_date': '20210412',
+            'release_timestamp': 1618254002,
+            'release_date': '20210412',
+            'tags': list,
+            'duration': 554,
+            'channel': 'Gardening In Canada',
+            'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
+            'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
+            'formats': 'mincount:3',
+        }
     }, {
     }, {
         'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
         'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
         'only_matching': True,
         'only_matching': True,
@@ -131,6 +161,9 @@ class LBRYIE(LBRYBaseIE):
     }, {
     }, {
         'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
         'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
@@ -139,6 +172,7 @@ class LBRYIE(LBRYBaseIE):
             display_id = display_id.split('/', 2)[-1].replace('/', ':')
             display_id = display_id.split('/', 2)[-1].replace('/', ':')
         else:
         else:
             display_id = display_id.replace(':', '#')
             display_id = display_id.replace(':', '#')
+        display_id = compat_urllib_parse_unquote(display_id)
         uri = 'lbry://' + display_id
         uri = 'lbry://' + display_id
         result = self._resolve_url(uri, display_id, 'stream')
         result = self._resolve_url(uri, display_id, 'stream')
         result_value = result['value']
         result_value = result['value']
@@ -149,10 +183,18 @@ class LBRYIE(LBRYBaseIE):
         streaming_url = self._call_api_proxy(
         streaming_url = self._call_api_proxy(
             'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
             'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
         info = self._parse_stream(result, url)
         info = self._parse_stream(result, url)
+        urlh = self._request_webpage(
+            streaming_url, display_id, note='Downloading streaming redirect url info')
+        if determine_ext(urlh.geturl()) == 'm3u8':
+            info['formats'] = self._extract_m3u8_formats(
+                urlh.geturl(), display_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls')
+            self._sort_formats(info['formats'])
+        else:
+            info['url'] = streaming_url
         info.update({
         info.update({
             'id': claim_id,
             'id': claim_id,
             'title': title,
             'title': title,
-            'url': streaming_url,
         })
         })
         return info
         return info
 
 
@@ -174,17 +216,18 @@ class LBRYChannelIE(LBRYBaseIE):
     }]
     }]
     _PAGE_SIZE = 50
     _PAGE_SIZE = 50
 
 
-    def _fetch_page(self, claim_id, url, page):
+    def _fetch_page(self, claim_id, url, params, page):
         page += 1
         page += 1
+        page_params = {
+            'channel_ids': [claim_id],
+            'claim_type': 'stream',
+            'no_totals': True,
+            'page': page,
+            'page_size': self._PAGE_SIZE,
+        }
+        page_params.update(params)
         result = self._call_api_proxy(
         result = self._call_api_proxy(
-            'claim_search', claim_id, {
-                'channel_ids': [claim_id],
-                'claim_type': 'stream',
-                'no_totals': True,
-                'page': page,
-                'page_size': self._PAGE_SIZE,
-                'stream_types': self._SUPPORTED_STREAM_TYPES,
-            }, 'page %d' % page)
+            'claim_search', claim_id, page_params, 'page %d' % page)
         for item in (result.get('items') or []):
         for item in (result.get('items') or []):
             stream_claim_name = item.get('name')
             stream_claim_name = item.get('name')
             stream_claim_id = item.get('claim_id')
             stream_claim_id = item.get('claim_id')
@@ -205,8 +248,31 @@ class LBRYChannelIE(LBRYBaseIE):
         result = self._resolve_url(
         result = self._resolve_url(
             'lbry://' + display_id, display_id, 'channel')
             'lbry://' + display_id, display_id, 'channel')
         claim_id = result['claim_id']
         claim_id = result['claim_id']
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        content = qs.get('content', [None])[0]
+        params = {
+            'fee_amount': qs.get('fee_amount', ['>=0'])[0],
+            'order_by': {
+                'new': ['release_time'],
+                'top': ['effective_amount'],
+                'trending': ['trending_group', 'trending_mixed'],
+            }[qs.get('order', ['new'])[0]],
+            'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
+        }
+        duration = qs.get('duration', [None])[0]
+        if duration:
+            params['duration'] = {
+                'long': '>=1200',
+                'short': '<=240',
+            }[duration]
+        language = qs.get('language', ['all'])[0]
+        if language != 'all':
+            languages = [language]
+            if language == 'en':
+                languages.append('none')
+            params['any_languages'] = languages
         entries = OnDemandPagedList(
         entries = OnDemandPagedList(
-            functools.partial(self._fetch_page, claim_id, url),
+            functools.partial(self._fetch_page, claim_id, url, params),
             self._PAGE_SIZE)
             self._PAGE_SIZE)
         result_value = result.get('value') or {}
         result_value = result.get('value') or {}
         return self.playlist_result(
         return self.playlist_result(

+ 141 - 1
youtube_dl/extractor/line.py

@@ -4,7 +4,13 @@ from __future__ import unicode_literals
 import re
 import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
-from ..utils import js_to_json
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    js_to_json,
+    str_or_none,
+)
 
 
 
 
 class LineTVIE(InfoExtractor):
 class LineTVIE(InfoExtractor):
@@ -88,3 +94,137 @@ class LineTVIE(InfoExtractor):
                            for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
                            for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
             'view_count': video_info.get('meta', {}).get('count'),
             'view_count': video_info.get('meta', {}).get('count'),
         }
         }
+
+
+class LineLiveBaseIE(InfoExtractor):
+    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
+
+    def _parse_broadcast_item(self, item):
+        broadcast_id = compat_str(item['id'])
+        title = item['title']
+        is_live = item.get('isBroadcastingNow')
+
+        thumbnails = []
+        for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'id': thumbnail_id,
+                'url': thumbnail_url,
+            })
+
+        channel = item.get('channel') or {}
+        channel_id = str_or_none(channel.get('id'))
+
+        return {
+            'id': broadcast_id,
+            'title': self._live_title(title) if is_live else title,
+            'thumbnails': thumbnails,
+            'timestamp': int_or_none(item.get('createdAt')),
+            'channel': channel.get('name'),
+            'channel_id': channel_id,
+            'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None,
+            'duration': int_or_none(item.get('archiveDuration')),
+            'view_count': int_or_none(item.get('viewerCount')),
+            'comment_count': int_or_none(item.get('chatCount')),
+            'is_live': is_live,
+        }
+
+
+class LineLiveIE(LineLiveBaseIE):
+    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
+        'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
+        'info_dict': {
+            'id': '16331360',
+            'title': '振りコピ講座😙😙😙',
+            'ext': 'mp4',
+            'timestamp': 1617095132,
+            'upload_date': '20210330',
+            'channel': '白川ゆめか',
+            'channel_id': '4867368',
+            'view_count': int,
+            'comment_count': int,
+            'is_live': False,
+        }
+    }, {
+        # archiveStatus == 'DELETED'
+        'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
+        broadcast = self._download_json(
+            self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
+            broadcast_id)
+        item = broadcast['item']
+        info = self._parse_broadcast_item(item)
+        protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
+        formats = []
+        for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
+            if not v:
+                continue
+            if k == 'abr':
+                formats.extend(self._extract_m3u8_formats(
+                    v, broadcast_id, 'mp4', protocol,
+                    m3u8_id='hls', fatal=False))
+                continue
+            f = {
+                'ext': 'mp4',
+                'format_id': 'hls-' + k,
+                'protocol': protocol,
+                'url': v,
+            }
+            if not k.isdigit():
+                f['vcodec'] = 'none'
+            formats.append(f)
+        if not formats:
+            archive_status = item.get('archiveStatus')
+            if archive_status != 'ARCHIVED':
+                raise ExtractorError('this video has been ' + archive_status.lower(), expected=True)
+        self._sort_formats(formats)
+        info['formats'] = formats
+        return info
+
+
+class LineLiveChannelIE(LineLiveBaseIE):
+    _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
+    _TEST = {
+        'url': 'https://live.line.me/channels/5893542',
+        'info_dict': {
+            'id': '5893542',
+            'title': 'いくらちゃん',
+            'description': 'md5:c3a4af801f43b2fac0b02294976580be',
+        },
+        'playlist_mincount': 29
+    }
+
+    def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
+        while True:
+            for row in (archived_broadcasts.get('rows') or []):
+                share_url = str_or_none(row.get('shareURL'))
+                if not share_url:
+                    continue
+                info = self._parse_broadcast_item(row)
+                info.update({
+                    '_type': 'url',
+                    'url': share_url,
+                    'ie_key': LineLiveIE.ie_key(),
+                })
+                yield info
+            if not archived_broadcasts.get('hasNextPage'):
+                return
+            archived_broadcasts = self._download_json(
+                self._API_BASE_URL + channel_id + '/archived_broadcasts',
+                channel_id, query={
+                    'lastId': info['id'],
+                })
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
+        return self.playlist_result(
+            self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
+            channel_id, channel.get('title'), channel.get('information'))

+ 0 - 191
youtube_dl/extractor/liveleak.py

@@ -1,191 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class LiveLeakIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
-    _TESTS = [{
-        'url': 'http://www.liveleak.com/view?i=757_1364311680',
-        'md5': '0813c2430bea7a46bf13acf3406992f4',
-        'info_dict': {
-            'id': '757_1364311680',
-            'ext': 'mp4',
-            'description': 'extremely bad day for this guy..!',
-            'uploader': 'ljfriel2',
-            'title': 'Most unlucky car accident',
-            'thumbnail': r're:^https?://.*\.jpg$'
-        }
-    }, {
-        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
-        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
-        'info_dict': {
-            'id': 'f93_1390833151',
-            'ext': 'mp4',
-            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
-            'uploader': 'ARD_Stinkt',
-            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
-            'thumbnail': r're:^https?://.*\.jpg$'
-        }
-    }, {
-        # Prochan embed
-        'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
-        'md5': '42c6d97d54f1db107958760788c5f48f',
-        'info_dict': {
-            'id': '4f7_1392687779',
-            'ext': 'mp4',
-            'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing...  I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
-            'uploader': 'CapObveus',
-            'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
-            'age_limit': 18,
-        },
-        'skip': 'Video is dead',
-    }, {
-        # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
-        # Multiple resolutions
-        'url': 'http://www.liveleak.com/view?i=801_1409392012',
-        'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
-        'info_dict': {
-            'id': '801_1409392012',
-            'ext': 'mp4',
-            'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
-            'uploader': 'bony333',
-            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
-            'thumbnail': r're:^https?://.*\.jpg$'
-        }
-    }, {
-        # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
-        'url': 'http://m.liveleak.com/view?i=763_1473349649',
-        'add_ie': ['Youtube'],
-        'info_dict': {
-            'id': '763_1473349649',
-            'ext': 'mp4',
-            'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
-            'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
-            'uploader': 'Ziz',
-            'upload_date': '20160908',
-            'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://www.liveleak.com/view?i=677_1439397581',
-        'info_dict': {
-            'id': '677_1439397581',
-            'title': 'Fuel Depot in China Explosion caught on video',
-        },
-        'playlist_count': 3,
-    }, {
-        'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
-        'only_matching': True,
-    }, {
-        # No original video
-        'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
-        'only_matching': True,
-    }]
-
-    @staticmethod
-    def _extract_urls(webpage):
-        return re.findall(
-            r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
-            webpage)
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
-        video_description = self._og_search_description(webpage)
-        video_uploader = self._html_search_regex(
-            r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
-        age_limit = int_or_none(self._search_regex(
-            r'you confirm that you are ([0-9]+) years and over.',
-            webpage, 'age limit', default=None))
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        entries = self._parse_html5_media_entries(url, webpage, video_id)
-        if not entries:
-            # Maybe an embed?
-            embed_url = self._search_regex(
-                r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
-                webpage, 'embed URL')
-            return {
-                '_type': 'url_transparent',
-                'url': embed_url,
-                'id': video_id,
-                'title': video_title,
-                'description': video_description,
-                'uploader': video_uploader,
-                'age_limit': age_limit,
-            }
-
-        for idx, info_dict in enumerate(entries):
-            formats = []
-            for a_format in info_dict['formats']:
-                if not a_format.get('height'):
-                    a_format['height'] = int_or_none(self._search_regex(
-                        r'([0-9]+)p\.mp4', a_format['url'], 'height label',
-                        default=None))
-                formats.append(a_format)
-
-                # Removing '.*.mp4' gives the raw video, which is essentially
-                # the same video without the LiveLeak logo at the top (see
-                # https://github.com/ytdl-org/youtube-dl/pull/4768)
-                orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
-                if a_format['url'] != orig_url:
-                    format_id = a_format.get('format_id')
-                    format_id = 'original' + ('-' + format_id if format_id else '')
-                    if self._is_valid_url(orig_url, video_id, format_id):
-                        formats.append({
-                            'format_id': format_id,
-                            'url': orig_url,
-                            'preference': 1,
-                        })
-            self._sort_formats(formats)
-            info_dict['formats'] = formats
-
-            # Don't append entry ID for one-video pages to keep backward compatibility
-            if len(entries) > 1:
-                info_dict['id'] = '%s_%s' % (video_id, idx + 1)
-            else:
-                info_dict['id'] = video_id
-
-            info_dict.update({
-                'title': video_title,
-                'description': video_description,
-                'uploader': video_uploader,
-                'age_limit': age_limit,
-                'thumbnail': video_thumbnail,
-            })
-
-        return self.playlist_result(entries, video_id, video_title)
-
-
-class LiveLeakEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
-
-    # See generic.py for actual test cases
-    _TESTS = [{
-        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        kind, video_id = re.match(self._VALID_URL, url).groups()
-
-        if kind == 'f':
-            webpage = self._download_webpage(url, video_id)
-            liveleak_url = self._search_regex(
-                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
-                webpage, 'LiveLeak URL', group='url')
-        else:
-            liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
-
-        return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())

+ 31 - 0
youtube_dl/extractor/maoritv.py

@@ -0,0 +1,31 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MaoriTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
+    _TEST = {
+        'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
+        'md5': '5ade8ef53851b6a132c051b1cd858899',
+        'info_dict': {
+            'id': '4774724855001',
+            'ext': 'mp4',
+            'title': 'Kōrero Mai, Series 1 Episode 54',
+            'upload_date': '20160226',
+            'timestamp': 1456455018,
+            'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
+            'uploader_id': '1614493167001',
+        },
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        brightcove_id = self._search_regex(
+            r'data-main-video-id=["\'](\d+)', webpage, 'brightcove id')
+        return self.url_result(
+            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+            'BrightcoveNew', brightcove_id)

+ 13 - 7
youtube_dl/extractor/medaltv.py

@@ -15,33 +15,39 @@ from ..utils import (
 
 
 
 
 class MedalTVIE(InfoExtractor):
 class MedalTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
     _TESTS = [{
     _TESTS = [{
-        'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
+        'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
         'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
         'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
         'info_dict': {
         'info_dict': {
-            'id': '34934644',
+            'id': '2mA60jWAGQCBH',
             'ext': 'mp4',
             'ext': 'mp4',
             'title': 'Quad Cold',
             'title': 'Quad Cold',
             'description': 'Medal,https://medal.tv/desktop/',
             'description': 'Medal,https://medal.tv/desktop/',
             'uploader': 'MowgliSB',
             'uploader': 'MowgliSB',
             'timestamp': 1603165266,
             'timestamp': 1603165266,
             'upload_date': '20201020',
             'upload_date': '20201020',
-            'uploader_id': 10619174,
+            'uploader_id': '10619174',
         }
         }
     }, {
     }, {
-        'url': 'https://medal.tv/clips/36787208',
+        'url': 'https://medal.tv/clips/2um24TWdty0NA',
         'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
         'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
         'info_dict': {
         'info_dict': {
-            'id': '36787208',
+            'id': '2um24TWdty0NA',
             'ext': 'mp4',
             'ext': 'mp4',
             'title': 'u tk me i tk u bigger',
             'title': 'u tk me i tk u bigger',
             'description': 'Medal,https://medal.tv/desktop/',
             'description': 'Medal,https://medal.tv/desktop/',
             'uploader': 'Mimicc',
             'uploader': 'Mimicc',
             'timestamp': 1605580939,
             'timestamp': 1605580939,
             'upload_date': '20201117',
             'upload_date': '20201117',
-            'uploader_id': 5156321,
+            'uploader_id': '5156321',
         }
         }
+    }, {
+        'url': 'https://medal.tv/clips/37rMeFpryCC-9',
+        'only_matching': True,
+    }, {
+        'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):

+ 76 - 231
youtube_dl/extractor/medialaan.py

@@ -2,268 +2,113 @@ from __future__ import unicode_literals
 
 
 import re
 import re
 
 
-from .gigya import GigyaBaseIE
-
-from ..compat import compat_str
+from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
+    extract_attributes,
     int_or_none,
     int_or_none,
-    parse_duration,
-    try_get,
-    unified_timestamp,
+    mimetype2ext,
+    parse_iso8601,
 )
 )
 
 
 
 
-class MedialaanIE(GigyaBaseIE):
+class MedialaanIE(InfoExtractor):
     _VALID_URL = r'''(?x)
     _VALID_URL = r'''(?x)
                     https?://
                     https?://
-                        (?:www\.|nieuws\.)?
                         (?:
                         (?:
-                            (?P<site_id>vtm|q2|vtmkzoom)\.be/
-                            (?:
-                                video(?:/[^/]+/id/|/?\?.*?\baid=)|
-                                (?:[^/]+/)*
-                            )
+                            (?:embed\.)?mychannels.video/embed/|
+                            embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
+                            (?:www\.)?(?:
+                                (?:
+                                    7sur7|
+                                    demorgen|
+                                    hln|
+                                    joe|
+                                    qmusic
+                                )\.be|
+                                (?:
+                                    [abe]d|
+                                    bndestem|
+                                    destentor|
+                                    gelderlander|
+                                    pzc|
+                                    tubantia|
+                                    volkskrant
+                                )\.nl
+                            )/video/(?:[^/]+/)*[^/?&#]+~p
                         )
                         )
-                        (?P<id>[^/?#&]+)
+                        (?P<id>\d+)
                     '''
                     '''
-    _NETRC_MACHINE = 'medialaan'
-    _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
-    _SITE_TO_APP_ID = {
-        'vtm': 'vtm_watch',
-        'q2': 'q2',
-        'vtmkzoom': 'vtmkzoom',
-    }
     _TESTS = [{
     _TESTS = [{
-        # vod
-        'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
+        'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
         'info_dict': {
         'info_dict': {
-            'id': 'vtm_20170219_VM0678361_vtmwatch',
+            'id': '193993',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'Allemaal Chris afl. 6',
-            'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
-            'timestamp': 1487533280,
-            'upload_date': '20170219',
-            'duration': 2562,
-            'series': 'Allemaal Chris',
-            'season': 'Allemaal Chris',
-            'season_number': 1,
-            'season_id': '256936078124527',
-            'episode': 'Allemaal Chris afl. 6',
-            'episode_number': 6,
-            'episode_id': '256936078591527',
+            'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
+            'timestamp': 1611663540,
+            'upload_date': '20210126',
+            'duration': 238,
         },
         },
         'params': {
         'params': {
             'skip_download': True,
             'skip_download': True,
         },
         },
-        'skip': 'Requires account credentials',
-    }, {
-        # clip
-        'url': 'http://vtm.be/video?aid=168332',
-        'info_dict': {
-            'id': '168332',
-            'ext': 'mp4',
-            'title': '"Veronique liegt!"',
-            'description': 'md5:1385e2b743923afe54ba4adc38476155',
-            'timestamp': 1489002029,
-            'upload_date': '20170308',
-            'duration': 96,
-        },
     }, {
     }, {
-        # vod
-        'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
+        'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
         'only_matching': True,
         'only_matching': True,
     }, {
     }, {
-        # vod
-        'url': 'http://vtm.be/video?aid=163157',
+        'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
         'only_matching': True,
         'only_matching': True,
     }, {
     }, {
-        # vod
-        'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
+        'url': 'https://embed.mychannels.video/script/production/193993',
         'only_matching': True,
         'only_matching': True,
     }, {
     }, {
-        # clip
-        'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
+        'url': 'https://embed.mychannels.video/production/193993',
         'only_matching': True,
         'only_matching': True,
     }, {
     }, {
-        # http/s redirect
-        'url': 'https://vtmkzoom.be/video?aid=45724',
-        'info_dict': {
-            'id': '257136373657000',
-            'ext': 'mp4',
-            'title': 'K3 Dansstudio Ushuaia afl.6',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Requires account credentials',
+        'url': 'https://mychannels.video/embed/193993',
+        'only_matching': True,
     }, {
     }, {
-        # nieuws.vtm.be
-        'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
+        'url': 'https://embed.mychannels.video/embed/193993',
         'only_matching': True,
         'only_matching': True,
     }]
     }]
 
 
-    def _real_initialize(self):
-        self._logged_in = False
-
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            self.raise_login_required()
-
-        auth_data = {
-            'APIKey': self._APIKEY,
-            'sdk': 'js_6.1',
-            'format': 'json',
-            'loginID': username,
-            'password': password,
-        }
-
-        auth_info = self._gigya_login(auth_data)
-
-        self._uid = auth_info['UID']
-        self._uid_signature = auth_info['UIDSignature']
-        self._signature_timestamp = auth_info['signatureTimestamp']
-
-        self._logged_in = True
+    @staticmethod
+    def _extract_urls(webpage):
+        entries = []
+        for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
+            mychannels_id = extract_attributes(element).get('data-mychannels-id')
+            if mychannels_id:
+                entries.append('https://mychannels.video/embed/' + mychannels_id)
+        return entries
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id, site_id = mobj.group('id', 'site_id')
-
-        webpage = self._download_webpage(url, video_id)
-
-        config = self._parse_json(
-            self._search_regex(
-                r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
-                webpage, 'config', default='{}'), video_id,
-            transform_source=lambda s: s.replace(
-                '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
-
-        vod_id = config.get('vodId') or self._search_regex(
-            (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
-             r'"vodId"\s*:\s*"(.+?)"',
-             r'<[^>]+id=["\']vod-(\d+)'),
-            webpage, 'video_id', default=None)
-
-        # clip, no authentication required
-        if not vod_id:
-            player = self._parse_json(
-                self._search_regex(
-                    r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
-                    default=''),
-                video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
-            if player:
-                video = player[-1]
-                if video['videoUrl'] in ('http', 'https'):
-                    return self.url_result(video['url'], MedialaanIE.ie_key())
-                info = {
-                    'id': video_id,
-                    'url': video['videoUrl'],
-                    'title': video['title'],
-                    'thumbnail': video.get('imageUrl'),
-                    'timestamp': int_or_none(video.get('createdDate')),
-                    'duration': int_or_none(video.get('duration')),
-                }
+        production_id = self._match_id(url)
+        production = self._download_json(
+            'https://embed.mychannels.video/sdk/production/' + production_id,
+            production_id, query={'options': 'UUUU_default'})['productions'][0]
+        title = production['title']
+
+        formats = []
+        for source in (production.get('sources') or []):
+            src = source.get('src')
+            if not src:
+                continue
+            ext = mimetype2ext(source.get('type'))
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, production_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
             else:
             else:
-                info = self._parse_html5_media_entries(
-                    url, webpage, video_id, m3u8_id='hls')[0]
-                info.update({
-                    'id': video_id,
-                    'title': self._html_search_meta('description', webpage),
-                    'duration': parse_duration(self._html_search_meta('duration', webpage)),
-                })
-        # vod, authentication required
-        else:
-            if not self._logged_in:
-                self._login()
-
-            settings = self._parse_json(
-                self._search_regex(
-                    r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
-                    webpage, 'drupal settings', default='{}'),
-                video_id)
-
-            def get(container, item):
-                return try_get(
-                    settings, lambda x: x[container][item],
-                    compat_str) or self._search_regex(
-                    r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
-                    default=None)
-
-            app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
-            sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
-
-            data = self._download_json(
-                'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
-                video_id, query={
-                    'app_id': app_id,
-                    'user_network': sso,
-                    'UID': self._uid,
-                    'UIDSignature': self._uid_signature,
-                    'signatureTimestamp': self._signature_timestamp,
+                formats.append({
+                    'ext': ext,
+                    'url': src,
                 })
                 })
-
-            formats = self._extract_m3u8_formats(
-                data['response']['uri'], video_id, entry_protocol='m3u8_native',
-                ext='mp4', m3u8_id='hls')
-
-            self._sort_formats(formats)
-
-            info = {
-                'id': vod_id,
-                'formats': formats,
-            }
-
-            api_key = get('vod', 'apiKey')
-            channel = get('medialaanGigya', 'channel')
-
-            if api_key:
-                videos = self._download_json(
-                    'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
-                    query={
-                        'channels': channel,
-                        'ids': vod_id,
-                        'limit': 1,
-                        'apikey': api_key,
-                    })
-                if videos:
-                    video = try_get(
-                        videos, lambda x: x['response']['videos'][0], dict)
-                    if video:
-                        def get(container, item, expected_type=None):
-                            return try_get(
-                                video, lambda x: x[container][item], expected_type)
-
-                        def get_string(container, item):
-                            return get(container, item, compat_str)
-
-                        info.update({
-                            'series': get_string('program', 'title'),
-                            'season': get_string('season', 'title'),
-                            'season_number': int_or_none(get('season', 'number')),
-                            'season_id': get_string('season', 'id'),
-                            'episode': get_string('episode', 'title'),
-                            'episode_number': int_or_none(get('episode', 'number')),
-                            'episode_id': get_string('episode', 'id'),
-                            'duration': int_or_none(
-                                video.get('duration')) or int_or_none(
-                                video.get('durationMillis'), scale=1000),
-                            'title': get_string('episode', 'title'),
-                            'description': get_string('episode', 'text'),
-                            'timestamp': unified_timestamp(get_string(
-                                'publication', 'begin')),
-                        })
-
-            if not info.get('title'):
-                info['title'] = try_get(
-                    config, lambda x: x['videoConfig']['title'],
-                    compat_str) or self._html_search_regex(
-                    r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
-                    default=None) or self._og_search_title(webpage)
-
-        if not info.get('description'):
-            info['description'] = self._html_search_regex(
-                r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
-                webpage, 'description', default=None)
-
-        return info
+        self._sort_formats(formats)
+
+        return {
+            'id': production_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': production.get('posterUrl'),
+            'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
+            'duration': int_or_none(production.get('duration')) or None,
+        }

+ 196 - 0
youtube_dl/extractor/minds.py

@@ -0,0 +1,196 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    clean_html,
    int_or_none,
    str_or_none,
    strip_or_none,
)
+
+
class MindsBaseIE(InfoExtractor):
    """Shared plumbing for the Minds extractors."""
    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'

    def _call_api(self, path, video_id, resource, query=None):
        """Fetch JSON from the Minds API, forwarding the XSRF cookie token."""
        api_url = 'https://www.minds.com/api/' + path
        xsrf = self._get_cookies(api_url).get('XSRF-TOKEN')
        headers = {
            'Referer': 'https://www.minds.com/',
            # the API rejects requests without the token header; an empty
            # string is accepted for anonymous access
            'X-XSRF-TOKEN': xsrf.value if xsrf else '',
        }
        return self._download_json(
            api_url, video_id, 'Downloading %s JSON metadata' % resource,
            headers=headers, query=query)
+
+
class MindsIE(MindsBaseIE):
    """Extract a single Minds video from media/newsfeed/archive URLs."""
    IE_NAME = 'minds'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.minds.com/media/100000000000086822',
        'md5': '215a658184a419764852239d4970b045',
        'info_dict': {
            'id': '100000000000086822',
            'ext': 'mp4',
            'title': 'Minds intro sequence',
            'thumbnail': r're:https?://.+\.png',
            'uploader_id': 'ottman',
            'upload_date': '20130524',
            'timestamp': 1369404826,
            'uploader': 'Bill Ottman',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['animation'],
            'comment_count': int,
            'license': 'attribution-cc',
        },
    }, {
        # entity.type == 'activity' and empty title
        'url': 'https://www.minds.com/newsfeed/798025111988506624',
        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
        'info_dict': {
            'id': '798022190320226304',
            'ext': 'mp4',
            'title': '798022190320226304',
            'uploader': 'ColinFlaherty',
            'upload_date': '20180111',
            'timestamp': 1515639316,
            'uploader_id': 'ColinFlaherty',
        },
    }, {
        'url': 'https://www.minds.com/archive/view/715172106794442752',
        'only_matching': True,
    }, {
        # youtube perma_url
        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        entity_id = self._match_id(url)
        entity = self._call_api(
            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
        if entity.get('type') == 'activity':
            if entity.get('custom_type') == 'video':
                video_id = entity['entity_guid']
            else:
                # activity wrapping an external video (e.g. a YouTube link)
                return self.url_result(entity['perma_url'])
        else:
            # Previously a bare `assert`, which is stripped under `python -O`
            # and would then leave video_id undefined (NameError); fail with
            # an explicit, expected error instead.
            if entity.get('subtype') != 'video':
                raise ExtractorError(
                    'Unsupported entity subtype %s' % entity.get('subtype'),
                    expected=True)
            video_id = entity_id
        # 1080p and webm formats available only on the sources array
        video = self._call_api(
            'v2/media/video/' + video_id, video_id, 'video')

        formats = []
        for source in (video.get('sources') or []):
            src = source.get('src')
            if not src:
                continue
            formats.append({
                'format_id': source.get('label'),
                'height': int_or_none(source.get('size')),
                'url': src,
            })
        self._sort_formats(formats)

        # the video endpoint carries richer metadata than the entity endpoint
        entity = video.get('entity') or entity
        owner = entity.get('ownerObj') or {}
        uploader_id = owner.get('username')

        tags = entity.get('tags')
        if tags and isinstance(tags, compat_str):
            # the API returns a single tag as a bare string
            tags = [tags]

        thumbnail = None
        poster = video.get('poster') or entity.get('thumbnail_src')
        if poster:
            # follow redirects so the final thumbnail URL is recorded
            urlh = self._request_webpage(poster, video_id, fatal=False)
            if urlh:
                thumbnail = urlh.geturl()

        return {
            'id': video_id,
            'title': entity.get('title') or video_id,
            'formats': formats,
            'description': clean_html(entity.get('description')) or None,
            'license': str_or_none(entity.get('license')),
            'timestamp': int_or_none(entity.get('time_created')),
            'uploader': strip_or_none(owner.get('name')),
            'uploader_id': uploader_id,
            'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
            'view_count': int_or_none(entity.get('play:count')),
            'like_count': int_or_none(entity.get('thumbs:up:count')),
            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
            'tags': tags,
            'comment_count': int_or_none(entity.get('comments:count')),
            'thumbnail': thumbnail,
        }
+
+
class MindsFeedBaseIE(MindsBaseIE):
    """Base for feed (channel/group) extractors; pages through the video list."""
    _PAGE_SIZE = 150

    def _entries(self, feed_id):
        """Yield url_result entries for every video in the feed."""
        query = {'limit': self._PAGE_SIZE, 'sync': 1}
        i = 1
        while True:
            data = self._call_api(
                'v2/feeds/container/%s/videos' % feed_id,
                feed_id, 'page %s' % i, query)
            entities = data.get('entities') or []
            for entity in entities:
                guid = entity.get('guid')
                if not guid:
                    continue
                yield self.url_result(
                    'https://www.minds.com/newsfeed/' + guid,
                    MindsIE.ie_key(), guid)
            # The API may omit 'load-next' on the final page; a plain []
            # lookup would raise KeyError there instead of terminating.
            query['from_timestamp'] = data.get('load-next')
            if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
                break
            i += 1

    def _real_extract(self, url):
        feed_id = self._match_id(url)
        feed = self._call_api(
            'v1/%s/%s' % (self._FEED_PATH, feed_id),
            feed_id, self._FEED_TYPE)[self._FEED_TYPE]

        return self.playlist_result(
            self._entries(feed['guid']), feed_id,
            strip_or_none(feed.get('name')),
            feed.get('briefdescription'))
+
+
class MindsChannelIE(MindsFeedBaseIE):
    """All videos posted to a single Minds channel (user profile)."""
    _FEED_TYPE = 'channel'
    _FEED_PATH = 'channel'
    IE_NAME = 'minds:channel'
    # exclude path prefixes claimed by the other Minds extractors
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.minds.com/ottman',
        'info_dict': {
            'id': 'ottman',
            'title': 'Bill Ottman',
            'description': 'Co-creator & CEO @minds',
        },
        'playlist_mincount': 54,
    }
+
+
class MindsGroupIE(MindsFeedBaseIE):
    """Video feed of a Minds group, addressed by its numeric profile id."""
    _FEED_TYPE = 'group'
    _FEED_PATH = 'groups/group'
    IE_NAME = 'minds:group'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
        'info_dict': {
            'id': '785582576369672204',
            'title': 'Cooking Videos',
        },
        'playlist_mincount': 1,
    }

+ 7 - 2
youtube_dl/extractor/mixcloud.py

@@ -251,8 +251,11 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE):
                 cloudcast_url = cloudcast.get('url')
                 cloudcast_url = cloudcast.get('url')
                 if not cloudcast_url:
                 if not cloudcast_url:
                     continue
                     continue
+                slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
+                owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
+                video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
                 entries.append(self.url_result(
                 entries.append(self.url_result(
-                    cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
+                    cloudcast_url, MixcloudIE.ie_key(), video_id))
 
 
             page_info = items['pageInfo']
             page_info = items['pageInfo']
             has_next_page = page_info['hasNextPage']
             has_next_page = page_info['hasNextPage']
@@ -321,7 +324,8 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
     _DESCRIPTION_KEY = 'biog'
     _DESCRIPTION_KEY = 'biog'
     _ROOT_TYPE = 'user'
     _ROOT_TYPE = 'user'
     _NODE_TEMPLATE = '''slug
     _NODE_TEMPLATE = '''slug
-          url'''
+          url
+          owner { username }'''
 
 
     def _get_playlist_title(self, title, slug):
     def _get_playlist_title(self, title, slug):
         return '%s (%s)' % (title, slug)
         return '%s (%s)' % (title, slug)
@@ -345,6 +349,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
     _NODE_TEMPLATE = '''cloudcast {
     _NODE_TEMPLATE = '''cloudcast {
             slug
             slug
             url
             url
+            owner { username }
           }'''
           }'''
 
 
     def _get_cloudcast(self, node):
     def _get_cloudcast(self, node):

+ 168 - 21
youtube_dl/extractor/mlb.py

@@ -1,15 +1,91 @@
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
-from .nhl import NHLBaseIE
+import re
 
 
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    try_get,
+)
 
 
-class MLBIE(NHLBaseIE):
+
class MLBBaseIE(InfoExtractor):
    """Shared extraction logic for MLB video pages.

    Subclasses supply `_download_video_data`, `_get_feed`,
    `_extract_mlb_subtitles` and `_TIMESTAMP_KEY`.
    """

    def _real_extract(self, url):
        display_id = self._match_id(url)
        video = self._download_video_data(display_id)
        video_id = video['id']
        title = video['title']
        feed = self._get_feed(video)

        formats = []
        for playback in (feed.get('playbacks') or []):
            playback_url = playback.get('url')
            if not playback_url:
                continue
            name = playback.get('name')
            ext = determine_ext(playback_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    playback_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id=name, fatal=False))
            else:
                f = {
                    'format_id': name,
                    'url': playback_url,
                }
                # 'name' is optional in the API response; re.search(None)
                # would raise TypeError, so fall back to an empty string.
                mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name or '')
                if mobj:
                    f.update({
                        'height': int(mobj.group(3)),
                        'tbr': int(mobj.group(1)),
                        'width': int(mobj.group(2)),
                    })
                mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
                if mobj:
                    f.update({
                        'fps': int(mobj.group(3)),
                        'height': int(mobj.group(2)),
                        'tbr': int(mobj.group(4)),
                        'width': int(mobj.group(1)),
                    })
                formats.append(f)
        self._sort_formats(formats)

        thumbnails = []
        for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
            src = cut.get('src')
            if not src:
                continue
            thumbnails.append({
                'height': int_or_none(cut.get('height')),
                'url': src,
                'width': int_or_none(cut.get('width')),
            })

        language = (video.get('language') or 'EN').lower()

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': video.get('description'),
            'duration': parse_duration(feed.get('duration')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
            'subtitles': self._extract_mlb_subtitles(feed, language),
        }
+
+
+class MLBIE(MLBBaseIE):
     _VALID_URL = r'''(?x)
     _VALID_URL = r'''(?x)
                     https?://
                     https?://
-                        (?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
+                        (?:[\da-z_-]+\.)*mlb\.com/
                         (?:
                         (?:
                             (?:
                             (?:
-                                (?:[^/]+/)*c-|
+                                (?:[^/]+/)*video/[^/]+/c-|
                                 (?:
                                 (?:
                                     shared/video/embed/(?:embed|m-internal-embed)\.html|
                                     shared/video/embed/(?:embed|m-internal-embed)\.html|
                                     (?:[^/]+/)+(?:play|index)\.jsp|
                                     (?:[^/]+/)+(?:play|index)\.jsp|
@@ -18,7 +94,6 @@ class MLBIE(NHLBaseIE):
                             (?P<id>\d+)
                             (?P<id>\d+)
                         )
                         )
                     '''
                     '''
-    _CONTENT_DOMAIN = 'content.mlb.com'
     _TESTS = [
     _TESTS = [
         {
         {
             'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
             'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
@@ -76,18 +151,6 @@ class MLBIE(NHLBaseIE):
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
             },
         },
         },
-        {
-            'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
-            'md5': 'e09e37b552351fddbf4d9e699c924d68',
-            'info_dict': {
-                'id': '75609783',
-                'ext': 'mp4',
-                'title': 'Must C: Pillar climbs for catch',
-                'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
-                'timestamp': 1429139220,
-                'upload_date': '20150415',
-            }
-        },
         {
         {
             'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
             'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
             'only_matching': True,
             'only_matching': True,
@@ -113,8 +176,92 @@ class MLBIE(NHLBaseIE):
             'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
             'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
             'only_matching': True,
             'only_matching': True,
         },
         },
-        {
-            'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
-            'only_matching': True,
-        }
     ]
     ]
    # Key in the details JSON that carries the publication time
    _TIMESTAMP_KEY = 'date'

    @staticmethod
    def _get_feed(video):
        # The legacy details endpoint exposes the feed fields (playbacks,
        # image, duration) directly on the video object itself.
        return video
+
+    @staticmethod
+    def _extract_mlb_subtitles(feed, language):
+        subtitles = {}
+        for keyword in (feed.get('keywordsAll') or []):
+            keyword_type = keyword.get('type')
+            if keyword_type and keyword_type.startswith('closed_captions_location_'):
+                cc_location = keyword.get('value')
+                if cc_location:
+                    subtitles.setdefault(language, []).append({
+                        'url': cc_location,
+                    })
+        return subtitles
+
+    def _download_video_data(self, display_id):
+        return self._download_json(
+            'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id,
+            display_id)
+
+
class MLBVideoIE(MLBBaseIE):
    """Extract MLB videos from slug-style /video/ URLs via the GraphQL gateway."""
    _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
        'md5': '632358dacfceec06bad823b83d21df2d',
        'info_dict': {
            'id': 'c04a8863-f569-42e6-9f87-992393657614',
            'ext': 'mp4',
            'title': "Ackley's spectacular catch",
            'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
            'duration': 66,
            'timestamp': 1405995000,
            'upload_date': '20140722',
            'thumbnail': r're:^https?://.+',
        },
    }
    _TIMESTAMP_KEY = 'timestamp'

    @classmethod
    def suitable(cls, url):
        # defer legacy /video/.../c-<numeric id> URLs to MLBIE
        return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)

    @staticmethod
    def _get_feed(video):
        # GraphQL responses nest playback data inside feeds[]
        return video['feeds'][0]

    @staticmethod
    def _extract_mlb_subtitles(feed, language):
        subtitles = {}
        for cc_location in (feed.get('closedCaptions') or []):
            subtitles.setdefault(language, []).append({
                'url': cc_location,
            })
        # Bug fix: the dict was built but never returned, so callers always
        # received None and closed captions were silently dropped.
        return subtitles

    def _download_video_data(self, display_id):
        # https://www.mlb.com/data-service/en/videos/[SLUG]
        return self._download_json(
            'https://fastball-gateway.mlb.com/graphql',
            display_id, query={
                'query': '''{
  mediaPlayback(ids: "%s") {
    description
    feeds(types: CMS) {
      closedCaptions
      duration
      image {
        cuts {
          width
          height
          src
        }
      }
      playbacks {
        name
        url
      }
    }
    id
    timestamp
    title
  }
}''' % display_id,
            })['data']['mediaPlayback'][0]

+ 15 - 13
youtube_dl/extractor/mtv.py

@@ -253,6 +253,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
 
 
         return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
         return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
 
 
+    @staticmethod
+    def _extract_child_with_type(parent, t):
+        for c in parent['children']:
+            if c.get('type') == t:
+                return c
+
     def _extract_mgid(self, webpage):
     def _extract_mgid(self, webpage):
         try:
         try:
             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
@@ -278,6 +284,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if not mgid:
         if not mgid:
             mgid = self._extract_triforce_mgid(webpage)
             mgid = self._extract_triforce_mgid(webpage)
 
 
+        if not mgid:
+            data = self._parse_json(self._search_regex(
+                r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
+            main_container = self._extract_child_with_type(data, 'MainContainer')
+            ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
+            video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
+            mgid = video_player['props']['media']['video']['config']['uri']
+
         return mgid
         return mgid
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
@@ -309,7 +323,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
     @staticmethod
     @staticmethod
     def _extract_url(webpage):
     def _extract_url(webpage):
         mobj = re.search(
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
         if mobj:
         if mobj:
             return mobj.group('url')
             return mobj.group('url')
 
 
@@ -349,18 +363,6 @@ class MTVIE(MTVServicesInfoExtractor):
         'only_matching': True,
         'only_matching': True,
     }]
     }]
 
 
-    @staticmethod
-    def extract_child_with_type(parent, t):
-        children = parent['children']
-        return next(c for c in children if c.get('type') == t)
-
-    def _extract_mgid(self, webpage):
-        data = self._parse_json(self._search_regex(
-            r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
-        main_container = self.extract_child_with_type(data, 'MainContainer')
-        video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
-        return video_player['props']['media']['video']['config']['uri']
-
 
 
 class MTVJapanIE(MTVServicesInfoExtractor):
 class MTVJapanIE(MTVServicesInfoExtractor):
     IE_NAME = 'mtvjapan'
     IE_NAME = 'mtvjapan'

+ 1 - 3
youtube_dl/extractor/ninecninemedia.py

@@ -23,11 +23,9 @@ class NineCNineMediaIE(InfoExtractor):
         destination_code, content_id = re.match(self._VALID_URL, url).groups()
         destination_code, content_id = re.match(self._VALID_URL, url).groups()
         api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
         api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
         content = self._download_json(api_base_url, content_id, query={
         content = self._download_json(api_base_url, content_id, query={
-            '$include': '[Media,Season,ContentPackages]',
+            '$include': '[Media.Name,Season,ContentPackages.Duration,ContentPackages.Id]',
         })
         })
         title = content['Name']
         title = content['Name']
-        if len(content['ContentPackages']) > 1:
-            raise ExtractorError('multiple content packages')
         content_package = content['ContentPackages'][0]
         content_package = content['ContentPackages'][0]
         package_id = content_package['Id']
         package_id = content_package['Id']
         content_package_url = api_base_url + 'contentpackages/%s/' % package_id
         content_package_url = api_base_url + 'contentpackages/%s/' % package_id

+ 107 - 81
youtube_dl/extractor/ninegag.py

@@ -1,104 +1,130 @@
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
-import re
-
 from .common import InfoExtractor
 from .common import InfoExtractor
-from ..utils import str_to_int
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    int_or_none,
+    try_get,
+    unescapeHTML,
+    url_or_none,
+)
 
 
 
 
 class NineGagIE(InfoExtractor):
 class NineGagIE(InfoExtractor):
     IE_NAME = '9gag'
     IE_NAME = '9gag'
-    _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
+    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
 
 
     _TESTS = [{
     _TESTS = [{
-        'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
-        'info_dict': {
-            'id': 'kXzwOKyGlSA',
-            'ext': 'mp4',
-            'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
-            'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
-            'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
-            'uploader': 'CompilationChannel',
-            'upload_date': '20131110',
-            'view_count': int,
-        },
-        'add_ie': ['Youtube'],
-    }, {
-        'url': 'http://9gag.com/tv/p/aKolP3',
+        'url': 'https://9gag.com/gag/ae5Ag7B',
         'info_dict': {
         'info_dict': {
-            'id': 'aKolP3',
+            'id': 'ae5Ag7B',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
-            'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
-            'uploader_id': 'rickmereki',
-            'uploader': 'Rick Mereki',
-            'upload_date': '20110803',
-            'view_count': int,
-        },
-        'add_ie': ['Vimeo'],
-    }, {
-        'url': 'http://9gag.com/tv/p/KklwM',
-        'only_matching': True,
-    }, {
-        'url': 'http://9gag.tv/p/Kk2X5',
-        'only_matching': True,
+            'title': 'Capybara Agility Training',
+            'upload_date': '20191108',
+            'timestamp': 1573237208,
+            'categories': ['Awesome'],
+            'tags': ['Weimaraner', 'American Pit Bull Terrier'],
+            'duration': 44,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+        }
     }, {
     }, {
-        'url': 'http://9gag.com/tv/embed/a5Dmvl',
+        # HTML escaped title
+        'url': 'https://9gag.com/gag/av5nvyb',
         'only_matching': True,
         'only_matching': True,
     }]
     }]
 
 
-    _EXTERNAL_VIDEO_PROVIDER = {
-        '1': {
-            'url': '%s',
-            'ie_key': 'Youtube',
-        },
-        '2': {
-            'url': 'http://player.vimeo.com/video/%s',
-            'ie_key': 'Vimeo',
-        },
-        '3': {
-            'url': 'http://instagram.com/p/%s',
-            'ie_key': 'Instagram',
-        },
-        '4': {
-            'url': 'http://vine.co/v/%s',
-            'ie_key': 'Vine',
-        },
-    }
-
     def _real_extract(self, url):
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id') or video_id
+        post_id = self._match_id(url)
+        post = self._download_json(
+            'https://9gag.com/v1/post', post_id, query={
+                'id': post_id
+            })['data']['post']
+
+        if post.get('type') != 'Animated':
+            raise ExtractorError(
+                'The given url does not contain a video',
+                expected=True)
+
+        title = unescapeHTML(post['title'])
+
+        duration = None
+        formats = []
+        thumbnails = []
+        for key, image in (post.get('images') or {}).items():
+            image_url = url_or_none(image.get('url'))
+            if not image_url:
+                continue
+            ext = determine_ext(image_url)
+            image_id = key.strip('image')
+            common = {
+                'url': image_url,
+                'width': int_or_none(image.get('width')),
+                'height': int_or_none(image.get('height')),
+            }
+            if ext in ('jpg', 'png'):
+                webp_url = image.get('webpUrl')
+                if webp_url:
+                    t = common.copy()
+                    t.update({
+                        'id': image_id + '-webp',
+                        'url': webp_url,
+                    })
+                    thumbnails.append(t)
+                common.update({
+                    'id': image_id,
+                    'ext': ext,
+                })
+                thumbnails.append(common)
+            elif ext in ('webm', 'mp4'):
+                if not duration:
+                    duration = int_or_none(image.get('duration'))
+                common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
+                for vcodec in ('vp8', 'vp9', 'h265'):
+                    c_url = image.get(vcodec + 'Url')
+                    if not c_url:
+                        continue
+                    c_f = common.copy()
+                    c_f.update({
+                        'format_id': image_id + '-' + vcodec,
+                        'url': c_url,
+                        'vcodec': vcodec,
+                    })
+                    formats.append(c_f)
+                common.update({
+                    'ext': ext,
+                    'format_id': image_id,
+                })
+                formats.append(common)
+        self._sort_formats(formats)
 
 
-        webpage = self._download_webpage(url, display_id)
+        section = try_get(post, lambda x: x['postSection']['name'])
 
 
-        post_view = self._parse_json(
-            self._search_regex(
-                r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
-                webpage, 'post view'),
-            display_id)
+        tags = None
+        post_tags = post.get('tags')
+        if post_tags:
+            tags = []
+            for tag in post_tags:
+                tag_key = tag.get('key')
+                if not tag_key:
+                    continue
+                tags.append(tag_key)
 
 
-        ie_key = None
-        source_url = post_view.get('sourceUrl')
-        if not source_url:
-            external_video_id = post_view['videoExternalId']
-            external_video_provider = post_view['videoExternalProvider']
-            source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
-            ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
-        title = post_view['title']
-        description = post_view.get('description')
-        view_count = str_to_int(post_view.get('externalView'))
-        thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
+        get_count = lambda x: int_or_none(post.get(x + 'Count'))
 
 
         return {
         return {
-            '_type': 'url_transparent',
-            'url': source_url,
-            'ie_key': ie_key,
-            'id': video_id,
-            'display_id': display_id,
+            'id': post_id,
             'title': title,
             'title': title,
-            'description': description,
-            'view_count': view_count,
-            'thumbnail': thumbnail,
+            'timestamp': int_or_none(post.get('creationTs')),
+            'duration': duration,
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'like_count': get_count('upVote'),
+            'dislike_count': get_count('downVote'),
+            'comment_count': get_count('comments'),
+            'age_limit': 18 if post.get('nsfw') == 1 else None,
+            'categories': [section] if section else None,
+            'tags': tags,
         }
         }

+ 28 - 26
youtube_dl/extractor/njpwworld.py

@@ -6,30 +6,40 @@ import re
 from .common import InfoExtractor
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..compat import compat_urlparse
 from ..utils import (
 from ..utils import (
-    extract_attributes,
     get_element_by_class,
     get_element_by_class,
     urlencode_postdata,
     urlencode_postdata,
 )
 )
 
 
 
 
 class NJPWWorldIE(InfoExtractor):
 class NJPWWorldIE(InfoExtractor):
-    _VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
+    _VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
     IE_DESC = '新日本プロレスワールド'
     IE_DESC = '新日本プロレスワールド'
     _NETRC_MACHINE = 'njpwworld'
     _NETRC_MACHINE = 'njpwworld'
 
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
         'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
         'info_dict': {
         'info_dict': {
             'id': 's_series_00155_1_9',
             'id': 's_series_00155_1_9',
             'ext': 'mp4',
             'ext': 'mp4',
-            'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
+            'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
             'tags': list,
             'tags': list,
         },
         },
         'params': {
         'params': {
             'skip_download': True,  # AES-encrypted m3u8
             'skip_download': True,  # AES-encrypted m3u8
         },
         },
         'skip': 'Requires login',
         'skip': 'Requires login',
-    }
+    }, {
+        'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
+        'info_dict': {
+            'id': 's_series_00563_16_bs',
+            'ext': 'mp4',
+            'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
+            'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
 
 
     _LOGIN_URL = 'https://front.njpwworld.com/auth/login'
     _LOGIN_URL = 'https://front.njpwworld.com/auth/login'
 
 
@@ -64,35 +74,27 @@ class NJPWWorldIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id)
 
 
         formats = []
         formats = []
-        for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
-            player = extract_attributes(mobj.group(0))
-            player_path = player.get('href')
-            if not player_path:
-                continue
-            kind = self._search_regex(
-                r'(low|high)$', player.get('class') or '', 'kind',
-                default='low')
+        for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
+            player_path = '/intent?id=%s&type=url' % vid
             player_url = compat_urlparse.urljoin(url, player_path)
             player_url = compat_urlparse.urljoin(url, player_path)
-            player_page = self._download_webpage(
-                player_url, video_id, note='Downloading player page')
-            entries = self._parse_html5_media_entries(
-                player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
-                m3u8_entry_protocol='m3u8_native')
-            kind_formats = entries[0]['formats']
-            for f in kind_formats:
-                f['quality'] = 2 if kind == 'high' else 1
-            formats.extend(kind_formats)
+            formats.append({
+                'url': player_url,
+                'format_id': kind,
+                'ext': 'mp4',
+                'protocol': 'm3u8',
+                'quality': 2 if kind == 'high' else 1,
+            })
 
 
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
-        post_content = get_element_by_class('post-content', webpage)
+        tag_block = get_element_by_class('tag-block', webpage)
         tags = re.findall(
         tags = re.findall(
-            r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
-        ) if post_content else None
+            r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
+        ) if tag_block else None
 
 
         return {
         return {
             'id': video_id,
             'id': video_id,
-            'title': self._og_search_title(webpage),
+            'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
             'formats': formats,
             'formats': formats,
             'tags': tags,
             'tags': tags,
         }
         }

+ 1 - 1
youtube_dl/extractor/nrk.py

@@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
 
 
     def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
     def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
         return self._download_json(
         return self._download_json(
-            urljoin('http://psapi.nrk.no/', path),
+            urljoin('https://psapi.nrk.no/', path),
             video_id, note or 'Downloading %s JSON' % item,
             video_id, note or 'Downloading %s JSON' % item,
             fatal=fatal, query=query,
             fatal=fatal, query=query,
             headers={'Accept-Encoding': 'gzip, deflate, br'})
             headers={'Accept-Encoding': 'gzip, deflate, br'})

+ 24 - 2
youtube_dl/extractor/orf.py

@@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
                 elif ext == 'f4m':
                 elif ext == 'f4m':
                     formats.extend(self._extract_f4m_formats(
                     formats.extend(self._extract_f4m_formats(
                         src, video_id, f4m_id=format_id, fatal=False))
                         src, video_id, f4m_id=format_id, fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        src, video_id, mpd_id=format_id, fatal=False))
                 else:
                 else:
                     formats.append({
                     formats.append({
                         'format_id': format_id,
                         'format_id': format_id,
@@ -140,6 +143,25 @@ class ORFTVthekIE(InfoExtractor):
                 })
                 })
 
 
             upload_date = unified_strdate(sd.get('created_date'))
             upload_date = unified_strdate(sd.get('created_date'))
+
+            thumbnails = []
+            preview = sd.get('preview_image_url')
+            if preview:
+                thumbnails.append({
+                    'id': 'preview',
+                    'url': preview,
+                    'preference': 0,
+                })
+            image = sd.get('image_full_url')
+            if not image and len(data_jsb) == 1:
+                image = self._og_search_thumbnail(webpage)
+            if image:
+                thumbnails.append({
+                    'id': 'full',
+                    'url': image,
+                    'preference': 1,
+                })
+
             entries.append({
             entries.append({
                 '_type': 'video',
                 '_type': 'video',
                 'id': video_id,
                 'id': video_id,
@@ -149,7 +171,7 @@ class ORFTVthekIE(InfoExtractor):
                 'description': sd.get('description'),
                 'description': sd.get('description'),
                 'duration': int_or_none(sd.get('duration_in_seconds')),
                 'duration': int_or_none(sd.get('duration_in_seconds')),
                 'upload_date': upload_date,
                 'upload_date': upload_date,
-                'thumbnail': sd.get('image_full_url'),
+                'thumbnails': thumbnails,
             })
             })
 
 
         return {
         return {
@@ -182,7 +204,7 @@ class ORFRadioIE(InfoExtractor):
             duration = end - start if end and start else None
             duration = end - start if end and start else None
             entries.append({
             entries.append({
                 'id': loop_stream_id.replace('.mp3', ''),
                 'id': loop_stream_id.replace('.mp3', ''),
-                'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
+                'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
                 'title': title,
                 'title': title,
                 'description': clean_html(data.get('subtitle')),
                 'description': clean_html(data.get('subtitle')),
                 'duration': duration,
                 'duration': duration,

+ 148 - 0
youtube_dl/extractor/palcomp3.py

@@ -0,0 +1,148 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    str_or_none,
+    try_get,
+)
+
+
+class PalcoMP3BaseIE(InfoExtractor):
+    _GQL_QUERY_TMPL = '''{
+  artist(slug: "%s") {
+    %s
+  }
+}'''
+    _ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
+      %s
+    }'''
+    _MUSIC_FIELDS = '''duration
+      hls
+      mp3File
+      musicID
+      plays
+      title'''
+
+    def _call_api(self, artist_slug, artist_fields):
+        return self._download_json(
+            'https://www.palcomp3.com.br/graphql/', artist_slug, query={
+                'query': self._GQL_QUERY_TMPL % (artist_slug, artist_fields),
+            })['data']
+
+    def _parse_music(self, music):
+        music_id = compat_str(music['musicID'])
+        title = music['title']
+
+        formats = []
+        hls_url = music.get('hls')
+        if hls_url:
+            formats.append({
+                'url': hls_url,
+                'protocol': 'm3u8_native',
+                'ext': 'mp4',
+            })
+        mp3_file = music.get('mp3File')
+        if mp3_file:
+            formats.append({
+                'url': mp3_file,
+            })
+
+        return {
+            'id': music_id,
+            'title': title,
+            'formats': formats,
+            'duration': int_or_none(music.get('duration')),
+            'view_count': int_or_none(music.get('plays')),
+        }
+
+    def _real_initialize(self):
+        self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS
+
+    def _real_extract(self, url):
+        artist_slug, music_slug = re.match(self._VALID_URL, url).groups()
+        artist_fields = self._ARTIST_FIELDS_TMPL % music_slug
+        music = self._call_api(artist_slug, artist_fields)['artist']['music']
+        return self._parse_music(music)
+
+
+class PalcoMP3IE(PalcoMP3BaseIE):
+    IE_NAME = 'PalcoMP3:song'
+    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
+        'md5': '99fd6405b2d8fd589670f6db1ba3b358',
+        'info_dict': {
+            'id': '3162927',
+            'ext': 'mp3',
+            'title': 'Nossas Composições - CUIDA BEM DELA',
+            'duration': 210,
+            'view_count': int,
+        }
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if PalcoMP3VideoIE.suitable(url) else super(PalcoMP3IE, cls).suitable(url)
+
+
+class PalcoMP3ArtistIE(PalcoMP3BaseIE):
+    IE_NAME = 'PalcoMP3:artist'
+    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://www.palcomp3.com.br/condedoforro/',
+        'info_dict': {
+            'id': '358396',
+            'title': 'Conde do Forró',
+        },
+        'playlist_mincount': 188,
+    }]
+    _ARTIST_FIELDS_TMPL = '''artistID
+    musics {
+      nodes {
+        %s
+      }
+    }
+    name'''
+
+    @ classmethod
+    def suitable(cls, url):
+        return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        artist_slug = self._match_id(url)
+        artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
+
+        def entries():
+            for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
+                yield self._parse_music(music)
+
+        return self.playlist_result(
+            entries(), str_or_none(artist.get('artistID')), artist.get('name'))
+
+
+class PalcoMP3VideoIE(PalcoMP3BaseIE):
+    IE_NAME = 'PalcoMP3:video'
+    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
+    _TESTS = [{
+        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
+        'add_ie': ['Youtube'],
+        'info_dict': {
+            'id': '_pD1nR2qqPg',
+            'ext': 'mp4',
+            'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
+            'description': 'md5:7043342c09a224598e93546e98e49282',
+            'upload_date': '20161107',
+            'uploader_id': 'maiaramaraisaoficial',
+            'uploader': 'Maiara e Maraisa',
+        }
+    }]
+    _MUSIC_FIELDS = 'youtubeID'
+
+    def _parse_music(self, music):
+        youtube_id = music['youtubeID']
+        return self.url_result(youtube_id, 'Youtube', youtube_id)

+ 36 - 12
youtube_dl/extractor/peertube.py

@@ -413,7 +413,8 @@ class PeerTubeIE(InfoExtractor):
                             peertube3\.cpy\.re|
                             peertube3\.cpy\.re|
                             peertube2\.cpy\.re|
                             peertube2\.cpy\.re|
                             videos\.tcit\.fr|
                             videos\.tcit\.fr|
-                            peertube\.cpy\.re
+                            peertube\.cpy\.re|
+                            canard\.tube
                         )'''
                         )'''
     _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
     _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
     _API_BASE = 'https://%s/api/v1/videos/%s/%s'
     _API_BASE = 'https://%s/api/v1/videos/%s/%s'
@@ -450,6 +451,18 @@ class PeerTubeIE(InfoExtractor):
             'tags': ['framasoft', 'peertube'],
             'tags': ['framasoft', 'peertube'],
             'categories': ['Science & Technology'],
             'categories': ['Science & Technology'],
         }
         }
+    }, {
+        # Issue #26002
+        'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
+        'info_dict': {
+            'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
+            'ext': 'mp4',
+            'title': 'Dot matrix printer shell demo',
+            'uploader_id': '3',
+            'timestamp': 1587401293,
+            'upload_date': '20200420',
+            'uploader': 'Drew DeVault',
+        }
     }, {
     }, {
         'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
         'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
         'only_matching': True,
         'only_matching': True,
@@ -526,7 +539,15 @@ class PeerTubeIE(InfoExtractor):
         title = video['name']
         title = video['name']
 
 
         formats = []
         formats = []
-        for file_ in video['files']:
+        files = video.get('files') or []
+        for playlist in (video.get('streamingPlaylists') or []):
+            if not isinstance(playlist, dict):
+                continue
+            playlist_files = playlist.get('files')
+            if not (playlist_files and isinstance(playlist_files, list)):
+                continue
+            files.extend(playlist_files)
+        for file_ in files:
             if not isinstance(file_, dict):
             if not isinstance(file_, dict):
                 continue
                 continue
             file_url = url_or_none(file_.get('fileUrl'))
             file_url = url_or_none(file_.get('fileUrl'))
@@ -548,15 +569,15 @@ class PeerTubeIE(InfoExtractor):
             formats.append(f)
             formats.append(f)
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
-        full_description = self._call_api(
-            host, video_id, 'description', note='Downloading description JSON',
-            fatal=False)
+        description = video.get('description')
+        if len(description) >= 250:
+            # description is shortened
+            full_description = self._call_api(
+                host, video_id, 'description', note='Downloading description JSON',
+                fatal=False)
 
 
-        description = None
-        if isinstance(full_description, dict):
-            description = str_or_none(full_description.get('description'))
-        if not description:
-            description = video.get('description')
+            if isinstance(full_description, dict):
+                description = str_or_none(full_description.get('description')) or description
 
 
         subtitles = self.extract_subtitles(host, video_id)
         subtitles = self.extract_subtitles(host, video_id)
 
 
@@ -578,11 +599,13 @@ class PeerTubeIE(InfoExtractor):
         else:
         else:
             age_limit = None
             age_limit = None
 
 
+        webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
+
         return {
         return {
             'id': video_id,
             'id': video_id,
             'title': title,
             'title': title,
             'description': description,
             'description': description,
-            'thumbnail': urljoin(url, video.get('thumbnailPath')),
+            'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
             'timestamp': unified_timestamp(video.get('publishedAt')),
             'timestamp': unified_timestamp(video.get('publishedAt')),
             'uploader': account_data('displayName', compat_str),
             'uploader': account_data('displayName', compat_str),
             'uploader_id': str_or_none(account_data('id', int)),
             'uploader_id': str_or_none(account_data('id', int)),
@@ -600,5 +623,6 @@ class PeerTubeIE(InfoExtractor):
             'tags': try_get(video, lambda x: x['tags'], list),
             'tags': try_get(video, lambda x: x['tags'], list),
             'categories': categories,
             'categories': categories,
             'formats': formats,
             'formats': formats,
-            'subtitles': subtitles
+            'subtitles': subtitles,
+            'webpage_url': webpage_url,
         }
         }

+ 7 - 1
youtube_dl/extractor/periscope.py

@@ -12,6 +12,10 @@ from ..utils import (
 
 
 
 
 class PeriscopeBaseIE(InfoExtractor):
 class PeriscopeBaseIE(InfoExtractor):
+    _M3U8_HEADERS = {
+        'Referer': 'https://www.periscope.tv/'
+    }
+
     def _call_api(self, method, query, item_id):
     def _call_api(self, method, query, item_id):
         return self._download_json(
         return self._download_json(
             'https://api.periscope.tv/api/v2/%s' % method,
             'https://api.periscope.tv/api/v2/%s' % method,
@@ -54,9 +58,11 @@ class PeriscopeBaseIE(InfoExtractor):
             m3u8_url, video_id, 'mp4',
             m3u8_url, video_id, 'mp4',
             entry_protocol='m3u8_native'
             entry_protocol='m3u8_native'
             if state in ('ended', 'timed_out') else 'm3u8',
             if state in ('ended', 'timed_out') else 'm3u8',
-            m3u8_id=format_id, fatal=fatal)
+            m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS)
         if len(m3u8_formats) == 1:
         if len(m3u8_formats) == 1:
             self._add_width_and_height(m3u8_formats[0], width, height)
             self._add_width_and_height(m3u8_formats[0], width, height)
+        for f in m3u8_formats:
+            f.setdefault('http_headers', {}).update(self._M3U8_HEADERS)
         return m3u8_formats
         return m3u8_formats
 
 
 
 

+ 121 - 33
youtube_dl/extractor/phoenix.py

@@ -1,45 +1,133 @@
+# coding: utf-8
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
-from .dreisat import DreiSatIE
+import re
 
 
+from .youtube import YoutubeIE
+from .zdf import ZDFBaseIE
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    merge_dicts,
+    try_get,
+    unified_timestamp,
+    urljoin,
+)
 
 
-class PhoenixIE(DreiSatIE):
+
+class PhoenixIE(ZDFBaseIE):
     IE_NAME = 'phoenix.de'
     IE_NAME = 'phoenix.de'
-    _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
-        (?:
-            phoenix/die_sendungen/(?:[^/]+/)?
-        )?
-        (?P<id>[0-9]+)'''
-    _TESTS = [
-        {
-            'url': 'http://www.phoenix.de/content/884301',
-            'md5': 'ed249f045256150c92e72dbb70eadec6',
-            'info_dict': {
-                'id': '884301',
-                'ext': 'mp4',
-                'title': 'Michael Krons mit Hans-Werner Sinn',
-                'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
-                'upload_date': '20141025',
-                'uploader': 'Im Dialog',
-            }
+    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
+    _TESTS = [{
+        # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
+        'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
+        'md5': '34ec321e7eb34231fd88616c65c92db0',
+        'info_dict': {
+            'id': '210222_phx_nachgehakt_corona_protest',
+            'ext': 'mp4',
+            'title': 'Wohin führt der Protest in der Pandemie?',
+            'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
+            'duration': 1691,
+            'timestamp': 1613902500,
+            'upload_date': '20210221',
+            'uploader': 'Phoenix',
+            'series': 'corona nachgehakt',
+            'episode': 'Wohin führt der Protest in der Pandemie?',
         },
         },
-        {
-            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
-            'only_matching': True,
+    }, {
+        # Youtube embed
+        'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
+        'info_dict': {
+            'id': 'hMQtqFYjomk',
+            'ext': 'mp4',
+            'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
+            'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
+            'duration': 3509,
+            'upload_date': '20201219',
+            'uploader': 'phoenix',
+            'uploader_id': 'phoenix',
         },
         },
-        {
-            'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
-            'only_matching': True,
+        'params': {
+            'skip_download': True,
         },
         },
-    ]
+    }, {
+        'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
+        'only_matching': True,
+    }, {
+        # no media
+        'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
+        'only_matching': True,
+    }, {
+        # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
+        'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
+        'only_matching': True,
+    }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        article_id = self._match_id(url)
+
+        article = self._download_json(
+            'https://www.phoenix.de/response/id/%s' % article_id, article_id,
+            'Downloading article JSON')
+
+        video = article['absaetze'][0]
+        title = video.get('titel') or article.get('subtitel')
+
+        if video.get('typ') == 'video-youtube':
+            video_id = video['id']
+            return self.url_result(
+                video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
+                video_title=title)
+
+        video_id = compat_str(video.get('basename') or video.get('content'))
 
 
-        internal_id = self._search_regex(
-            r'<div class="phx_vod" id="phx_vod_([0-9]+)"',
-            webpage, 'internal video ID')
+        details = self._download_json(
+            'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
+            video_id, 'Downloading details JSON', query={
+                'ak': 'web',
+                'ptmd': 'true',
+                'id': video_id,
+                'profile': 'player2',
+            })
+
+        title = title or details['title']
+        content_id = details['tracking']['nielsen']['content']['assetid']
+
+        info = self._extract_ptmd(
+            'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
+            content_id, None, url)
+
+        duration = int_or_none(try_get(
+            details, lambda x: x['tracking']['nielsen']['content']['length']))
+        timestamp = unified_timestamp(details.get('editorialDate'))
+        series = try_get(
+            details, lambda x: x['tracking']['nielsen']['content']['program'],
+            compat_str)
+        episode = title if details.get('contentType') == 'episode' else None
+
+        thumbnails = []
+        teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
+        for thumbnail_key, thumbnail_url in teaser_images.items():
+            thumbnail_url = urljoin(url, thumbnail_url)
+            if not thumbnail_url:
+                continue
+            thumbnail = {
+                'url': thumbnail_url,
+            }
+            m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
+            if m:
+                thumbnail['width'] = int(m.group(1))
+                thumbnail['height'] = int(m.group(2))
+            thumbnails.append(thumbnail)
 
 
-        api_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % internal_id
-        return self.extract_from_xml_url(video_id, api_url)
+        return merge_dicts(info, {
+            'id': content_id,
+            'title': title,
+            'description': details.get('leadParagraph'),
+            'duration': duration,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'uploader': details.get('tvService'),
+            'series': series,
+            'episode': episode,
+        })

+ 37 - 63
youtube_dl/extractor/picarto.py

@@ -1,22 +1,15 @@
 # coding: utf-8
 # coding: utf-8
 from __future__ import unicode_literals
 from __future__ import unicode_literals
 
 
-import re
-import time
-
 from .common import InfoExtractor
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
 from ..utils import (
     ExtractorError,
     ExtractorError,
     js_to_json,
     js_to_json,
-    try_get,
-    update_url_query,
-    urlencode_postdata,
 )
 )
 
 
 
 
 class PicartoIE(InfoExtractor):
 class PicartoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
+    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
     _TEST = {
     _TEST = {
         'url': 'https://picarto.tv/Setz',
         'url': 'https://picarto.tv/Setz',
         'info_dict': {
         'info_dict': {
@@ -34,65 +27,46 @@ class PicartoIE(InfoExtractor):
         return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
         return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        channel_id = mobj.group('id')
-
-        metadata = self._download_json(
-            'https://api.picarto.tv/v1/channel/name/' + channel_id,
-            channel_id)
-
-        if metadata.get('online') is False:
+        channel_id = self._match_id(url)
+
+        data = self._download_json(
+            'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
+                'query': '''{
+  channel(name: "%s") {
+    adult
+    id
+    online
+    stream_name
+    title
+  }
+  getLoadBalancerUrl(channel_name: "%s") {
+    url
+  }
+}''' % (channel_id, channel_id),
+            })['data']
+        metadata = data['channel']
+
+        if metadata.get('online') == 0:
             raise ExtractorError('Stream is offline', expected=True)
             raise ExtractorError('Stream is offline', expected=True)
+        title = metadata['title']
 
 
         cdn_data = self._download_json(
         cdn_data = self._download_json(
-            'https://picarto.tv/process/channel', channel_id,
-            data=urlencode_postdata({'loadbalancinginfo': channel_id}),
-            note='Downloading load balancing info')
-
-        token = mobj.group('token') or 'public'
-        params = {
-            'con': int(time.time() * 1000),
-            'token': token,
-        }
+            data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
+            channel_id, 'Downloading load balancing info')
 
 
-        prefered_edge = cdn_data.get('preferedEdge')
         formats = []
         formats = []
-
-        for edge in cdn_data['edges']:
-            edge_ep = edge.get('ep')
-            if not edge_ep or not isinstance(edge_ep, compat_str):
+        for source in (cdn_data.get('source') or []):
+            source_url = source.get('url')
+            if not source_url:
                 continue
                 continue
-            edge_id = edge.get('id')
-            for tech in cdn_data['techs']:
-                tech_label = tech.get('label')
-                tech_type = tech.get('type')
-                preference = 0
-                if edge_id == prefered_edge:
-                    preference += 1
-                format_id = []
-                if edge_id:
-                    format_id.append(edge_id)
-                if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
-                    format_id.append('hls')
-                    formats.extend(self._extract_m3u8_formats(
-                        update_url_query(
-                            'https://%s/hls/%s/index.m3u8'
-                            % (edge_ep, channel_id), params),
-                        channel_id, 'mp4', preference=preference,
-                        m3u8_id='-'.join(format_id), fatal=False))
-                    continue
-                elif tech_type == 'video/mp4' or tech_label == 'MP4':
-                    format_id.append('mp4')
-                    formats.append({
-                        'url': update_url_query(
-                            'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
-                            params),
-                        'format_id': '-'.join(format_id),
-                        'preference': preference,
-                    })
-                else:
-                    # rtmp format does not seem to work
-                    continue
+            source_type = source.get('type')
+            if source_type == 'html5/application/vnd.apple.mpegurl':
+                formats.extend(self._extract_m3u8_formats(
+                    source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif source_type == 'html5/video/mp4':
+                formats.append({
+                    'url': source_url,
+                })
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
         mature = metadata.get('adult')
         mature = metadata.get('adult')
@@ -103,10 +77,10 @@ class PicartoIE(InfoExtractor):
 
 
         return {
         return {
             'id': channel_id,
             'id': channel_id,
-            'title': self._live_title(metadata.get('title') or channel_id),
+            'title': self._live_title(title.strip()),
             'is_live': True,
             'is_live': True,
-            'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
             'channel': channel_id,
             'channel': channel_id,
+            'channel_id': metadata.get('id'),
             'channel_url': 'https://picarto.tv/%s' % channel_id,
             'channel_url': 'https://picarto.tv/%s' % channel_id,
             'age_limit': age_limit,
             'age_limit': age_limit,
             'formats': formats,
             'formats': formats,

+ 3 - 1
youtube_dl/extractor/pinterest.py

@@ -31,6 +31,7 @@ class PinterestBaseIE(InfoExtractor):
 
 
         title = (data.get('title') or data.get('grid_title') or video_id).strip()
         title = (data.get('title') or data.get('grid_title') or video_id).strip()
 
 
+        urls = []
         formats = []
         formats = []
         duration = None
         duration = None
         if extract_formats:
         if extract_formats:
@@ -38,8 +39,9 @@ class PinterestBaseIE(InfoExtractor):
                 if not isinstance(format_dict, dict):
                 if not isinstance(format_dict, dict):
                     continue
                     continue
                 format_url = url_or_none(format_dict.get('url'))
                 format_url = url_or_none(format_dict.get('url'))
-                if not format_url:
+                if not format_url or format_url in urls:
                     continue
                     continue
+                urls.append(format_url)
                 duration = float_or_none(format_dict.get('duration'), scale=1000)
                 duration = float_or_none(format_dict.get('duration'), scale=1000)
                 ext = determine_ext(format_url)
                 ext = determine_ext(format_url)
                 if 'hls' in format_id.lower() or ext == 'm3u8':
                 if 'hls' in format_id.lower() or ext == 'm3u8':

+ 65 - 0
youtube_dl/extractor/playstuff.py

@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    smuggle_url,
+    try_get,
+)
+
+
+class PlayStuffIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a',
+        'md5': 'c82d3669e5247c64bc382577843e5bd0',
+        'info_dict': {
+            'id': '6250584958001',
+            'ext': 'mp4',
+            'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga',
+            'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913',
+            'uploader_id': '6005208634001',
+            'timestamp': 1619491027,
+            'upload_date': '20210427',
+        },
+        'add_ie': ['BrightcoveNew'],
+    }, {
+        # geo restricted, bypassable
+        'url': 'https://play.stuff.co.nz/details/_6155660351001',
+        'only_matching': True,
+    }]
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        state = self._parse_json(
+            self._search_regex(
+                r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'),
+            video_id)
+
+        account_id = try_get(
+            state, lambda x: x['configurations']['accountId'],
+            compat_str) or '6005208634001'
+        player_id = try_get(
+            state, lambda x: x['configurations']['playerId'],
+            compat_str) or 'default'
+
+        entries = []
+        for item_id, video in state['items'].items():
+            if not isinstance(video, dict):
+                continue
+            asset_id = try_get(
+                video, lambda x: x['content']['attributes']['assetId'],
+                compat_str)
+            if not asset_id:
+                continue
+            entries.append(self.url_result(
+                smuggle_url(
+                    self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id),
+                    {'geo_countries': ['NZ']}),
+                'BrightcoveNew', video_id))
+
+        return self.playlist_result(entries, video_id)

+ 1 - 1
youtube_dl/extractor/pluralsight.py

@@ -393,7 +393,7 @@ query viewClip {
                 # To somewhat reduce the probability of these consequences
                 # To somewhat reduce the probability of these consequences
                 # we will sleep random amount of time before each call to ViewClip.
                 # we will sleep random amount of time before each call to ViewClip.
                 self._sleep(
                 self._sleep(
-                    random.randint(2, 5), display_id,
+                    random.randint(5, 10), display_id,
                     '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
                     '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
 
 
                 if not viewclip:
                 if not viewclip:

+ 197 - 70
youtube_dl/extractor/pornhub.py

@@ -22,11 +22,16 @@ from ..utils import (
     orderedSet,
     orderedSet,
     remove_quotes,
     remove_quotes,
     str_to_int,
     str_to_int,
+    update_url_query,
+    urlencode_postdata,
     url_or_none,
     url_or_none,
 )
 )
 
 
 
 
 class PornHubBaseIE(InfoExtractor):
 class PornHubBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'pornhub'
+    _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
+
     def _download_webpage_handle(self, *args, **kwargs):
     def _download_webpage_handle(self, *args, **kwargs):
         def dl(*args, **kwargs):
         def dl(*args, **kwargs):
             return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
             return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
@@ -52,17 +57,79 @@ class PornHubBaseIE(InfoExtractor):
 
 
         return webpage, urlh
         return webpage, urlh
 
 
+    def _real_initialize(self):
+        self._logged_in = False
+
+    def _login(self, host):
+        if self._logged_in:
+            return
+
+        site = host.split('.')[0]
+
+        # Both sites pornhub and pornhubpremium have separate accounts
+        # so there should be an option to provide credentials for both.
+        # At the same time some videos are available under the same video id
+        # on both sites so that we have to identify them as the same video.
+        # For that purpose we have to keep both in the same extractor
+        # but under different netrc machines.
+        username, password = self._get_login_info(netrc_machine=site)
+        if username is None:
+            return
+
+        login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
+        login_page = self._download_webpage(
+            login_url, None, 'Downloading %s login page' % site)
+
+        def is_logged(webpage):
+            return any(re.search(p, webpage) for p in (
+                r'class=["\']signOut',
+                r'>Sign\s+[Oo]ut\s*<'))
+
+        if is_logged(login_page):
+            self._logged_in = True
+            return
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        response = self._download_json(
+            'https://www.%s/front/authenticate' % host, None,
+            'Logging in to %s' % site,
+            data=urlencode_postdata(login_form),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                'Referer': login_url,
+                'X-Requested-With': 'XMLHttpRequest',
+            })
+
+        if response.get('success') == '1':
+            self._logged_in = True
+            return
+
+        message = response.get('message')
+        if message is not None:
+            raise ExtractorError(
+                'Unable to login: %s' % message, expected=True)
+
+        raise ExtractorError('Unable to log in')
+
 
 
 class PornHubIE(PornHubBaseIE):
 class PornHubIE(PornHubBaseIE):
     IE_DESC = 'PornHub and Thumbzilla'
     IE_DESC = 'PornHub and Thumbzilla'
     _VALID_URL = r'''(?x)
     _VALID_URL = r'''(?x)
                     https?://
                     https?://
                         (?:
                         (?:
-                            (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+                            (?:[^/]+\.)?
+                            %s
+                            /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                             (?:www\.)?thumbzilla\.com/video/
                             (?:www\.)?thumbzilla\.com/video/
                         )
                         )
                         (?P<id>[\da-z]+)
                         (?P<id>[\da-z]+)
-                    '''
+                    ''' % PornHubBaseIE._PORNHUB_HOST_RE
     _TESTS = [{
     _TESTS = [{
         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
         'md5': 'a6391306d050e4547f62b3f485dd9ba9',
         'md5': 'a6391306d050e4547f62b3f485dd9ba9',
@@ -103,6 +170,7 @@ class PornHubIE(PornHubBaseIE):
         'params': {
         'params': {
             'skip_download': True,
             'skip_download': True,
         },
         },
+        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
     }, {
     }, {
         # subtitles
         # subtitles
         'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
         'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
@@ -163,12 +231,27 @@ class PornHubIE(PornHubBaseIE):
     }, {
     }, {
         'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
         'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        # Some videos are available with the same id on both premium
+        # and non-premium sites (e.g. this and the following test)
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+        'only_matching': True,
+    }, {
+        # geo restricted
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
+        'only_matching': True,
+    }, {
+        'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
+        'only_matching': True,
     }]
     }]
 
 
     @staticmethod
     @staticmethod
     def _extract_urls(webpage):
     def _extract_urls(webpage):
         return re.findall(
         return re.findall(
-            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
             webpage)
             webpage)
 
 
     def _extract_count(self, pattern, webpage, name):
     def _extract_count(self, pattern, webpage, name):
@@ -180,12 +263,7 @@ class PornHubIE(PornHubBaseIE):
         host = mobj.group('host') or 'pornhub.com'
         host = mobj.group('host') or 'pornhub.com'
         video_id = mobj.group('id')
         video_id = mobj.group('id')
 
 
-        if 'premium' in host:
-            if not self._downloader.params.get('cookiefile'):
-                raise ExtractorError(
-                    'PornHub Premium requires authentication.'
-                    ' You may want to use --cookies.',
-                    expected=True)
+        self._login(host)
 
 
         self._set_cookie(host, 'age_verified', '1')
         self._set_cookie(host, 'age_verified', '1')
 
 
@@ -198,7 +276,8 @@ class PornHubIE(PornHubBaseIE):
         webpage = dl_webpage('pc')
         webpage = dl_webpage('pc')
 
 
         error_msg = self._html_search_regex(
         error_msg = self._html_search_regex(
-            r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+            (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+             r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
             webpage, 'error message', default=None, group='error')
             webpage, 'error message', default=None, group='error')
         if error_msg:
         if error_msg:
             error_msg = re.sub(r'\s+', ' ', error_msg)
             error_msg = re.sub(r'\s+', ' ', error_msg)
@@ -206,6 +285,11 @@ class PornHubIE(PornHubBaseIE):
                 'PornHub said: %s' % error_msg,
                 'PornHub said: %s' % error_msg,
                 expected=True, video_id=video_id)
                 expected=True, video_id=video_id)
 
 
+        if any(re.search(p, webpage) for p in (
+                r'class=["\']geoBlocked["\']',
+                r'>\s*This content is unavailable in your country')):
+            self.raise_geo_restricted()
+
         # video_title from flashvars contains whitespace instead of non-ASCII (see
         # video_title from flashvars contains whitespace instead of non-ASCII (see
         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
         # on that anymore.
         # on that anymore.
@@ -327,35 +411,49 @@ class PornHubIE(PornHubBaseIE):
 
 
         upload_date = None
         upload_date = None
         formats = []
         formats = []
-        for video_url, height in video_urls:
-            if not upload_date:
-                upload_date = self._search_regex(
-                    r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
-                if upload_date:
-                    upload_date = upload_date.replace('/', '')
-            ext = determine_ext(video_url)
+
+        def add_format(format_url, height=None):
+            ext = determine_ext(format_url)
             if ext == 'mpd':
             if ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
                 formats.extend(self._extract_mpd_formats(
-                    video_url, video_id, mpd_id='dash', fatal=False))
-                continue
-            elif ext == 'm3u8':
+                    format_url, video_id, mpd_id='dash', fatal=False))
+                return
+            if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
                 formats.extend(self._extract_m3u8_formats(
-                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id='hls', fatal=False))
                     m3u8_id='hls', fatal=False))
-                continue
-            tbr = None
-            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
-            if mobj:
-                if not height:
-                    height = int(mobj.group('height'))
-                tbr = int(mobj.group('tbr'))
+                return
+            if not height:
+                height = int_or_none(self._search_regex(
+                    r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
+                    default=None))
             formats.append({
             formats.append({
-                'url': video_url,
+                'url': format_url,
                 'format_id': '%dp' % height if height else None,
                 'format_id': '%dp' % height if height else None,
                 'height': height,
                 'height': height,
-                'tbr': tbr,
             })
             })
-        self._sort_formats(formats)
+
+        for video_url, height in video_urls:
+            if not upload_date:
+                upload_date = self._search_regex(
+                    r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
+                if upload_date:
+                    upload_date = upload_date.replace('/', '')
+            if '/video/get_media' in video_url:
+                medias = self._download_json(video_url, video_id, fatal=False)
+                if isinstance(medias, list):
+                    for media in medias:
+                        if not isinstance(media, dict):
+                            continue
+                        video_url = url_or_none(media.get('videoUrl'))
+                        if not video_url:
+                            continue
+                        height = int_or_none(media.get('quality'))
+                        add_format(video_url, height)
+                continue
+            add_format(video_url)
+        self._sort_formats(
+            formats, field_preference=('height', 'width', 'fps', 'format_id'))
 
 
         video_uploader = self._html_search_regex(
         video_uploader = self._html_search_regex(
             r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
             r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
@@ -405,6 +503,10 @@ class PornHubIE(PornHubBaseIE):
 
 
 
 
 class PornHubPlaylistBaseIE(PornHubBaseIE):
 class PornHubPlaylistBaseIE(PornHubBaseIE):
+    def _extract_page(self, url):
+        return int_or_none(self._search_regex(
+            r'\bpage=(\d+)', url, 'page', default=None))
+
     def _extract_entries(self, webpage, host):
     def _extract_entries(self, webpage, host):
         # Only process container div with main playlist content skipping
         # Only process container div with main playlist content skipping
         # drop-down menu that uses similar pattern for videos (see
         # drop-down menu that uses similar pattern for videos (see
@@ -422,29 +524,9 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
                 container))
                 container))
         ]
         ]
 
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        host = mobj.group('host')
-        playlist_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, playlist_id)
-
-        entries = self._extract_entries(webpage, host)
-
-        playlist = self._parse_json(
-            self._search_regex(
-                r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
-                'playlist', default='{}'),
-            playlist_id, fatal=False)
-        title = playlist.get('title') or self._search_regex(
-            r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
-
-        return self.playlist_result(
-            entries, playlist_id, title, playlist.get('description'))
-
 
 
 class PornHubUserIE(PornHubPlaylistBaseIE):
 class PornHubUserIE(PornHubPlaylistBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
     _TESTS = [{
     _TESTS = [{
         'url': 'https://www.pornhub.com/model/zoe_ph',
         'url': 'https://www.pornhub.com/model/zoe_ph',
         'playlist_mincount': 118,
         'playlist_mincount': 118,
@@ -463,14 +545,30 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
     }, {
     }, {
         'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
         'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        # Unavailable via /videos page, but available with direct pagination
+        # on pornstar page (see [1]), requires premium
+        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+        'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
+        'only_matching': True,
+    }, {
+        # Same as before, multi page
+        'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
+        'only_matching': True,
+    }, {
+        'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
+        'only_matching': True,
     }]
     }]
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
         user_id = mobj.group('id')
         user_id = mobj.group('id')
+        videos_url = '%s/videos' % mobj.group('url')
+        page = self._extract_page(url)
+        if page:
+            videos_url = update_url_query(videos_url, {'page': page})
         return self.url_result(
         return self.url_result(
-            '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
-            video_id=user_id)
+            videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
 
 
 
 
 class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
 class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
@@ -483,36 +581,59 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
                 <button[^>]+\bid=["\']moreDataBtn
                 <button[^>]+\bid=["\']moreDataBtn
             ''', webpage) is not None
             ''', webpage) is not None
 
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        host = mobj.group('host')
-        item_id = mobj.group('id')
+    def _entries(self, url, host, item_id):
+        page = self._extract_page(url)
 
 
-        page = int_or_none(self._search_regex(
-            r'\bpage=(\d+)', url, 'page', default=None))
+        VIDEOS = '/videos'
 
 
-        entries = []
-        for page_num in (page, ) if page is not None else itertools.count(1):
+        def download_page(base_url, num, fallback=False):
+            note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
+            return self._download_webpage(
+                base_url, item_id, note, query={'page': num})
+
+        def is_404(e):
+            return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
+
+        base_url = url
+        has_page = page is not None
+        first_page = page if has_page else 1
+        for page_num in (first_page, ) if has_page else itertools.count(first_page):
             try:
             try:
-                webpage = self._download_webpage(
-                    url, item_id, 'Downloading page %d' % page_num,
-                    query={'page': page_num})
+                try:
+                    webpage = download_page(base_url, page_num)
+                except ExtractorError as e:
+                    # Some sources may not be available via /videos page,
+                    # trying to fallback to main page pagination (see [1])
+                    # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
+                    if is_404(e) and page_num == first_page and VIDEOS in base_url:
+                        base_url = base_url.replace(VIDEOS, '')
+                        webpage = download_page(base_url, page_num, fallback=True)
+                    else:
+                        raise
             except ExtractorError as e:
             except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                if is_404(e) and page_num != first_page:
                     break
                     break
                 raise
                 raise
             page_entries = self._extract_entries(webpage, host)
             page_entries = self._extract_entries(webpage, host)
             if not page_entries:
             if not page_entries:
                 break
                 break
-            entries.extend(page_entries)
+            for e in page_entries:
+                yield e
             if not self._has_more(webpage):
             if not self._has_more(webpage):
                 break
                 break
 
 
-        return self.playlist_result(orderedSet(entries), item_id)
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        item_id = mobj.group('id')
+
+        self._login(host)
+
+        return self.playlist_result(self._entries(url, host, item_id), item_id)
 
 
 
 
 class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
 class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
-    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+    _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
     _TESTS = [{
     _TESTS = [{
         'url': 'https://www.pornhub.com/model/zoe_ph/videos',
         'url': 'https://www.pornhub.com/model/zoe_ph/videos',
         'only_matching': True,
         'only_matching': True,
@@ -617,6 +738,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
     }, {
     }, {
         'url': 'https://de.pornhub.com/playlist/4667351',
         'url': 'https://de.pornhub.com/playlist/4667351',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
+        'only_matching': True,
     }]
     }]
 
 
     @classmethod
     @classmethod
@@ -627,7 +751,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
 
 
 
 
 class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
 class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
     _TESTS = [{
     _TESTS = [{
         'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
         'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
         'info_dict': {
         'info_dict': {
@@ -637,4 +761,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
     }, {
     }, {
         'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
         'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
         'only_matching': True,
         'only_matching': True,
+    }, {
+        'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
+        'only_matching': True,
     }]
     }]

Some files were not shown because too many files changed in this diff