Browse Source

[cda] Fix extraction (closes #17803, closes #24458, closes #24518, closes #26381)

Sergey M․ 4 years ago
parent
commit
37258c644f
1 changed file with 32 additions and 3 deletions
  1. 32 3
      youtube_dl/extractor/cda.py

+ 32 - 3
youtube_dl/extractor/cda.py

@@ -5,10 +5,16 @@ import codecs
 import re
 import re
 
 
 from .common import InfoExtractor
 from .common import InfoExtractor
+from ..compat import (
+    compat_chr,
+    compat_ord,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
 from ..utils import (
     ExtractorError,
     ExtractorError,
     float_or_none,
     float_or_none,
     int_or_none,
     int_or_none,
+    merge_dicts,
     multipart_encode,
     multipart_encode,
     parse_duration,
     parse_duration,
     random_birthday,
     random_birthday,
@@ -107,8 +113,9 @@ class CDAIE(InfoExtractor):
             r'Odsłony:(?:\s| )*([0-9]+)', webpage,
             r'Odsłony:(?:\s| )*([0-9]+)', webpage,
             'view_count', default=None)
             'view_count', default=None)
         average_rating = self._search_regex(
         average_rating = self._search_regex(
-            r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
-            webpage, 'rating', fatal=False, group='rating_value')
+            (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
+             r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
+            group='rating_value')
 
 
         info_dict = {
         info_dict = {
             'id': video_id,
             'id': video_id,
@@ -123,6 +130,24 @@ class CDAIE(InfoExtractor):
             'age_limit': 18 if need_confirm_age else 0,
             'age_limit': 18 if need_confirm_age else 0,
         }
         }
 
 
+        # Source: https://www.cda.pl/js/player.js?t=1606154898
+        def decrypt_file(a):
+            for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
+                a = a.replace(p, '')
+            a = compat_urllib_parse_unquote(a)
+            b = []
+            for c in a:
+                f = compat_ord(c)
+                b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f))
+            a = ''.join(b)
+            a = a.replace('.cda.mp4', '')
+            for p in ('.2cda.pl', '.3cda.pl'):
+                a = a.replace(p, '.cda.pl')
+            if '/upstream' in a:
+                a = a.replace('/upstream', '.mp4/upstream')
+                return 'https://' + a
+            return 'https://' + a + '.mp4'
+
         def extract_format(page, version):
         def extract_format(page, version):
             json_str = self._html_search_regex(
             json_str = self._html_search_regex(
                 r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
                 r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
@@ -141,6 +166,8 @@ class CDAIE(InfoExtractor):
                 video['file'] = codecs.decode(video['file'], 'rot_13')
                 video['file'] = codecs.decode(video['file'], 'rot_13')
                 if video['file'].endswith('adc.mp4'):
                 if video['file'].endswith('adc.mp4'):
                     video['file'] = video['file'].replace('adc.mp4', '.mp4')
                     video['file'] = video['file'].replace('adc.mp4', '.mp4')
+            elif not video['file'].startswith('http'):
+                video['file'] = decrypt_file(video['file'])
             f = {
             f = {
                 'url': video['file'],
                 'url': video['file'],
             }
             }
@@ -179,4 +206,6 @@ class CDAIE(InfoExtractor):
 
 
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
-        return info_dict
+        info = self._search_json_ld(webpage, video_id, default={})
+
+        return merge_dicts(info_dict, info)