#!/usr/bin/env python
"""
Cross-check the 'age_limit' tagging of the extractor test cases.

Two detection methods are available:

* Heuristic (default): every test URL is downloaded and the page is searched
  for the substring 'porn' (case-insensitively).  This is a VERY basic
  heuristic.
* Domain list: pass the filename of a newline-separated list of porn domains
  as the only argument; the domain of every test URL is then looked up in
  that list instead of downloading anything.

A '.' is printed for every test case whose tagging agrees with the detection
result; disagreements and failures are reported on their own line.
"""

import io
import os
import sys
from contextlib import closing

ADULT_AGE_LIMIT = 18
FETCH_TIMEOUT = 10  # seconds


def parse_domain_list(text):
    """Return the set of all dotted suffixes of the entries in *text*.

    *text* holds one domain per line.  Storing every dotted suffix of every
    entry means that ``domain in result`` is true exactly when some entry
    equals ``domain`` or ends with ``'.' + domain``.  Unlike a substring
    search over the raw text, this treats the first and last entries like
    any other and does not depend on the line-ending style.
    """
    suffixes = set()
    for line in text.splitlines():
        entry = line.strip().lower()
        if not entry:
            continue
        labels = entry.split('.')
        for start in range(len(labels)):
            suffix = '.'.join(labels[start:])
            # A leading/trailing dot yields an empty label; skip the
            # resulting empty suffix.
            if suffix:
                suffixes.add(suffix)
    return suffixes


def load_domain_list(path):
    """Read the UTF-8 domain list at *path* and parse it."""
    # io.open decodes on both Python 2 and 3, and the context manager
    # closes the file.
    with io.open(path, encoding='utf-8') as list_file:
        return parse_domain_list(list_file.read())


def base_domain(netloc):
    """Return the last two dot-separated labels of *netloc*, lowercased."""
    return '.'.join(netloc.lower().split('.')[-2:])


def page_looks_adult(webpage):
    """Apply the 'porn' substring heuristic to the raw bytes of a page."""
    return 'porn' in webpage.decode('utf8', 'replace').lower()


def is_tagged_adult(test):
    """Return True if the test case's info_dict has age_limit == 18.

    A missing (or None) 'info_dict' counts as untagged.
    """
    info_dict = test.get('info_dict') or {}
    return info_dict.get('age_limit') == ADULT_AGE_LIMIT


def describe_mismatch(looks_adult, test):
    """Return a warning for *test*, or None if tagging matches detection.

    *looks_adult* is the detection result (heuristic or domain list) for
    the test case's URL.
    """
    tagged = is_tagged_adult(test)
    if looks_adult and not tagged:
        return 'Potential missing age_limit check: {0}'.format(test['name'])
    if tagged and not looks_adult:
        return 'Potential false negative: {0}'.format(test['name'])
    return None


def main(argv=None):
    """Run the check over all test cases and print a progress report.

    *argv* defaults to ``sys.argv``; if it carries an argument, that is the
    domain list filename and the list method is used instead of the
    heuristic.
    """
    if argv is None:
        argv = sys.argv

    # Allow direct execution.  The project imports live here so that the
    # helpers above stay importable without the youtube-dl source tree.
    sys.path.insert(
        0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    from test.helper import get_testcases
    from youtube_dl.utils import compat_urllib_parse_urlparse
    from youtube_dl.utils import compat_urllib_request

    # None selects the heuristic method.
    listed_domains = load_domain_list(argv[1]) if len(argv) > 1 else None

    for test in get_testcases():
        if listed_domains is None:
            try:
                request = compat_urllib_request.urlopen(
                    test['url'], timeout=FETCH_TIMEOUT)
                with closing(request) as response:
                    webpage = response.read()
            except Exception:
                # Best effort: any fetch problem is reported and skipped,
                # but Ctrl-C / SystemExit still propagate.
                print('\nFail: {0}'.format(test['name']))
                continue
            looks_adult = page_looks_adult(webpage)
        else:
            netloc = compat_urllib_parse_urlparse(test['url']).netloc
            if not netloc:
                print('\nFail: {0}'.format(test['name']))
                continue
            looks_adult = base_domain(netloc) in listed_domains

        warning = describe_mismatch(looks_adult, test)
        if warning is None:
            sys.stdout.write('.')
        else:
            print('\n' + warning)
        sys.stdout.flush()

    # Terminate the progress line; a bare print() would emit '()' on
    # Python 2 without the print_function future import.
    sys.stdout.write('\n')


if __name__ == '__main__':
    main()