# Python Bug Report 37241
# Jesse Bacon
"""Check that a shelve database keeps every record of the NVD 2019 CVE feed.

The script downloads the gzipped JSON feed, stores each entry's ``cve``
object in a shelf keyed by its CVE ID, reads every record back, and reports
whether the shelf holds as many keys as the feed has unique IDs.
"""
import gzip
import json
import shelve
import urllib.request

FEED_URL = "https://nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-2019.json.gz"
ARCHIVE_PATH = 'nvdcve-1.0-2019.json.gz'
SHELF_PATH = 'cve_2019.shelf'


def cve_id(item):
    """Return the CVE identifier stored in one ``CVE_Items`` entry."""
    return item['cve']['CVE_data_meta']['ID']


def download(url, path):
    """Fetch *url* and write the raw response bytes to *path*."""
    print('Fetching nvdcve-1.0-2019.json.gz')
    # urllib.request must be imported explicitly; a bare ``import urllib``
    # does not guarantee the submodule is loaded.
    with urllib.request.urlopen(url) as response:
        data = response.read()
    print('Storing Gzipped File')
    with open(path, 'wb') as f:
        f.write(data)


def load_items(path):
    """Decompress the gzipped JSON file at *path* and return its ``CVE_Items`` list."""
    with gzip.open(path, 'rb') as f:
        file_content = f.read()
    print('Loading JSON Content')
    cve_data = json.loads(file_content)
    return cve_data['CVE_Items']


def store_items(db, items):
    """Write each item's ``cve`` object into the mapping *db*, keyed by CVE ID.

    A later item with the same ID overwrites the earlier one.
    """
    for item in items:
        db[cve_id(item)] = item['cve']


def missing_ids(db, ids):
    """Return the entries of *ids* that are not keys of *db*, in order."""
    return [x for x in ids if x not in db]


def check_shelf(items, shelf_path=SHELF_PATH):
    """Round-trip *items* through a shelf at *shelf_path* and report the result.

    Prints the record counts at each stage and returns ``True`` when the shelf
    holds at least as many keys as there are unique CVE IDs in *items*,
    ``False`` otherwise.

    Raises:
        KeyError: if a stored ID cannot be read back from the shelf.
    """
    print('{} records'.format(len(items)))
    raw_records = [cve_id(x) for x in items]
    # Computed once and reused for the final comparison.
    unique_count = len(set(raw_records))
    print('{} unique records'.format(unique_count))

    print('Creating Shelve: {}'.format(shelf_path))
    # The context manager closes (and thereby syncs) the shelf on exit.
    with shelve.open(shelf_path) as db:
        print('Assembling Big Dictionary of 2019 Data in shelve')
        store_items(db, items)
        print('shelve reports {} unique records'.format(len(db)))

        print('Extracting data by keys from shelve')
        results = [db[x] for x in raw_records]
        print('{} extracted records'.format(len(results)))
        print('Number of missing records {}'.format(
            len(missing_ids(db, raw_records))))

        matches = not unique_count > len(db)

    if matches:
        print('data match')
    else:
        print('Shelve is on strike.')
    return matches


def main():
    """Download the 2019 feed, then run the shelf round-trip check on it."""
    download(FEED_URL, ARCHIVE_PATH)
    check_shelf(load_items(ARCHIVE_PATH))


if __name__ == "__main__":
    main()