import tubes
import json, gzip, glob
# Input files for the benchmark. glob.glob() returns paths in arbitrary,
# filesystem-dependent order, and both benchmark functions return the row built
# from the last line of the last file, so sort the paths to make the result
# reproducible across runs and machines.
FILES = sorted(glob.glob("../data/extracted/*.json"))
# Key paths to extract from every JSON record. Each entry is a tuple of nested
# keys, followed from the top-level object inwards.
KEYS = (
    # Top-level fields.
    ("timestamp",),
    ("country_code",),
    ("url",),
    # Fields nested under "file".
    ("file", "filename"),
    ("file", "project"),
    # Fields nested under "details".
    ("details", "installer", "name"),
    ("details", "python"),
    ("details", "system"),
    ("details", "system", "name"),
    ("details", "cpu"),
    ("details", "distro", "libc", "lib"),
    ("details", "distro", "libc", "version"),
)
def py_ver(files=None, keys=None):
    """Extract the configured key paths from JSON lines using plain Python.

    This is the pure-Python baseline of the benchmark: every line of every
    file is parsed with ``json.loads`` and every key path is walked through
    the nested objects, but only the row built from the last line read is
    kept and returned.

    Args:
        files: Iterable of paths to files holding one JSON document per line.
            Defaults to the module-level ``FILES``.
        keys: Iterable of key paths, each a sequence of nested keys.
            Defaults to the module-level ``KEYS``.

    Returns:
        A list with one value per key path for the last line read, holding
        ``None`` wherever a path could not be followed to the end; or
        ``None`` if no line was read at all.

    Raises:
        json.JSONDecodeError: If a line is not valid JSON.
        OSError: If a file cannot be opened.
    """
    files = FILES if files is None else files
    # Materialise once so a one-shot iterable can be reused for every line.
    keys = tuple(KEYS if keys is None else keys)

    # Bound up front so an empty file list (or only empty files) returns None
    # instead of raising UnboundLocalError at the final return.
    result = None
    for file_name in files:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(file_name, "rt", encoding="utf-8") as fp:
            for line in fp:
                data = json.loads(line)
                row = []
                for path in keys:
                    base = data
                    for part in path:
                        # A missing key (None) or a scalar/list reached before
                        # the path ends cannot be descended into; record the
                        # field as missing rather than crashing on ``.get``.
                        if not isinstance(base, dict):
                            base = None
                            break
                        base = base.get(part)
                    row.append(base)
                result = row
    return result
# Run the pure-Python baseline under IPython's %time line magic and keep its
# return value in py_result. %time is IPython/Jupyter syntax, so this line is
# not valid in a plain Python interpreter.
%time py_result = py_ver()
def tubes_ver():
    """Extract the KEYS fields from every JSON line using a tubes pipeline.

    Counterpart of the pure-Python baseline: the files in ``FILES`` are read,
    split on newlines, parsed as JSON and reduced to the ``KEYS`` fields by
    the ``tubes`` library. Every row the pipeline yields is consumed, but only
    the last one is kept and returned.

    Returns:
        The last row yielded by the pipeline, or ``None`` if it yields
        nothing (for example when ``FILES`` is empty).
    """
    def make_getters(x):
        # Build one chained ``.get`` lookup per key path, starting from ``x``.
        # NOTE(review): ``x`` is presumably the JSON tube handed over by
        # ``.multi``, and 'null' presumably the tubes default used when a key
        # is missing -- confirm against the tubes documentation.
        getters = []
        for path in KEYS:
            base = x
            for part in path:
                base = base.get(part, 'null')
            getters.append(base)
        return tuple(getters)

    pipeline = (
        tubes.Each(FILES)
        .read_files()
        .split(b'\n')
        .json()
        .multi(make_getters)
    )

    # Bound up front so a pipeline that yields no rows returns None instead of
    # raising UnboundLocalError at the final return.
    result = None
    for row in pipeline:
        result = row
    return result
# Run the tubes implementation under IPython's %time line magic and keep its
# return value in tubes_result.
%time tubes_result = tubes_ver()
# Sanity check that both implementations produced the same final row.
# py_ver() returns a list, hence the tuple() conversion before comparing
# (presumably the tubes pipeline yields tuples -- confirm).
tuple(py_result) == tubes_result
# Bare expression: shown as cell output when it is the last statement of a
# notebook cell.
tubes_result