import tubes
import json, gzip, glob
# Input files for both benchmark implementations.
# glob.glob() yields paths in arbitrary (filesystem-dependent) order, and both
# implementations return the row from the *last* matching record, so the paths
# are sorted to keep that result reproducible across runs and machines.
FILES = sorted(glob.glob("../data/extracted/*.json"))

# Key paths to extract from every record: each entry is the sequence of nested
# dictionary keys to follow, outermost first.
KEYS = (
    ("timestamp",),
    ("country_code",),
    ("url",),
    ("file", "filename"),
    ("file", "project"),
    ("details", "installer", "name"),
    ("details", "python"),
    ("details", "system", "name"),
    ("details", "cpu"),
    ("details", "distro", "libc", "lib"),
    ("details", "distro", "libc", "version"),
)
def py_ver():
    """Pure-Python baseline: extract the KEYS fields from every "GB" record.

    Reads each path in FILES line by line, parsing every line as one JSON
    object. Records whose top-level "country_code" is not "GB" are skipped.
    For each remaining record a row is built with one entry per key path in
    KEYS; an entry is None when any step of its path is missing or null.

    Returns:
        list: the row built from the last matching record.

    Raises:
        IndexError: if no record in any file matches.
    """
    result = []
    for file_name in FILES:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(file_name, "rt", encoding="utf-8") as fp:
            for line in fp:
                data = json.loads(line)
                if data.get("country_code") != "GB":
                    continue
                row = []
                for path in KEYS:
                    base = data
                    for part in path:
                        base = base.get(part, None)
                        if base is None:
                            # Path is absent (or null) here; record None.
                            break
                    row.append(base)
                result.append(row)
    # All rows are kept and only the last is returned; presumably this mirrors
    # the work done by the tubes implementation it is compared against.
    return result[-1]
# Time a single run of the pure-Python implementation (IPython %time line
# magic) and keep its result in py_result for the comparison below.
%time py_result = py_ver()
def tubes_ver():
    """Run the same extraction as a `tubes` pipeline and return the last row.

    The pipeline is built from FILES: read the files, split on newlines,
    parse each chunk as JSON, filter on "country_code" against "GB", then emit
    one value per key path in KEYS. The whole output is materialised into a
    list and its final element is returned.

    NOTE(review): the exact semantics of each tubes stage (and the type of the
    returned row) come from the tubes library, not this file — confirm there.

    Raises:
        IndexError: if the pipeline yields no rows.
    """
    def follow(node, path):
        # Chain one .get() per path component, using 'null' as the default.
        for key in path:
            node = node.get(key, 'null')
        return node

    def make_getters(x):
        # One getter per KEYS entry, in KEYS order.
        return tuple([follow(x, path) for path in KEYS])

    chunks = tubes.Each(FILES).read_files().split(b'\n')
    records = chunks.json()
    matching = records.skip_unless(
        lambda x: x.get("country_code", "null").to(bytes).equals("GB")
    )
    rows = matching.multi(make_getters)
    return list(rows)[-1]
# Time a single run of the tubes implementation (IPython %time line magic)
# and keep its result in tubes_result.
%time tubes_result = tubes_ver()
# Sanity check: both implementations should yield the same final row.
# py_ver() returns a list, so it is converted to a tuple before comparing
# (presumably tubes yields a tuple — confirm).
tuple(py_result) == tubes_result
# Display both rows for visual comparison.
tubes_result
py_result
# NOTE(review): presumably an observed run time of 8 min 43 s expressed in
# seconds (= 523) — confirm which implementation this refers to.
(8*60) + 43
# NOTE(review): presumably the ratio of that 523 s run to a 7.43 s run, i.e.
# the speed-up factor between the two implementations — confirm.
523/7.43