import tubes
import json, gzip, glob
# Input files read by both implementations below.
# glob.glob() returns paths in arbitrary (filesystem-dependent) order; both
# implementations return only the LAST matching row, so the list is sorted to
# make the result reproducible between runs and machines.
FILES = sorted(glob.glob("../data/extracted/*.json"))

# Key paths to extract from every record. Each entry is a tuple of successive
# keys to follow through nested JSON objects; the position of an entry here is
# the position of its value in the extracted row.
KEYS = (
    ("timestamp",),
    ("country_code",),
    ("url",),
    ("file", "filename"),
    ("file", "project"),
    ("details", "installer", "name"),
    ("details", "python"),
    ("details", "system", "name"),
    ("details", "cpu"),
    ("details", "distro", "libc", "lib"),
    ("details", "distro", "libc", "version"),
)
def _lookup(record, path):
    """Follow *path* through nested dicts in *record*.

    Returns the value found at the end of the path, or ``None`` as soon as
    the path cannot be followed any further.
    """
    node = record
    for part in path:
        # A scalar or list in the middle of a path cannot be descended into;
        # treat it like a missing key instead of raising AttributeError.
        if not isinstance(node, dict):
            return None
        node = node.get(part)
        # Missing key or explicit JSON null: stop walking.
        if node is None:
            return None
    return node


def py_ver():
    """Extract the KEYS columns from every "GB" record using plain Python.

    Every file in FILES is read line by line, each line is parsed as one
    JSON document, and records whose ``country_code`` is not ``"GB"`` are
    skipped. For each remaining record a row is built holding one value per
    entry of KEYS (``None`` where the path is absent).

    Returns:
        The row (a list) built from the last matching record.

    Raises:
        IndexError: if no record in any file has ``country_code == "GB"``.
        json.JSONDecodeError: if a line is not valid JSON.
    """
    # NOTE(review): every row is accumulated even though only the last one is
    # returned - presumably so the work is comparable to a pipeline that
    # materialises all rows; confirm before reducing this to a single variable.
    result = []
    for file_name in FILES:
        # JSON text is UTF-8; be explicit rather than relying on the
        # platform's locale encoding.
        with open(file_name, "rt", encoding="utf-8") as fp:
            for line in fp:
                data = json.loads(line)
                if data.get("country_code") != "GB":
                    continue
                result.append([_lookup(data, path) for path in KEYS])
    return result[-1]
# Run the pure-Python implementation under IPython's `%time` line magic and
# keep its return value in `py_result`.
%time py_result = py_ver()
def tubes_ver():
    """Extract the KEYS columns from "GB" records with a `tubes` pipeline.

    The pipeline is built from FILES by chaining ``read_files``,
    ``split(b'\\n')``, ``json``, ``skip_unless`` and ``multi``; it is then
    materialised with ``list`` and the final element is returned.

    NOTE(review): the exact semantics of the `tubes` calls (including the
    ``'null'`` default passed to ``get``) are not visible in this file -
    check the library documentation before changing them.

    Returns:
        The last item produced by the pipeline.

    Raises:
        IndexError: if the pipeline produces no items.
    """
    def follow(node, path):
        # Apply one `.get(part, 'null')` per path component, in order.
        for part in path:
            node = node.get(part, 'null')
        return node

    def build_columns(x):
        # One entry per KEYS path, in KEYS order.
        return tuple([follow(x, path) for path in KEYS])

    def is_gb(record):
        return record.get("country_code", "null").to(bytes).equals("GB")

    records = tubes.Each(FILES).read_files().split(b'\n').json()
    rows = records.skip_unless(is_gb).multi(build_columns)
    return list(rows)[-1]
%time tubes_result = tubes_ver()
tuple(py_result) == tubes_result
 tubes_result
py_result
(8*60) + 43
523/7.43