In [1]:
import tubes
In [2]:
import json, gzip, glob
In [3]:
# Every gzip-compressed JSON-lines input used by both benchmark variants.
# glob returns matches in arbitrary (OS-dependent) order, so sort them to keep
# the processing order -- and therefore the result lists -- reproducible.
FILES = sorted(glob.glob("../data/*.jsonz"))
In [4]:
def py_ver(files=None):
    """Collect every record's ``country_code`` using only the standard library.

    Each input file is opened as gzip-compressed text and parsed as one JSON
    document per line.

    Args:
        files: Iterable of file paths to read. When omitted, the module-level
            ``FILES`` list is used, which matches the original behaviour.

    Returns:
        list: One entry per input line, in file order then line order. The
        entry is ``None`` when a record has no ``"country_code"`` key.

    Raises:
        json.JSONDecodeError: If a line is not valid JSON.
    """
    if files is None:
        files = FILES
    py_result = []
    for file_name in files:
        # JSON text is UTF-8; be explicit so decoding does not depend on the
        # locale's default encoding.
        with gzip.open(file_name, "rt", encoding="utf-8") as fp:
            # extend() drains the generator before the file is closed.
            py_result.extend(
                json.loads(line).get("country_code") for line in fp
            )
    return py_result
In [5]:
# Time the pure-Python baseline and keep its output for the equality check.
%time py_result = py_ver()
CPU times: user 4min 4s, sys: 2.82 s, total: 4min 7s
Wall time: 4min 14s
In [8]:
def tubes_ver(files=None):
    """Collect every record's ``country_code`` using a ``tubes`` pipeline.

    Counterpart of the pure-Python baseline: the same files are read,
    gunzipped, split into lines, parsed as JSON, and the ``"country_code"``
    field is extracted from each record.

    Args:
        files: Iterable of file paths to read. When omitted, the module-level
            ``FILES`` list is used, which matches the original behaviour.

    Returns:
        list: The values produced by the pipeline, materialised with ``list``.
    """
    if files is None:
        files = FILES
    return list(tubes.Each(files)
        .read_files()
        .gunzip(stream=True)
        .split(b'\n')
        # NOTE(review): chunk(1) presumably processes one input file at a
        # time -- confirm against the tubes documentation.
        .chunk(1)
        .json()
        # NOTE(review): "null" is passed as the default for a missing key; the
        # notebook's equality check against the baseline suggests it surfaces
        # as None -- confirm.
        .get("country_code", "null"))
In [9]:
# Time the tubes pipeline and keep its output for the equality check.
%time tubes_result = tubes_ver()
CPU times: user 18.2 s, sys: 1.26 s, total: 19.4 s
Wall time: 19.6 s
In [10]:
# Sanity check: both implementations must produce exactly the same list.
py_result == tubes_result
Out[10]:
True
In [14]:
# Pure-Python wall time reported by %time (4min 14s), converted to seconds.
(4*60) + 14
Out[14]:
254
In [16]:
# Speed-up factor: pure-Python wall time (254 s) divided by the tubes wall time (19.6 s).
254/19.6
Out[16]:
12.959183673469386