In [1]:
import tubes
In [2]:
import json, glob
In [3]:
# Input files shared by both benchmark implementations.
# glob.glob() returns matches in arbitrary (OS-dependent) order; sorting makes
# the order of the extracted results reproducible across runs and machines.
FILES = sorted(glob.glob("../data/extracted/*.json"))
In [4]:
def py_ver(files=None):
    """Pure-Python baseline: collect ``country_code`` from every JSON line.

    Every line of every file is parsed with ``json.loads`` and the parsed
    object's ``.get("country_code")`` is appended to the result, so an object
    without that key contributes ``None``.

    Args:
        files: Iterable of file paths to read, in order. When omitted, the
            module-level ``FILES`` list is used.

    Returns:
        list: One entry per input line, ordered by file and then by line.

    Raises:
        ValueError: (``json.JSONDecodeError``) if a line is not valid JSON.
    """
    if files is None:
        files = FILES
    py_result = []
    for file_name in files:
        # JSON text is UTF-8; be explicit instead of relying on the platform's
        # locale encoding, which can fail or mis-decode non-ASCII content.
        with open(file_name, "rt", encoding="utf-8") as fp:
            for line in fp:
                data = json.loads(line)
                py_result.append(data.get("country_code"))
    return py_result
In [5]:
# Time the pure-Python baseline; keep its result for the equality check against tubes.
%time py_result = py_ver()
CPU times: user 3min 21s, sys: 7.32 s, total: 3min 28s
Wall time: 3min 30s
In [6]:
def tubes_ver():
    """tubes counterpart of ``py_ver``: extract ``country_code`` from ``FILES``.

    Builds a tubes pipeline over ``FILES`` (read each file, split on
    newlines, parse each chunk as JSON, fetch ``"country_code"``) and
    materialises it into a list.

    Returns:
        list: The values produced by the pipeline, one per parsed item.
    """
    parsed_lines = tubes.Each(FILES).read_files().split(b'\n').json()
    # NOTE(review): "null" is the fallback passed for a missing key; presumably
    # tubes treats it as JSON null -- confirm against the tubes docs.
    country_codes = parsed_lines.get("country_code", "null")
    return list(country_codes)
In [7]:
# Time the tubes implementation on the same FILES list.
%time tubes_result = tubes_ver()
CPU times: user 4.29 s, sys: 3.48 s, total: 7.78 s
Wall time: 8.54 s
In [8]:
# Sanity check: both implementations must produce equal lists (same values, same order).
py_result == tubes_result
Out[8]:
True
In [9]:
# Total CPU time of the pure-Python run in seconds (3min 28s, from its %time output).
(3*60) + 28
Out[9]:
208
In [10]:
# Speed-up in total CPU time: pure Python (208 s) / tubes (7.78 s).
# By wall-clock time the ratio would be 210 / 8.54, i.e. about 24.6x.
208/7.78
Out[10]:
26.735218508997427