PythonStatistics - mwicat/personal GitHub Wiki
Notebook
# Install Jupyter, then start the notebook server from its own working directory.
sudo pip3 install jupyter
mkdir jupyter
cd jupyter
jupyter notebook
Setup notebook
# Install the data-analysis and plotting libraries used by the notebook.
sudo pip3 install pandas seaborn matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Select seaborn's "darkgrid" style for the plots drawn in this notebook.
sns.set(style="darkgrid")
Read CSV
import csv

fn = 'data.csv'

# Print the first_name / last_name columns of every record in the file.
# newline='' hands newline handling to the csv module, so quoted fields
# that contain line breaks are parsed correctly.
with open(fn, newline='') as csvfile:
    reader = csv.DictReader(csvfile)  # the header row supplies the dict keys
    for row in reader:
        print(row['first_name'], row['last_name'])
Write CSV
import csv

fn = 'data.csv'

# Write a header row followed by one rendered row per item in `rows`.
# `headers`, `rows` and `render_row` must be supplied by the caller.
#
# Python 3's csv.writer emits str, so the file has to be opened in text
# mode ('wb' raises TypeError); newline='' stops the platform from
# translating the writer's own line terminators.  buffering=1 keeps the
# original line-buffered behaviour, so each row reaches disk as written.
with open(fn, 'w', buffering=1, newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)
    writer.writerows(render_row(row) for row in rows)
Convert CSV to XLS
# Install unoconv, then convert data.csv to XLS format.
sudo apt-get install unoconv
unoconv --format xls data.csv
Put list into sqlite
import sqlite3

# executemany() needs one parameter sequence per row, and each sequence
# must match the four placeholders / table columns:
# (user_id, data_type, count, size).
stats = [
    ('user-1', 1, 10, 1024),
    ('user-2', 2, 3, 2048),
    ('user-3', 1, 7, 512),
]

conn = sqlite3.connect('/var/tmp/storage.sqlite')
try:
    c = conn.cursor()
    c.execute('CREATE TABLE if not exists storage(user_id text, data_type integer, count integer, size integer)')
    c.executemany('INSERT INTO storage VALUES (?,?,?,?)', stats)
    conn.commit()
finally:
    # Always release the database file, even if an insert fails.
    conn.close()
Install ipython notebook
# Install the native build dependencies, then the IPython notebook stack via pip.
# NOTE(review): functools32 and libpng12-dev look like Python 2 / older-Ubuntu requirements — confirm this recipe is still needed next to the pip3-based "Notebook" setup.
sudo apt-get install pkg-config libfreetype6-dev libpng12-dev
sudo pip install ipython pyzmq jinja2 tornado jsonschema functools32 terminado matplotlib pandas numpy
Number frequency
# How many times each distinct value occurs in column "x".
df['x'].value_counts()
Bin frequency
import numpy as np
import pandas as pd

# Count how many values of column "x" fall into each 10-wide bin over 0..100
# and render the result as a tab-separated table, highest bin first.
df = pd.DataFrame([10, 20, 30, 91], columns=['x'])
bins = np.arange(0, 101, 10)
counts, edges = np.histogram(df['x'], bins)
# `edges` has one more entry than `counts`; zip() drops the final upper edge,
# pairing each count with the lower edge of its bin.
tbl_data = sorted(zip(edges, counts), reverse=True)
tbl_str = '\n'.join(['>= %d%%\t%d' % (x, y) for x, y in tbl_data])
List of objects to DataFrame
# One DataFrame row per entity, built from each entity's attribute dict;
# the column order follows the attributes of the first entity.
column_names = user_entities[0].__dict__.keys()
records = (entity.__dict__ for entity in user_entities)
df = pd.DataFrame(records, columns=column_names)
Show time data
import pickle

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

matplotlib.style.use('ggplot')

# `args.stats` is the path of the pickled stats file; `args` is assumed to
# come from the surrounding script's argument parser.
# NOTE: pickle can execute arbitrary code on load — only open files you trust.
# Pickle data is binary, so the file must be opened in 'rb' mode; the `with`
# block closes it once loading is done.
with open(args.stats, 'rb') as stats_file:
    stats = pickle.load(stats_file)

ts = stats['timestamps']  # treated as epoch seconds (unit='s' below)
# One event per timestamp; the running sum is the cumulative event count.
s = pd.Series(index=[pd.to_datetime(x, unit='s') for x in ts], data=1)
s2 = s.cumsum()
s2.plot(grid=True)
plt.show()
Country code to continent code
from pycountry_convert import country_alpha2_to_continent_code
# Print the continent code for the two-letter country code 'ID'.
print(country_alpha2_to_continent_code('ID'))
Fuzzy country name to continent
# Install the two packages that provide the country/continent lookups.
sudo pip install pycountry-convert hdx-python-country
from pycountry_convert import country_alpha2_to_continent_code, country_alpha3_to_country_alpha2
from hdx.location.country import Country
def country_to_continent(country):
    """Return the continent code for a loosely spelled country name.

    The name is fuzzy-matched to an ISO3 country code without live data
    (``use_live=False``), converted to its alpha-2 form and then mapped to
    a continent code.

    Returns ``None`` when no ISO3 code is found or when either conversion
    raises ``KeyError``.
    """
    alpha3, _is_exact = Country.get_iso3_country_code_fuzzy(country, use_live=False)
    if alpha3 is not None:
        try:
            alpha2 = country_alpha3_to_country_alpha2(alpha3)
            return country_alpha2_to_continent_code(alpha2)
        except KeyError:
            # Unknown code in either conversion step: treat as "no continent".
            pass
    return None
Plot memory by process
https://gist.github.com/mwicat/5eef5f526c52a12d2029039f721be52f