mirror of
https://github.com/adambard/learnxinyminutes-docs.git
synced 2024-12-24 01:51:38 +00:00
pep8 fixes (spaces and multiline statements)
in Python readability and code style matters
This commit is contained in:
parent
b2113480a4
commit
bde8645cc7
@ -9,6 +9,8 @@ This is a tutorial on how to do some typical statistical programming tasks using
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# 0. Getting set up ====
|
# 0. Getting set up ====
|
||||||
|
|
||||||
""" Get set up with IPython and pip install the following: numpy, scipy, pandas,
|
""" Get set up with IPython and pip install the following: numpy, scipy, pandas,
|
||||||
@ -35,7 +37,7 @@ r.text # raw page source
|
|||||||
print(r.text) # prettily formatted
|
print(r.text) # prettily formatted
|
||||||
# save the page source in a file:
|
# save the page source in a file:
|
||||||
os.getcwd() # check what the working directory is
|
os.getcwd() # check what the working directory is
|
||||||
f = open("learnxinyminutes.html","wb")
|
f = open("learnxinyminutes.html", "wb")
|
||||||
f.write(r.text.encode("UTF-8"))
|
f.write(r.text.encode("UTF-8"))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
@ -44,7 +46,7 @@ fp = "https://raw.githubusercontent.com/adambard/learnxinyminutes-docs/master/"
|
|||||||
fn = "pets.csv"
|
fn = "pets.csv"
|
||||||
r = requests.get(fp + fn)
|
r = requests.get(fp + fn)
|
||||||
print(r.text)
|
print(r.text)
|
||||||
f = open(fn,"wb")
|
f = open(fn, "wb")
|
||||||
f.write(r.text.encode("UTF-8"))
|
f.write(r.text.encode("UTF-8"))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
@ -58,7 +60,9 @@ f.close()
|
|||||||
you've used R, you will be familiar with the idea of the "data.frame" already.
|
you've used R, you will be familiar with the idea of the "data.frame" already.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pandas as pd, numpy as np, scipy as sp
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import scipy as sp
|
||||||
pets = pd.read_csv(fn)
|
pets = pd.read_csv(fn)
|
||||||
pets
|
pets
|
||||||
# name age weight species
|
# name age weight species
|
||||||
@ -86,7 +90,7 @@ pets.age[0:2]
|
|||||||
# 0 3
|
# 0 3
|
||||||
# 1 6
|
# 1 6
|
||||||
|
|
||||||
sum(pets.age)*2 # 28
|
sum(pets.age) * 2 # 28
|
||||||
max(pets.weight) - min(pets.weight) # 20
|
max(pets.weight) - min(pets.weight) # 20
|
||||||
|
|
||||||
""" If you are doing some serious linear algebra and number-crunching, you may
|
""" If you are doing some serious linear algebra and number-crunching, you may
|
||||||
@ -96,7 +100,8 @@ max(pets.weight) - min(pets.weight) # 20
|
|||||||
|
|
||||||
# 3. Charts ====
|
# 3. Charts ====
|
||||||
|
|
||||||
import matplotlib as mpl, matplotlib.pyplot as plt
|
import matplotlib as mpl
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
%matplotlib inline
|
%matplotlib inline
|
||||||
|
|
||||||
# To do data visualization in Python, use matplotlib
|
# To do data visualization in Python, use matplotlib
|
||||||
@ -105,13 +110,17 @@ plt.hist(pets.age);
|
|||||||
|
|
||||||
plt.boxplot(pets.weight);
|
plt.boxplot(pets.weight);
|
||||||
|
|
||||||
plt.scatter(pets.age, pets.weight); plt.xlabel("age"); plt.ylabel("weight");
|
plt.scatter(pets.age, pets.weight)
|
||||||
|
plt.xlabel("age")
|
||||||
|
plt.ylabel("weight");
|
||||||
|
|
||||||
# seaborn sits atop matplotlib and makes plots prettier
|
# seaborn sits atop matplotlib and makes plots prettier
|
||||||
|
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
|
|
||||||
plt.scatter(pets.age, pets.weight); plt.xlabel("age"); plt.ylabel("weight");
|
plt.scatter(pets.age, pets.weight)
|
||||||
|
plt.xlabel("age")
|
||||||
|
plt.ylabel("weight");
|
||||||
|
|
||||||
# there are also some seaborn-specific plotting functions
|
# there are also some seaborn-specific plotting functions
|
||||||
# notice how seaborn automatically labels the x-axis on this barplot
|
# notice how seaborn automatically labels the x-axis on this barplot
|
||||||
@ -141,7 +150,7 @@ ggplot(aes(x="age",y="weight"), data=pets) + geom_point() + labs(title="pets")
|
|||||||
url = "https://raw.githubusercontent.com/e99n09/R-notes/master/data/hre.csv"
|
url = "https://raw.githubusercontent.com/e99n09/R-notes/master/data/hre.csv"
|
||||||
r = requests.get(url)
|
r = requests.get(url)
|
||||||
fp = "hre.csv"
|
fp = "hre.csv"
|
||||||
f = open(fp,"wb")
|
f = open(fp, "wb")
|
||||||
f.write(r.text.encode("UTF-8"))
|
f.write(r.text.encode("UTF-8"))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
@ -185,8 +194,9 @@ rx = re.compile(r'\d+$') # match trailing digits
|
|||||||
- http://stackoverflow.com/questions/11860476/how-to-unlist-a-python-list
|
- http://stackoverflow.com/questions/11860476/how-to-unlist-a-python-list
|
||||||
- http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html
|
- http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def extractYear(v):
|
def extractYear(v):
|
||||||
return(pd.Series(reduce(lambda x,y: x+y,map(rx.findall,v),[])).astype(int))
|
return(pd.Series(reduce(lambda x, y: x + y, map(rx.findall, v), [])).astype(int))
|
||||||
|
|
||||||
hre["BirthY"] = extractYear(hre.Birth)
|
hre["BirthY"] = extractYear(hre.Birth)
|
||||||
hre["DeathY"] = extractYear(hre.Death)
|
hre["DeathY"] = extractYear(hre.Death)
|
||||||
@ -199,7 +209,7 @@ sns.lmplot("BirthY", "EstAge", data=hre, hue="Dynasty", fit_reg=False);
|
|||||||
|
|
||||||
# use scipy to run a linear regression
|
# use scipy to run a linear regression
|
||||||
from scipy import stats
|
from scipy import stats
|
||||||
(slope,intercept,rval,pval,stderr)=stats.linregress(hre.BirthY,hre.EstAge)
|
(slope, intercept, rval, pval, stderr) = stats.linregress(hre.BirthY, hre.EstAge)
|
||||||
# code source: http://wiki.scipy.org/Cookbook/LinearRegression
|
# code source: http://wiki.scipy.org/Cookbook/LinearRegression
|
||||||
|
|
||||||
# check the slope
|
# check the slope
|
||||||
@ -223,6 +233,7 @@ sns.lmplot("BirthY", "EstAge", data=hre);
|
|||||||
To see a version of the Holy Roman Emperors analysis using R, see
|
To see a version of the Holy Roman Emperors analysis using R, see
|
||||||
- http://github.com/e99n09/R-notes/blob/master/holy_roman_emperors_dates.R
|
- http://github.com/e99n09/R-notes/blob/master/holy_roman_emperors_dates.R
|
||||||
"""
|
"""
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
If you want to learn more, get _Python for Data Analysis_ by Wes McKinney. It's a superb resource and I used it as a reference when writing this tutorial.
|
If you want to learn more, get _Python for Data Analysis_ by Wes McKinney. It's a superb resource and I used it as a reference when writing this tutorial.
|
||||||
|
Loading…
Reference in New Issue
Block a user