pep8 fixes (spaces and multiline statements)

In Python, readability and code style matter.
2024-12-23 17:41:41 +00:00 · 2016-01-03 19:45:54 +01:00 · 2016-01-03 19:45:54 +01:00 · bde8645cc7
commit bde8645cc7
parent b2113480a4
1 changed files with 55 additions and 44 deletions
--- a/pythonstatcomp.html.markdown
+++ b/pythonstatcomp.html.markdown
@ -9,6 +9,8 @@ This is a tutorial on how to do some typical statistical programming tasks using
 ```python
 # 0. Getting set up ====
 """ Get set up with IPython and pip install the following: numpy, scipy, pandas,
@ -25,17 +27,17 @@ This is a tutorial on how to do some typical statistical programming tasks using
    already using Python, there's a benefit to sticking with one language.
 """
-import requests # for HTTP requests (web scraping, APIs)
+import requests  # for HTTP requests (web scraping, APIs)
 import os
 # web scraping
 r = requests.get("https://github.com/adambard/learnxinyminutes-docs")
-r.status_code # if 200, request was successful
+r.status_code  # if 200, request was successful
-r.text # raw page source
+r.text  # raw page source
-print(r.text) # prettily formatted
+print(r.text)  # prettily formatted
 # save the page source in a file:
-os.getcwd() # check what's the working directory
+os.getcwd()  # check what's the working directory
-f = open("learnxinyminutes.html","wb")
+f = open("learnxinyminutes.html", "wb")
 f.write(r.text.encode("UTF-8"))
 f.close()
@ -44,7 +46,7 @@ fp = "https://raw.githubusercontent.com/adambard/learnxinyminutes-docs/master/"
 fn = "pets.csv"
 r = requests.get(fp + fn)
 print(r.text)
-f = open(fn,"wb")
+f = open(fn, "wb")
 f.write(r.text.encode("UTF-8"))
 f.close()
@ -58,7 +60,9 @@ f.close()
    you've used R, you will be familiar with the idea of the "data.frame" already.
 """
-import pandas as pd, numpy as np, scipy as sp
+import pandas as pd
 import numpy as np
 import scipy as sp
 pets = pd.read_csv(fn)
 pets
 #        name  age  weight species
@ -74,20 +78,20 @@ pets
 pets.age
 pets["age"]
-pets.head(2) # prints first 2 rows
+pets.head(2)  # prints first 2 rows
-pets.tail(1) # prints last row
+pets.tail(1)  # prints last row
-pets.name[1] # 'vesuvius'
+pets.name[1]  # 'vesuvius'
-pets.species[0] # 'cat'
+pets.species[0]  # 'cat'
-pets["weight"][2] # 34
+pets["weight"][2]  # 34
 # in R, you would expect to get 3 rows doing this, but here you get 2:
 pets.age[0:2]
 # 0    3
 # 1    6
-sum(pets.age)*2 # 28
+sum(pets.age) * 2  # 28
-max(pets.weight) - min(pets.weight) # 20
+max(pets.weight) - min(pets.weight)  # 20
 """ If you are doing some serious linear algebra and number-crunching, you may
    just want arrays, not DataFrames. DataFrames are ideal for combining columns
@ -96,7 +100,8 @@ max(pets.weight) - min(pets.weight) # 20
 # 3. Charts ====
-import matplotlib as mpl, matplotlib.pyplot as plt
+import matplotlib as mpl
 import matplotlib.pyplot as plt
 %matplotlib inline
 # To do data visualization in Python, use matplotlib
@ -105,13 +110,17 @@ plt.hist(pets.age);
 plt.boxplot(pets.weight);
-plt.scatter(pets.age, pets.weight); plt.xlabel("age"); plt.ylabel("weight");
+plt.scatter(pets.age, pets.weight)
 plt.xlabel("age")
 plt.ylabel("weight");
 # seaborn sits atop matplotlib and makes plots prettier
 import seaborn as sns
-plt.scatter(pets.age, pets.weight); plt.xlabel("age"); plt.ylabel("weight");
+plt.scatter(pets.age, pets.weight)
 plt.xlabel("age")
 plt.ylabel("weight");
 # there are also some seaborn-specific plotting functions
 # notice how seaborn automatically labels the x-axis on this barplot
@ -141,7 +150,7 @@ ggplot(aes(x="age",y="weight"), data=pets) + geom_point() + labs(title="pets")
 url = "https://raw.githubusercontent.com/e99n09/R-notes/master/data/hre.csv"
 r = requests.get(url)
 fp = "hre.csv"
-f = open(fp,"wb")
+f = open(fp, "wb")
 f.write(r.text.encode("UTF-8"))
 f.close()
@ -173,9 +182,9 @@ hre.head()
 # clean the Birth and Death columns
-import re # module for regular expressions
+import re  # module for regular expressions
-rx = re.compile(r'\d+$') # match trailing digits
+rx = re.compile(r'\d+$')  # match trailing digits
 """ This function applies the regular expression to an input column (here Birth,
    Death), flattens the resulting list, converts it to a Series object, and
@ -185,8 +194,9 @@ rx = re.compile(r'\d+$') # match trailing digits
      - http://stackoverflow.com/questions/11860476/how-to-unlist-a-python-list
      - http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html
 """
 def extractYear(v):
-    return(pd.Series(reduce(lambda x,y: x+y,map(rx.findall,v),[])).astype(int))
+    return(pd.Series(reduce(lambda x, y: x + y, map(rx.findall, v), [])).astype(int))
 hre["BirthY"] = extractYear(hre.Birth)
 hre["DeathY"] = extractYear(hre.Death)
@ -199,17 +209,17 @@ sns.lmplot("BirthY", "EstAge", data=hre, hue="Dynasty", fit_reg=False);
 # use scipy to run a linear regression
 from scipy import stats
-(slope,intercept,rval,pval,stderr)=stats.linregress(hre.BirthY,hre.EstAge)
+(slope, intercept, rval, pval, stderr) = stats.linregress(hre.BirthY, hre.EstAge)
 # code source: http://wiki.scipy.org/Cookbook/LinearRegression
 # check the slope
-slope # 0.0057672618839073328
+slope  # 0.0057672618839073328
 # check the R^2 value:
-rval**2 # 0.020363950027333586
+rval**2  # 0.020363950027333586
 # check the p-value
-pval # 0.34971812581498452
+pval  # 0.34971812581498452
 # use seaborn to make a scatterplot and plot the linear regression trend line
 sns.lmplot("BirthY", "EstAge", data=hre);
@ -223,6 +233,7 @@ sns.lmplot("BirthY", "EstAge", data=hre);
    To see a version of the Holy Roman Emperors analysis using R, see
      - http://github.com/e99n09/R-notes/blob/master/holy_roman_emperors_dates.R
 """
 ```
 If you want to learn more, get _Python for Data Analysis_ by Wes McKinney. It's a superb resource and I used it as a reference when writing this tutorial.