01 01 Introduction to Python Numpy - HannaAA17/Data-Scientist-With-Python-datacamp GitHub Wiki
This is the note for Numpy section of the course #introduction to python#.
- Numpy stands for: Numeric Python
- Alternative to Python List: Numpy Array
- Calculations over entire arrays.
- Numpy arrays: contain only one type.
# Create list baseball
baseball = [180, 215, 210, 210, 188, 176, 209, 200]
# Import the numpy package as np
import numpy as np
# Create a numpy array from baseball: np_baseball
np_baseball = np.array(baseball)
# Print out type of np_baseball
print(type(np_baseball))
# height is available as a regular list
# Import numpy
import numpy as np
# Create a numpy array from height_in: np_height_in
np_height_in = np.array(height_in)
# Print out np_height_in
print(np_height_in)
# Convert np_height_in to m: np_height_m
np_height_m = np_height_in*0.0254
# Print np_height_m
print(np_height_m)
# height and weight are available as a regular lists
# Import numpy
import numpy as np
# Calculate the BMI: bmi
np_height_m = np.array(height_in) * 0.0254
np_weight_kg = np.array(weight_lb) * 0.453592
bmi = np_weight_kg / np_height_m ** 2
# Create the light array
light = bmi < 21
# Print out light
print(light)
# Print out BMIs of all baseball players whose BMI is below 21
print(bmi[light])
Subsetting (using the square bracket notation on lists or arrays) works exactly the same.
np_weight[50]
# Create baseball, a list of lists
baseball = [[180, 78.4],
[215, 102.7],
[210, 98.5],
[188, 75.2]]
# Import numpy
import numpy as np
# Create a 2D numpy array from baseball: np_baseball
np_baseball = np.array(baseball)
# Print out the type of np_baseball
print(type(np_baseball))
# Print out the shape of np_baseball
print(np_baseball.shape)
Output
<script.py> output:
<class 'numpy.ndarray'>
(4, 2)
# baseball is available as a regular list of lists
# Import numpy package
import numpy as np
# Create np_baseball (2 cols)
np_baseball = np.array(baseball)
# Print out the 50th row of np_baseball
print(np_baseball[49,:])
# Select the entire second column of np_baseball: np_weight_lb
np_weight_lb = np_baseball[:,1]
# Print out height of 124th player
print(np_baseball[123,0])
# baseball is available as a regular list of lists
# updated is available as 2D numpy array
# Import numpy package
import numpy as np
# Create np_baseball (3 cols)
np_baseball = np.array(baseball)
# Print out addition of np_baseball and updated
print(np_baseball + updated)
# Create numpy array: conversion
conversion = np.array([0.0254,0.453592,1])
# Print out product of np_baseball and conversion
print(np_baseball * conversion)
# np_baseball is available
# Import numpy
import numpy as np
# Print mean height (first column)
avg = np.mean(np_baseball[:,0])
print("Average: " + str(avg))
# Print median height. Replace 'None'
med = np.median(np_baseball[:,0])
print("Median: " + str(med))
# Print out the standard deviation on height. Replace 'None'
stddev = np.std(np_baseball[:,0])
print("Standard Deviation: " + str(stddev))
# Print out correlation between first and second column. Replace 'None'
corr = np.corrcoef(np_baseball[:,0],np_baseball[:,1])
print("Correlation: " + str(corr))
Exercise: prove ' the median height of goalkeepers is higher than other player on the soccer field '
# heights and positions are available as lists
# Import numpy
import numpy as np
# Convert positions and heights to numpy arrays: np_positions, np_heights
np_positions = np.array(positions)
np_heights = np.array(heights)
# Heights of the goalkeepers: gk_heights
gk_heights = np_heights[np_positions == 'GK']
# Heights of the other players: other_heights
other_heights = np_heights[np_positions != 'GK']
# Print out the median height of goalkeepers. Replace 'None'
print("Median height of goalkeepers: " + str(np.median(gk_heights)))
# Print out the median height of other players. Replace 'None'
print("Median height of other players: " + str(np.median(other_heights)))