Python Scientific Computing Cheat Sheet#
NumPy arrays#
NumPy arrays can be used to store multiple measurements, such as accuracy or response times from a task.
import numpy as np # import numpy under its conventional alias, np
integers = np.array([1, 2, 3]) # create an array by passing a list to np.array
floats = np.array([1.1, 2.2, 3.3]) # decimal values are stored using a float data type
letters = np.array(["a", "b", "c"]) # arrays can also store string data
print(integers, integers.dtype) # each array has a data type (dtype); here an integer type
print(floats, floats.dtype) # decimals are stored with a float data type
print(letters, letters.dtype) # <U1 is a string data type: Unicode text of at most 1 character
[1 2 3] int64
[1.1 2.2 3.3] float64
['a' 'b' 'c'] <U1
Arrays may have different numbers of dimensions.
# create a 1D array from a flat list of values
array1d = np.array([1, 2])
print(array1d) # a 1D array prints as a single row of values
[1 2]
# create a 2D array from a list of lists; each inner list becomes one row
array2d = np.array([[1, 2], [3, 4]])
print(array2d) # a 2D array prints with one row per line
[[1 2]
[3 4]]
Arrays have attributes that tell us about their properties. Attributes can be accessed by typing the name of the variable, a dot (.), and then the name of the attribute (for example, size).
# attributes are read without parentheses because they are not function calls
print(array2d.size) # total number of items across all dimensions
print(array2d.ndim) # number of dimensions
print(array2d.shape) # length of each dimension, as a tuple: (rows, columns)
4
2
(2, 2)
Array expressions#
Use mathematical expressions with NumPy arrays to efficiently run calculations.
array1 = np.array([1, 2, 3, 4]) # integer array used in the examples below
print(array1) # original values, for comparison
print(array1 + 3) # add to each of the items
print(array1 - 4) # subtract from each of the items
print(array1 * 2) # multiply each item by a value
print(array1 / 2) # divide each item by a value (the result is a float array)
print(array1 ** 3) # raise each item to a power
[1 2 3 4]
[4 5 6 7]
[-3 -2 -1 0]
[2 4 6 8]
[0.5 1. 1.5 2. ]
[ 1 8 27 64]
array2 = np.array([1, 1, 2, 2]) # second array with the same length as array1
print(array1 + array2) # add matching elements of arrays
print(array1 - array2) # subtract matching elements of arrays
print(array1 * array2) # multiply matching elements of arrays
print(array1 / array2) # divide matching elements of arrays
print(array1 ** array2) # take array1 items to the power of array2 items
[2 3 5 6]
[0 1 1 2]
[1 2 6 8]
[1. 2. 1.5 2. ]
[ 1 2 9 16]
print(array1 @ array2) # dot product: the sum of the products of matching elements
17
Array creation functions#
Use array creation functions to create arrays with a range of values or all zeros or ones.
r1 = np.arange(1, 11) # from 1 up to, but not including, 11, with a step of 1
r2 = np.arange(1, 11, 0.5) # same range with a step of 0.5; the result is a float array
print(r1) # the last value is 10 because the stop value is excluded
print(r2) # the last value is 10.5 because the stop value is excluded
[ 1 2 3 4 5 6 7 8 9 10]
[ 1. 1.5 2. 2.5 3. 3.5 4. 4.5 5. 5.5 6. 6.5 7. 7.5
8. 8.5 9. 9.5 10. 10.5]
l1 = np.linspace(0, 8, 5) # 5 evenly spaced values from 0 to 8 (both endpoints included)
l2 = np.linspace(0, 8, 10) # 10 evenly spaced values from 0 to 8 (both endpoints included)
print(l1) # spacing between values is 2
print(l2) # spacing between values is 8/9
[0. 2. 4. 6. 8.]
[0. 0.88888889 1.77777778 2.66666667 3.55555556 4.44444444
5.33333333 6.22222222 7.11111111 8. ]
z1 = np.zeros(6) # an array of six zeros (floats by default)
z2 = np.zeros(6, dtype=int) # same, but use integers instead of floats
z3 = np.zeros((2, 3)) # pass a tuple for a 2D array: two rows and three columns
z4 = np.zeros(4, dtype=bool) # boolean array filled with False
print(z1) # floats are displayed with a trailing decimal point
print(z2) # integers are displayed without a decimal point
print(z3) # one row per line
print(z4) # zero corresponds to False in a boolean array
[0. 0. 0. 0. 0. 0.]
[0 0 0 0 0 0]
[[0. 0. 0.]
[0. 0. 0.]]
[False False False False]
o1 = np.ones(6) # an array of six ones (floats by default)
o2 = np.ones((2, 3), dtype=int) # two rows and three columns of integers
o3 = np.ones(4, dtype=bool) # boolean array filled with True
print(o1) # floats are displayed with a trailing decimal point
print(o2) # one row per line
print(o3) # one corresponds to True in a boolean array
[1. 1. 1. 1. 1. 1.]
[[1 1 1]
[1 1 1]]
[ True True True True]
Array indexing#
Use indexing to get data values from part of an array.
a = np.arange(1, 11) # the integers 1 through 10
print(a) # full array, for comparison
print(a[9]) # get the item at index 9 (the tenth item), counting from zero
print(a[:5]) # get the first five items (indices 0 through 4)
print(a[-5:]) # get the last five items (negative indices count from the end)
[ 1 2 3 4 5 6 7 8 9 10]
10
[1 2 3 4 5]
[ 6 7 8 9 10]
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) # 3x3 array; each inner list is one row
print(matrix) # full matrix, for comparison
print(matrix[1]) # get the second row (at index 1)
print(matrix[1, :]) # same, but explicitly indicate all columns included
print(matrix[:, 1]) # get the second column (all rows, column index 1)
print(matrix[1, 1]) # get the item in second row, second column
[[1 2 3]
[4 5 6]
[7 8 9]]
[4 5 6]
[4 5 6]
[2 5 8]
5
# slice rows and columns at the same time; the row slice comes first
print(matrix[:2, :2]) # get first two rows and first two columns
print(matrix[-2:, -2:]) # get last two rows and last two columns
[[1 2]
[4 5]]
[[5 6]
[8 9]]
Array functions#
NumPy functions can be used to calculate statistics based on data in arrays.
a = np.array([5, 1, 4, 4, 5, 7, 6, 4, 4, 10]) # ten example measurements
print(np.sum(a)) # sum over all elements
print(np.mean(a)) # mean value (sum divided by number of elements)
print(np.std(a)) # standard deviation (by default, the population form that divides by N)
50
5.0
2.23606797749979
print(np.min(a)) # minimum value in the array
print(np.max(a)) # maximum value in the array
print(np.median(a)) # median value (mean of the two middle values when the number of items is even)
1
10
4.5
When analyzing behavioral performance on a task, a common approach is to label each trial as 1 (correct) or 0 (incorrect). Then it is possible to calculate overall accuracy using np.mean.
correct = np.array([0, 1, 1, 0, 1]) # responses coded as 1 (correct) or 0 (incorrect)
accuracy = np.mean(correct) # mean gives fraction correct (0.6 here: 3 of 5 trials)
Many other standard mathematical functions are also available.
x = np.array([1, 2, 3, 4]) # example values; each function is applied to every element
print(np.sqrt(x)) # square root
print(np.cos(x)) # cosine (input in radians); other standard trigonometric functions are also available
print(np.exp(x)) # exponential (e raised to the power of each element)
print(np.log(x)) # natural logarithm
[1. 1.41421356 1.73205081 2. ]
[ 0.54030231 -0.41614684 -0.9899925 -0.65364362]
[ 2.71828183 7.3890561 20.08553692 54.59815003]
[0. 0.69314718 1.09861229 1.38629436]
Filtering arrays#
Use boolean arrays to get subsets of data within arrays.
data = np.array([3, 4, 2, 7, 6, 5]) # array with data
filter1 = np.array([True, True, True, False, False, False]) # filter array, same length as data
filter2 = np.array([False, False, False, True, True, True]) # another filter array
print(data) # full data array, for comparison
print(filter1) # this filter is True for the first three elements
print(filter2) # this filter is True for the last three elements
print(data[filter1]) # index using a boolean array to keep only elements where the filter is True
print(data[filter2]) # second filter gets the last half instead
[3 4 2 7 6 5]
[ True True True False False False]
[False False False True True True]
[3 4 2]
[7 6 5]
If we have a data array and another array with condition labels, we can use the condition labels to filter the data.
condition = np.array([1, 1, 1, 2, 2, 2]) # condition code for each trial, same length as data
print(data) # data values, one per trial
print(condition) # condition codes, one per trial
print(condition == 1) # boolean filter that is True for condition 1 trials
print(condition == 2) # boolean filter that is True for condition 2 trials
print(data[condition == 1]) # get data for condition 1
print(data[condition == 2]) # get data for condition 2
[3 4 2 7 6 5]
[1 1 1 2 2 2]
[ True True True False False False]
[False False False True True True]
[3 4 2]
[7 6 5]
Creating array filters#
Various operations can be used to create an array filter. They work like the conditional expressions in base Python.
Use comparison operators (==, !=, <, >, <=, >=) to compare elements of an array with numbers.
# each comparison is applied to every element and returns a boolean array
print(condition == 1) # True when condition equals 1
print(condition != 1) # True when condition does not equal 1
print(condition < 2) # True when condition is less than 2
print(condition <= 2) # True when condition is less than or equal to 2 (every trial here)
print(condition > 1) # True when condition is greater than 1
print(condition >= 1) # True when condition is greater than or equal to 1 (every trial here)
[ True True True False False False]
[False False False True True True]
[ True True True False False False]
[ True True True True True True]
[False False False True True True]
[ True True True True True True]
Use the equal (==) and not equal (!=) operators to compare elements of an array with strings.
labels = np.array(["target", "target", "target", "lure", "lure", "lure"]) # text label for each trial
print(labels == "target") # True when label is "target"
print(labels != "target") # True when label is not "target"
print(labels == "lure") # True when label is "lure"
print(labels != "lure") # True when label is not "lure"
[ True True True False False False]
[False False False True True True]
[False False False True True True]
[ True True True False False False]
trial_types = ["target", "lure"] # list containing both labels
memory_trial_types = ["target"] # list containing only one of the labels
print(np.isin(labels, trial_types)) # True for each element that appears in the list
print(np.isin(labels, memory_trial_types)) # False for elements not in the list
[ True True True True True True]
[ True True True False False False]
Create more complex filters using operators: & (and), | (or), ~ (not). Use parentheses around each individual expression you are combining or negating, because these operators are evaluated before comparison operators such as ==.
correct = np.array([1, 1, 0, 0, 1, 0]) # 1 for correct trials, 0 for incorrect trials
print((labels == "target") & (correct == 0)) # use & to require both filters (incorrect target trials)
print((labels == "target") | (correct == 1)) # use | to accept either filter (target trials or correct trials)
print(~(correct == 0)) # use ~ to get opposite of filter (True where correct is not 0)
[False False True False False False]
[ True True True False True False]
[ True True False False True False]
Filtering and calculating statistics#
Use array filters with array functions to calculate statistics for subsets of your data.
rt = np.array([1.2, 1.4, 1.8, 1.4, 1.3, 1.7, 2.1, 1.6]) # response time for each trial
condition = np.array([1, 1, 1, 1, 2, 2, 2, 2]) # condition code for each trial
correct = np.array([1, 1, 0, 1, 0, 0, 0, 1]) # 1 if the trial was correct, 0 if not
print(np.mean(correct)) # accuracy (fraction of correct trials)
print(np.mean(correct[condition == 1])) # accuracy for condition 1 trials only
print(np.mean(correct[condition == 2])) # accuracy for condition 2 trials only
0.5
0.75
0.25
print(np.mean(rt)) # overall mean of RT
print(np.mean(rt[condition == 1])) # mean RT for condition 1
print(np.mean(rt[condition == 2])) # mean RT for condition 2
print(np.mean(rt[(condition == 1) & (correct == 1)])) # mean RT for correct trials in condition 1
print(np.mean(rt[(condition == 2) & (correct == 1)])) # mean RT for correct trials in condition 2
1.5625
1.4499999999999997
1.6749999999999998
1.3333333333333333
1.6