Python Scientific Computing Cheat Sheet

NumPy arrays

NumPy arrays can be used to store multiple measurements, such as accuracy or response times from a task.

import numpy as np                   # the standard way to import numpy
integers = np.array([1, 2, 3])       # create an array by passing a list to np.array
floats = np.array([1.1, 2.2, 3.3])   # can also store decimals in float data type
letters = np.array(["a", "b", "c"])  # can store string data in an array
print(integers, integers.dtype)      # each array has a data type
print(floats, floats.dtype)          # decimals are stored with float data type
print(letters, letters.dtype)        # string data type indicates text format
[1 2 3] int64
[1.1 2.2 3.3] float64
['a' 'b' 'c'] <U1

Arrays may have different numbers of dimensions.

# create a 1D array from a list
array1d = np.array([1, 2])
print(array1d)
[1 2]
# create a 2D array from a list of lists
array2d = np.array([[1, 2], [3, 4]])
print(array2d)
[[1 2]
 [3 4]]

Arrays have attributes that tell us about their properties. Attributes can be accessed by typing the name of the variable, a dot (.), and then the name of the attribute (for example, size).

print(array2d.size)   # total number of items
print(array2d.ndim)   # number of dimensions
print(array2d.shape)  # size of each dimension
4
2
(2, 2)

Array expressions

Use mathematical expressions with NumPy arrays to efficiently run calculations.

array1 = np.array([1, 2, 3, 4])
print(array1)
print(array1 + 3)   # add to each of the items
print(array1 - 4)   # subtract from each of the items
print(array1 * 2)   # multiply by a value
print(array1 / 2)   # divide by a value
print(array1 ** 3)  # take values to a power
[1 2 3 4]
[4 5 6 7]
[-3 -2 -1  0]
[2 4 6 8]
[0.5 1.  1.5 2. ]
[ 1  8 27 64]
array2 = np.array([1, 1, 2, 2])
print(array1 + array2)   # add arrays
print(array1 - array2)   # subtract arrays
print(array1 * array2)   # multiply matching elements of arrays
print(array1 / array2)   # divide matching elements of arrays
print(array1 ** array2)  # take array1 items to the power of array2 items
[2 3 5 6]
[0 1 1 2]
[1 2 6 8]
[1.  2.  1.5 2. ]
[ 1  2  9 16]
print(array1 @ array2)  # calculate the dot product between arrays
17

Array creation functions

Use array creation functions to create arrays with a range of values or all zeros or ones.

r1 = np.arange(1, 11)       # 1 to 11 (non inclusive), with a step of 1
r2 = np.arange(1, 11, 0.5)  # 1 to 11 (non inclusive), with a step of 0.5
print(r1)
print(r2)
[ 1  2  3  4  5  6  7  8  9 10]
[ 1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5  7.   7.5
  8.   8.5  9.   9.5 10.  10.5]
l1 = np.linspace(0, 8, 5)   # 5 linearly spaced values from 0 to 8
l2 = np.linspace(0, 8, 10)  # 10 linearly spaced values from 0 to 8
print(l1)
print(l2)
[0. 2. 4. 6. 8.]
[0.         0.88888889 1.77777778 2.66666667 3.55555556 4.44444444
 5.33333333 6.22222222 7.11111111 8.        ]
z1 = np.zeros(6)              # an array with all zeros
z2 = np.zeros(6, dtype=int)   # same, but use integers instead of floats
z3 = np.zeros((2, 3))         # two rows and three columns
z4 = np.zeros(4, dtype=bool)  # boolean array filled with False
print(z1)
print(z2)
print(z3)
print(z4)
[0. 0. 0. 0. 0. 0.]
[0 0 0 0 0 0]
[[0. 0. 0.]
 [0. 0. 0.]]
[False False False False]
o1 = np.ones(6)                  # an array with all ones
o2 = np.ones((2, 3), dtype=int)  # two rows and three columns of integers
o3 = np.ones(4, dtype=bool)      # boolean array filled with True
print(o1)
print(o2)
print(o3)
[1. 1. 1. 1. 1. 1.]
[[1 1 1]
 [1 1 1]]
[ True  True  True  True]

Array indexing

Use indexing to get data values from part of an array.

a = np.arange(1, 11)
print(a)
print(a[9])    # get the item at index 9, counting from zero
print(a[:5])   # get the first five items
print(a[-5:])  # get the last five items
[ 1  2  3  4  5  6  7  8  9 10]
10
[1 2 3 4 5]
[ 6  7  8  9 10]
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(matrix)
print(matrix[1])     # get the second row (at index 1)
print(matrix[1, :])  # same, but explicitly indicate all columns included
print(matrix[:, 1])  # get the second column
print(matrix[1, 1])  # get the item in second row, second column
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[4 5 6]
[4 5 6]
[2 5 8]
5
print(matrix[:2, :2])    # get first two rows and first two columns
print(matrix[-2:, -2:])  # get last two rows and last two columns
[[1 2]
 [4 5]]
[[5 6]
 [8 9]]

Array functions

NumPy functions can be used to calculate statistics based on data in arrays.

a = np.array([5, 1, 4, 4, 5, 7, 6, 4, 4, 10])
print(np.sum(a))     # sum over all elements
print(np.mean(a))    # mean value
print(np.std(a))     # standard deviation
50
5.0
2.23606797749979
print(np.min(a))     # minimum value in the array
print(np.max(a))     # maximum value in the array
print(np.median(a))  # median value
1
10
4.5

When analyzing behavioral performance on a task, a common approach is to label each trial as 1 (correct) or 0 (incorrect). Then it is possible to calculate overall accuracy using np.mean.

correct = np.array([0, 1, 1, 0, 1])  # responses coded as 1 or 0
accuracy = np.mean(correct)          # mean gives fraction correct

Many other standard mathematical functions are also available.

x = np.array([1, 2, 3, 4])
print(np.sqrt(x))  # square root
print(np.cos(x))   # standard trigonometric functions
print(np.exp(x))   # exponential
print(np.log(x))   # natural logarithm
[1.         1.41421356 1.73205081 2.        ]
[ 0.54030231 -0.41614684 -0.9899925  -0.65364362]
[ 2.71828183  7.3890561  20.08553692 54.59815003]
[0.         0.69314718 1.09861229 1.38629436]

Filtering arrays

Use boolean arrays to get subsets of data within arrays.

data = np.array([3, 4, 2, 7, 6, 5])                          # array with data
filter1 = np.array([True, True, True, False, False, False])  # filter array
filter2 = np.array([False, False, False, True, True, True])  # another filter array
print(data)
print(filter1)        # this filter is True for the first three elements
print(filter2)        # this filter is True for the last three elements
print(data[filter1])  # index using a boolean array to get only True elements
print(data[filter2])  # second filter gets the last half instead
[3 4 2 7 6 5]
[ True  True  True False False False]
[False False False  True  True  True]
[3 4 2]
[7 6 5]

If we have a data array and another array with condition labels, we can use the condition labels to filter the data.

condition = np.array([1, 1, 1, 2, 2, 2])  # condition code for each trial
print(data)
print(condition)
print(condition == 1)        # filter for condition 1
print(condition == 2)        # filter for condition 2
print(data[condition == 1])  # get data for condition 1
print(data[condition == 2])  # get data for condition 2
[3 4 2 7 6 5]
[1 1 1 2 2 2]
[ True  True  True False False False]
[False False False  True  True  True]
[3 4 2]
[7 6 5]

Creating array filters

Various operations can be used to create an array filter. They work like the conditional expressions in base Python.

Use comparison operators (==, !=, <, >, <=, >=) to compare elements of an array with numbers.

print(condition == 1)  # True when condition equals 1
print(condition != 1)  # True when condition does not equal 1
print(condition < 2)   # True when condition is less than 2
print(condition <= 2)  # True when condition is less than or equal to 2
print(condition > 1)   # True when condition is greater than 1
print(condition >= 1)  # True when condition is greater than or equal to 1
[ True  True  True False False False]
[False False False  True  True  True]
[ True  True  True False False False]
[ True  True  True  True  True  True]
[False False False  True  True  True]
[ True  True  True  True  True  True]

Use the equal (==) and not equal (!=) operators to compare elements of an array with strings.

labels = np.array(["target", "target", "target", "lure", "lure", "lure"])
print(labels == "target")  # True when label is "target"
print(labels != "target")  # True when label is not "target"
print(labels == "lure")    # True when label is "lure"
print(labels != "lure")    # True when label is not "lure"
[ True  True  True False False False]
[False False False  True  True  True]
[False False False  True  True  True]
[ True  True  True False False False]
trial_types = ["target", "lure"]
memory_trial_types = ["target"]
print(np.isin(labels, trial_types))         # check if elements are in a list
print(np.isin(labels, memory_trial_types))  # False for elements not in the list
[ True  True  True  True  True  True]
[ True  True  True False False False]

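Create more complex filters using operators: & (and), | (or), ~ (not). Use parentheses around each individual comparison you are combining or negating, because &, |, and ~ are evaluated before comparison operators such as == and <; without the parentheses, the expression will raise an error or give the wrong result.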

correct = np.array([1, 1, 0, 0, 1, 0])
print((labels == "target") & (correct == 0))  # use & to require both filters
print((labels == "target") | (correct == 1))  # use | to use either filter
print(~(correct == 0))                        # use ~ to get opposite of filter
[False False  True False False False]
[ True  True  True False  True False]
[ True  True False False  True False]

Filtering and calculating statistics

Use array filters with array functions to calculate statistics for subsets of your data.

rt = np.array([1.2, 1.4, 1.8, 1.4, 1.3, 1.7, 2.1, 1.6])  # response time
condition = np.array([1, 1, 1, 1, 2, 2, 2, 2])           # condition code
correct = np.array([1, 1, 0, 1, 0, 0, 0, 1])             # correct or not
print(np.mean(correct))                  # accuracy (fraction of correct trials)
print(np.mean(correct[condition == 1]))  # accuracy for condition 1
print(np.mean(correct[condition == 2]))  # accuracy for condition 2
0.5
0.75
0.25
print(np.mean(rt))                                     # overall mean of RT
print(np.mean(rt[condition == 1]))                     # condition 1
print(np.mean(rt[condition == 2]))                     # condition 2
print(np.mean(rt[(condition == 1) & (correct == 1)]))  # condition 1 and correct
print(np.mean(rt[(condition == 2) & (correct == 1)]))  # condition 2 and correct
1.5625
1.4499999999999997
1.6749999999999998
1.3333333333333333
1.6