print("Hello, World!")
print(100)
print('Word', 10)
Operator | Syntax | Description |
---|---|---|
+ | x + y | Addition |
- | x - y | Subtraction |
* | x * y | Multiplication |
/ | x / y | Division (float) |
// | x // y | Division (floor) |
** | x ** y | Exponent |
% | x % y | Modulus |
print(5 + 2.5) # 7.5
print(3 - 1.5) # 1.5
print(12 * 3) # 36
print(9 / 2) # 4.5
print(9 // 2) # 4
print(4 ** 2) # 16
print(10 % 4) # 2
Like in math, these arithmetic operators have precedence, which we can alter using parentheses.
print(5 + 4 * 3) # 5 + 12 = 17
print((5 + 4) * 3) # 9 * 3 = 27
Operator | Syntax | Description |
---|---|---|
> | x > y | True if x is greater than y |
< | x < y | True if x is less than y |
== | x == y | True if x is equal to y |
!= | x != y | True if x is not equal to y |
>= | x >= y | True if x > y or x == y |
<= | x <= y | True if x < y or x == y |
print(15 > 10) # True
print(4 < 3) # False
print(5 == 9) # False
print(5 != 9) # True
print(100 >= 100) # True
print(20 <= 10) # False
We can also compare strings:
print("Hello" == "Hello") # True
print("string" != "String") # True
print("a" > "z") # False
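Characters are ordered by their Unicode code points (which match the ASCII values for ASCII characters), so all uppercase letters sort before lowercase letters.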
Operator | Syntax | Description |
---|---|---|
and | x and y | True if both x and y are true |
or | x or y | True if either x or y is true |
not | not x | True if x is false |
# and
print(5 > 4 and 10 <= 10) # True
print(4 == 5 and 22.5 > 12) # False
print()
# or
print(4 == 5 or 22.5 > 12) # True
print(not True or 9 // 2 == 4.5) # False
print()
# not
print(not False) # True
print(not 32 > 8) # False
variable = some_value
variable_1 = "This a variable"
variable_2 = 50
variable_3 = False
print(variable_1)
print(variable_2)
print(variable_3)
x = 4
y = 2
print(x * y) # 8
print(x <= y)
print()
word_1 = "Hello"
word_2 = "World"
sentence = word_1 + word_2 # String concatenation
print(sentence)
integer = 7
decimal = -99.9
# Use the type() function to check the type of a variable
print(type(integer))
print(type(decimal))
string_1 = 'Hello, World!' # single quotes
string_2 = "Hello, World!" # double quotes
print(string_1 == string_2)
Strings can also contain numbers and other characters:
binary_string = '0111001010'
mixed_string = 'a1b2c3*d$4'
print(type(binary_string))
number_int = 23
number_str = str(23)
print(number_str)
print(type(number_str))
float_var = 3.14
int_var = int(float_var)
print(int_var)
print(type(int_var))
pi_string = '3.14'
pi_float = float(pi_string)
print(pi_float)
print(type(pi_float))
string_1 = 'Hello, World!'
print(string_1.lower()) # hello, world!
print(string_1.upper()) # HELLO, WORLD!
print(len(string_1))
str.replace(old, new)
string = 'Hi, everybody'
print(string.replace('Hi', 'Bye'))
str.startswith(value)
dna = 'GTCAGTTAACGTACGTTA'
greeting = 'Hello, World!'
print(dna.startswith('G'))
print(greeting.startswith('Hello'))
print(dna.startswith('T'))
rna = 'ACUGGCCUUUACGUGCCC'
string = 'genetics'
print(rna.endswith('CCC'))
print(string.endswith('s'))
print(string.endswith('g'))
W | O | R | D |
---|---|---|---|
0 | 1 | 2 | 3 |
D --> Index 3
We can access specific characters in a string using their index numbers.
some_string[index]
string = 'Python'
first_char = string[0]
third_char = string[2]
print(first_char)
print(third_char)
string = 'Summer'
print(string[-1]) # Prints the last character
print(string[-2])
substring = string[start_idx : end_idx]
Note: Returns everything from start_idx up to, but not including, the character at the end_idx position.
x = '012345'
print(x[0:4]) # 0123
# Slice a string from the start up to an index, and from an index to the end.
string = "Bioinformatics"
substring_1 = string[0:3] # Bio
print(substring_1)
substring_2 = string[3:] # informatics (lowercase: slicing keeps the original casing)
print(substring_2)
string[:5] == string[0:5]
college = 'Hunter College'
print(college[:6])
print(college[7:])
my_list = [1,5,'String']
alpha_list = ['a', 'b', 'c', 'd']
num_list = [2, 5, 22, 9]
mixed_list = ['a', 1, 'b', 90.99, True, 4==5, 20 % 6, True or False, num_list]
print(alpha_list)
print(num_list)
print(mixed_list)
first_element = my_list[0]
second_element = my_list[1]
last_element = my_list[-1]
num_list = [2, 5, 22, 9]
second_num = num_list[1] # 5
last_num = num_list[-1] # 9
print(second_num + last_num) # 14
subset = my_list[start_idx : end_idx]
subset = my_list[2:8]
Note: Returns everything from start_idx up to, but not including, the element at the end_idx position.
num_list = [0,1,2,3,4,5,6,7,8,9]
print(num_list[:7])
fruits = ['apple', 'banana', 'orange']
print(fruits)
fruits[1] = 'pineapple'
print(fruits)
Note: We are not able to do the same with strings because they are immutable (can't be changed).
Code below would not run and would raise an error:
string = 'apple'
string[0] = 'e'
numbers = [-20, -10, 0, 10, 20]
print(len(numbers))
print(max(numbers))
print(min(numbers))
list.append(item)
fruits = ['apple', 'banana', 'orange']
print(fruits)
fruits.append('pineapple')
print(fruits)
fruits.append('pear')
print(fruits)
list.insert(position, item)
print(fruits)
fruits.insert(1, 'peach')
print(fruits)
print(fruits[1])
berries = ['strawberry', 'blueberry']
fruits_and_berries = fruits + berries
print(fruits_and_berries)
There are several methods to remove elements from a list.
list.remove( ) removes a specific element.
list.remove(item)
fruits = ['apple', 'banana', 'orange', 'peach']
print(fruits)
fruits.remove('orange')
print(fruits)
list.pop(index)
print(fruits)
fruits.pop()
print(fruits)
nested_list = [[1.0, 2.1, 3.2], ['a', 'b', 'c'], [True, False, False]]
nested_list = [[1.0, 2.1, 3.2], ['a', 'b', 'c'], [True, False, False]]
print(nested_list[0][0]) # Prints 1.0
print(nested_list[1][2]) # Prints 'c'
Like other programming languages, Python uses control flow statements that alter the sequential flow of the program.
The most well-known control flow statement is the if statement.
Some code
if condition:
    Some block of code
More code
# Print 'even' only when the selected number is divisible by 2.
num_list = [1,1,2,3,5,8,13,21]
num = num_list[5] # 8
if num % 2 == 0:
    print('even')
num2 = num_list[0] # 1
if num2 % 2 == 0:
    print('even') # Should give no output
# Two-way branch: anything that is not strictly positive takes the else path.
num = -5
if num > 0:
    print('positive')
else:
    print('negative')
if condition:
    #Body of if
elif some other condition:
    #Body of elif
else:
    #Body of else
# Three-way branch: the elif separates negative numbers from zero.
num = 0
if num > 0:
    print('positive')
elif num < 0:
    print('negative')
else:
    print('zero')
for item in some_sequence:
    block of code
# Double every number in the list
numbers = [1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
for num in numbers:
    print(num * 2)
# Iterate over a range of numbers
# Extract even numbers and append them to a new list
numbers = range(100) # Creates a sequence of numbers from 0 to 99
evens = [] # New empty list to store even numbers
for num in numbers: # Goes through each number in the sequence
    if num % 2 == 0:
        evens.append(num)
print(evens) # Printed once, after the loop has finished
# Iterate over a string and print a new string that has all the vowels removed
vowels = ['a', 'e', 'i', 'o', 'u'] # Vowel list
string = 'The quICk brOwn Fox jumps OveR thE laZy Dog' # Some characters are in uppercase
new_string = '' # Create an empty string
for character in string: # Go through every character in the string
    # Lowercase before the membership test so uppercase vowels are removed too
    if character.lower() not in vowels: # Check if the character is NOT a vowel
        new_string = new_string + character
print(new_string)
# Print 0 through 5, then leave the loop early with break.
for number in range(10): # Numbers 0 - 9
    print(number)
    if number == 5: # Exits the loop if number is equal to 5
        break
# Print only the odd numbers: continue skips the rest of the body for even ones.
for number in range(10):
    if number % 2 == 0:
        continue
    print(number)
# Version 1: build the list of even numbers with an explicit loop
even_numbers = []
for number in range(100):
    if number % 2 == 0:
        even_numbers.append(number)
# Version 2: the same list built with a list comprehension
even_numbers = [number for number in range(100) if number % 2 == 0]
Both code snippets produce the same list.
Syntax:
new_list = [expression for variable in some_iterable]
# Squares
sequence = [1,2,3,4,5,6]
squares = [num ** 2 for num in sequence]
print(squares)
string.split(separator)
If a separator argument is not provided, the string is split on whitespace.
sentence = 'This is a sentence.'
words = sentence.split()
print(words)
college_string = 'CUNY$Hunter$College'
college_list = college_string.split('$')
print(college_list)
joined_string = str.join(sequence)
If the sequence contains any non-string values, Python raises an error.
month_lst = ['June', 'July', 'August']
separator = '*'
month_str = separator.join(month_lst)
print(month_str)
chars = ['a', 'b', 'c', 'd']
string = ''.join(chars)
print(string)
numbers = [42, 1, 6, 2, 0]
print(len(numbers)) # Prints 5
# Count the items of a list by hand, without using len()
numbers = [42, 1, 6, 2, 0]
length = 0 # Initialize a variable with a value of 0
for item in numbers: # Loop through the list
    length += 1 # Increment the length variable by 1 for each item in the list
print(length)
Syntax
def function_name(parameters):
    some statements
    return some value
def my_len(sequence):
    """Return the number of items in *sequence* by counting them one at a time.

    Works for any iterable (list, string, range, ...); an empty iterable gives 0.
    """
    length = 0
    for item in sequence:
        length += 1
    return length
# Test out the my_len() function
def my_len(sequence):
    """Return the number of items in *sequence* by counting them one at a time."""
    length = 0
    for item in sequence:
        length += 1
    return length


my_list = ['a', 'b', 'c', 'd']
print(len(my_list)) # len()
print(my_len(my_list)) # my_len()
# A function that takes in a list of numbers as its argument and returns the sum of its values
def my_sum(list_of_numbers):
    """Return the sum of the values in *list_of_numbers* (0 for an empty list)."""
    total = 0
    for number in list_of_numbers:
        total += number
    return total


total = my_sum([2, 4, 6, 8, 10])
print(total)
# A function that takes in two numbers as its arguments: base and power
# The function should return the base raised to the given power
def power(base, power):
    """Return *base* raised to the exponent *power*."""
    # Inside the function, the parameter `power` shadows the function's own name.
    return base ** power


print(power(2, 2))
print(power(2, 5))
print(power(5, 6))
# Convert each number to a string
num_list = [8, 65, 32, 9, 100]
string_list = map(str, num_list)
print(list(string_list))
lambda arguments: expression
num_list = [1, 4, 8, 20, 45, 24, 56]
# Double every value in the num_list
doubled = map(lambda x: x * 2, num_list)
print(list(doubled))
my_dictionary = {key_1: value_1, key_2: value_2, key_3: value_3}
student_grades = {'English': 80,
'Physics': 85,
'Biology': 92
}
print(student_grades)
value = dictionary[key]
student_grades = {'English': 80,
'Physics': 85,
'Biology': 92
}
# Get Biology grade
print(student_grades['Biology'])
capitals = {'France': 'Paris', 'Italy': 'Rome', 'Germany': 'Berlin', 'Spain': 'Madrid'}
print(capitals['Italy'])
print(capitals['Spain'])
Changing a value:
student_grades = {'English': 80,
'Physics': 85,
'Biology': 92
}
# Change English grade
student_grades['English'] = 95
Adding a new key-value pair:
student_grades = {'English': 95,
'Physics': 85,
'Biology': 92
}
# Add a new subject and a grade
student_grades['History'] = 90
Updated dictionary should look like this:
student_grades = {'English': 95,
'Physics': 85,
'Biology': 92,
'History': 90
}
fruit_dict = {'apple': 3, 'orange': 5, 'pear': 3}
print(fruit_dict)
fruit_dict['apple'] = 5
fruit_dict['orange'] = 2
print(fruit_dict)
fruit_dict['banana'] = 2
print(fruit_dict)
dict.pop(key)
fruit_dict = {'apple': 3, 'orange': 5, 'banana': 3}
print(fruit_dict)
fruit_dict.pop('orange') # Removes orange
print(fruit_dict)
student_grades = {'English': 85, 'Physics': 90, 'Biology': 92, 'History': 99, 'Calculus': 91}
print(student_grades.keys())
key_list = list(student_grades.keys())
print(key_list)
print(student_grades.values()) # Print as a view
print(list(student_grades.values())) # Print as a list
print(student_grades.items())
print(list(student_grades.items()))
# Iterating over a dictionary directly yields its keys
for subject in student_grades:
    print(subject)
# .items() yields (key, value) pairs, unpacked here into two loop variables
for subject, grade in student_grades.items():
    print(subject, grade)
# A function that counts the frequency of each character in a string
def count_chars(string):
    """Return a dict mapping each character of *string* to its number of occurrences.

    Keys appear in order of first occurrence; an empty string gives an empty dict.
    """
    char_count = {} # Initialize an empty dictionary
    for char in string:
        if char in char_count: # Check if the character is already in the dictionary
            char_count[char] += 1 # Increment the value by 1 if the character is already in the dict
        else:
            char_count[char] = 1 # If the character is not in the dictionary yet, set the value to 1
    return char_count # Return the character count dictionary


dna = 'cccggtcggccgacaacaggtcgattcataatatt'
print(count_chars(dna))
Modules are files containing Python definitions and statements that are intended for use in other Python programs.
Python has many built-in modules as part of its standard library.
import module
module.function()
import math # Import the module we wish to use
number = 4
number_factorial = math.factorial(number) # Use the math.factorial() function from the math module
print(number_factorial)
print(math.sqrt(16)) # Square root
from module import function
function(some_variable)
from statistics import mean, stdev
data = [86, 65, 90, 100, 72, 89, 52]
print('Mean: ', mean(data))
print('Standard deviation: ', stdev(data))
import numpy as np
np.array([1,2,3,4])
import numpy as np
data_list = [1,2,3,4,5]
data_array = np.array(data_list)
print('List: ', data_list)
print('NumPy array: ', data_array)
Creating a 2 dimensional array:
nested_list = [[1,2,3], [4,5,6], [7,8,9]]
array = np.array(nested_list)
print(array)
ndarray.shape
print(array.shape) # Prints (number of rows, number of columns)
ndarray.size
print(array.size)
ndarray.ndim
print(array.ndim)
x = np.array( [10, 20, 30, 40] )
y = np.array( [1, 2, 3, 4] )
# Addition
print(x + y)
# Subtraction
print(x - y)
# Multiplication by a constant
print(x * 3)
print(y * 2)
array1d = np.array([10, 20, 30, 40, 50])
print(array1d[0])
print(array1d[-1])
print(array1d[2])
array2d = np.array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15], [16, 17, 18, 19]])
print(array2d)
second_row = array2d[1]
print(second_row)
array2d[start_index:end_index]
Produces a 2D ndarray
print(array2d[2:])
print()
print(array2d[1:3]) # From index 1 up to but not including index 3
array2d[row, column]
Produces a single Python object
print(array2d[4, 1])
array2d[:, column]
Produces a 1D ndarray
print(array2d)
print()
print(array2d[:,2])
array2d[:, start_col:end_col]
Produces a 2D ndarray
print(array2d)
print()
print(array2d[:, 0:2])
array2d[:, [columns]] # Pass in a list of column indexes
Produces a 2D ndarray
cols = [0,2,3]
print(array2d[:, cols])
print(np.array( [2, 20, 6, 10, 8] ) < 10)
ndarray[boolean_array]
# Create a new array containing positive values
my_array = np.array( [-2, -154, 62, 0, -843, 200, 478] )
bool_array = my_array > 0
print(bool_array)
filtered = my_array[bool_array]
print(filtered)
print(my_array[my_array > 0]) # shortcut
We can also do this with 2D arrays:
array2d = np.array([[10, 100, 1000], [20, 200, 2000], [30, 300, 3000]])
print(array2d[ array2d < 1000 ])
Series
DataFrame
import pandas as pd
pd.Series() # Empty series object
import pandas as pd
empty_series = pd.Series()
print(empty_series)
pd.Series(data)
data_series_1 = pd.Series(['a', 'b', 'c', 'd'])
print(data_series_1)
dict_series = pd.Series({'a': 1, 'b': 2, 'c': 3})
print(dict_series)
pd.Series(data, index)
data = ['a', 'b', 'c', 'd']
data_series_2 = pd.Series(data, index = ['A', 'B', 'C', 'D'])
print(data_series_2)
pd.DataFrame() # Empty DataFrame object
empty_df = pd.DataFrame()
print(empty_df)
pd.DataFrame(data, index, columns, dtype)
# Build a DataFrame from a list of [subject, grade] rows.
data = [['Biology', 89], ['Physics', 94], ['English', 85], ['History', 100]]
df = pd.DataFrame(data, columns = ['Subject', 'Grade'])
# Cast only the numeric column: passing dtype = float to the constructor would
# also try to convert the 'Subject' strings, which raises ValueError in pandas 2.x.
df['Grade'] = df['Grade'].astype(float)
print(df)
df
pd.read_csv(filepath, sep=',', header = 'infer', names = None, index_col = None)
data = pd.read_csv('iris.csv')
data.head() # Displays the first 5 lines of data
data.head(2) # Display first 2 rows
data.tail(3) # Last 3 rows
# Inspect the dimensions, row labels, column labels and summary of the DataFrame.
print(data.shape) # (number of rows, number of columns)
print(data.index)
print()
print(data.columns)
# info() prints its summary itself and returns None, so wrapping it in print()
# would add a stray "None" line to the output.
data.info()
print(data['petal.length'].head(10)) # Returns pandas Series
Explicit syntax
DataFrame.loc[:, ['col_1', 'col_2']]
Shorthand syntax
DataFrame[ ['col_1', 'col_2'] ]
print(data[['sepal.width', 'variety']].head(10))
DataFrame.loc[:, 'col_1':'col_5']
print(data.loc[:, 'petal.length': 'variety'].head())
# Because the original DataFrame used integers as row labels, I copied the first 5 rows and relabeled them
# for illustration
index_labels = ['a', 'b', 'c', 'd', 'e']
data2 = data.head().copy()
data2.index = index_labels
data2
DataFrame.loc[row_label]
print(data2.loc['b'])
DataFrame.loc[ [row_1, row_2] ]
print(data2.loc[['a', 'c', 'e']])
DataFrame.loc[row_1:row_2]
DataFrame[row1:row2]
print(data2['b':'d'])
DataFrame.loc[row_label, column_label]
print(data2.loc['b', 'variety'])
DataFrame.iloc[row_index]
print(data.head())
print()
print(data.iloc[1])
DataFrame.iloc[row_1:row_n]
print(data.iloc[0:4])
DataFrame.iloc[:, column]
data.iloc[:, 2].head()
DataFrame.iloc[row_index, col_index]
print(data.iloc[1,3]) # Row index = 1, Column index = 3
Things to remember:
print(data['variety'].value_counts())
print(data['sepal.length'].max())
print(data['sepal.length'].min())
print(data['petal.length'].mean())
print(data['petal.width'].median())
print(data['petal.length'].describe())
print(data[['sepal.length', 'sepal.width']].mean(axis = 0)) # calculate mean values of columns
DataFrame.apply(function, axis = 0)
# Apply sum function to specified columns
data[['sepal.width', 'petal.width']].apply(sum, axis = 0) # axis = 0: sum is applied to each column (one total per column)
data.loc[:,'sepal.length':'petal.width'].apply(sum, axis = 1).head() # axis = 1: sum is applied to each row (one total per row)
# Using lambda (anonymous) function
data[['sepal.width', 'petal.width']].apply(lambda x: x / 2, axis = 0 ).head() # Halve the values
Matplotlib can generate scatter plots, histograms, box plots, bar charts, etc.
To import the plotting module we use:
import matplotlib.pyplot as plt
plt.plot(x, y)
plt.show()
#Embed figures within the notebook
%matplotlib inline
import matplotlib.pyplot as plt
x = [1,2,3,4,5,6]
y = [1,4,9,16,25,36]
plt.plot(x, y)
plt.show()
plt.title(title_string) # Plot title
plt.xlabel(x_label_string) # x axis label
plt.ylabel(y_label_string) # y axis label
x = [1,2,3,4,5,6]
y = [1,4,9,16,25,36]
plt.plot(x, y)
plt.title('Example Line Chart')
plt.xlabel('Numbers')
plt.ylabel('Squares')
plt.show()
x_1 = [0,1,2,3,4,5,6,7]
y_1 = [0,1,4,9,16,25,36,49]
y_2 = [0,3,6,9,12,15,18,21]
plt.plot(x_1, y_1, c = 'blue', label = 'squared')
plt.plot(x_1, y_2, c = 'green', label = 'tripled')
plt.title('Example 2')
plt.xlabel('x axis')
plt.ylabel('y axis')
plt.legend() # Add legend
plt.show()
fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
Syntax explanation:
ax = fig.add_subplot(number_of_rows, number_of_columns, plot_index)
fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
# 2 rows and 1 column
fig = plt.figure(figsize = (10,5)) # Create a figure object
ax1 = fig.add_subplot(2, 1, 1) # Plot 1
ax2 = fig.add_subplot(2, 1, 2) # Plot 2
ax1.plot(x_1, y_1, c = 'blue')
ax1.title.set_text('Figure 1')
ax2.plot(x_1, y_2, c = 'red')
ax2.title.set_text('Figure 2')
plt.show()
# 1 row and 2 columns
fig = plt.figure(figsize = (12,5)) # Create a figure object
ax1 = fig.add_subplot(1, 2, 1) # Plot 1
ax2 = fig.add_subplot(1, 2, 2) # Plot 2
ax1.plot(x_1, y_1, c = 'blue')
ax1.title.set_text('Figure 1')
ax2.plot(x_1, y_2, c = 'red')
ax2.title.set_text('Figure 2')
plt.show()
data.head()
plt.boxplot(data['petal.length'])
plt.title('Boxplot example')
plt.show()
fig = plt.figure(figsize = (15, 10))
ax1 = fig.add_subplot(2,2,1)
ax2 = fig.add_subplot(2,2,2)
ax3 = fig.add_subplot(2,2,3)
ax4 = fig.add_subplot(2,2,4)
ax1.hist(data['sepal.length'])
ax1.title.set_text('Sepal Length')
ax2.hist(data['petal.length'])
ax2.title.set_text('Petal Length')
ax3.hist(data['sepal.width'])
ax3.title.set_text('Sepal Width')
ax4.hist(data['petal.width'])
ax4.title.set_text('Petal Width')
plt.show()
plt.style.use('ggplot')
plt.style.use('ggplot')
fig = plt.figure(figsize = (15, 10))
ax1 = fig.add_subplot(2,2,1)
ax2 = fig.add_subplot(2,2,2)
ax3 = fig.add_subplot(2,2,3)
ax4 = fig.add_subplot(2,2,4)
ax1.hist(data['sepal.length'])
ax1.title.set_text('Sepal Length')
ax2.hist(data['petal.length'])
ax2.title.set_text('Petal Length')
ax3.hist(data['sepal.width'])
ax3.title.set_text('Sepal Width')
ax4.hist(data['petal.width'])
ax4.title.set_text('Petal Width')
plt.show()