Python — Sorting

Source: Deep Learning on Medium


Go to the profile of Develop It

Insertion
Selection
Bubble
Merge
Quick

#Tim Sort is the build-in sorting algorithm in python.
#It uses a hybrid of merge sort and insertion sort.
####Performace of Tim Sort is Quite a lot Fast.

#If the input is randomly ordered with or without duplicates (10,000 values)
#() shows near about same
#Tim > (Quick > Merge) > (Insertion > Selection) > Bubble
#with many duplicates if there are 1,000,000 values then quick sort fails because of stack overflow due to too many recursive calls.

#If the input is Already sorted. (10,000 values)
#Tim > Bubble > Insertion > Quick > Merge > Selection

#In the Insertion sort and Bubble sort there are checks, so they work well if we have a sorted or mostly sorted lists. Lists are in general mostly sorted.

#If the input is Reverse sorted. (10,000 values)
#Tim > Quick > Merge >Selection > Insertion > Bubble

#You should understand the use cases before choosing the best sorting algorithm.
#Before choosing an algorithm for you job, you should check out that what your users are going to use it for.

# — — — — — — — — — — — — — — — — — — — — — — —

#Insertion Sort
#It is not a fast sorting algorithm because it uses nested loops.
#It is useful only for small data sets less than 10,000 records.
#O(n²)
import numpy as np
b = np.array([3, 5, 2, 1, 7, 4 ])

#By Swaping Numbers

def insertion_sort_for(a):
 for i in range(1, len(a)):
 for j in range(i-1, -1, -1):
 if a[j+1] < a[j]:
 a[j], a[j+1] = a[j+1], a[j]
 else:
 break
 print(a)

def insertion_sort_while(a):
 i = 1
 while i < len(a):
 j = i — 1
 while a[j+1] < a[j] and j >= 0:
 a[j+1], a[j] = a[j], a[j+1]
 j -= 1
 i += 1 
 print(a)

print(‘b = ‘, b)
insertion_sort_for(b)

c = np.array([3, 5, 2, 1, 7, 4])
print(‘c = ‘, c)
insertion_sort_while(c)

#By Shifting Numbers and saving the number to which the other number is shifted in curNum
#Shifting is twice as Faster than swaping

def insertion_sort_for_shift(a):
 for i in range(1, len(a)):
 curNum = a[i]
 for j in range(i-1, -1, -1):
 if curNum < a[j]:
 a[j+1] = a[j]
 if j==0:
 a[j] = curNum
 else:
 a[j+1] = curNum
 break
 print(a)

d = np.array([3, 5, 2, 1, 7, 4])
print(‘d = ‘, d)
insertion_sort_for_shift(d)

# — — — — — — — — — — — — — — — — — — — — — — –

#Selection Sort
#It is not a fast sorting algorithm because it uses nested loops.
#It is useful only for small data sets less than 10,000 records.
#O(n²)

#Using Swap.
def Selection_sort_for(a):
 for i in range(0, len(a) — 1):
 for j in range(i+1, len(a)):
 if a[i] > a[j]:
 a[i], a[j] = a[j], a[i]
 print(a)

def Selection_sort_while(a):
 i = 0
 while i < len(a) — 1:
 j = i + 1
 while j < len(a):
 if a[j] < a[i]:
 a[i], a[j] = a[j], a[i]
 j += 1
 i += 1
 print(a)

#Using Min_value and Swap
def Selection_sort_for_min_value(a):
 for i in range(0, len(a) — 1):
 Min_value = a[i]
 for j in range(i+1, len(a)):
 if Min_value >= a[j]:
 Min_value = a[j]
 k = j
 if a[i] != Min_value:
 a[k], a[i] = a[i], Min_value
 print(a)

e = np.array([3, 5, 2, 1, 7, 4, 1, 3])
print(‘e = ‘, e)
Selection_sort_for(e)

f = np.array([3, 5, 2, 1, 7, 4, 1, 3])
print(‘f = ‘, f)
Selection_sort_while(f)

g = np.array([3, 5, 2, 1, 7, 4, 1, 3])
print(‘g = ‘, g)
Selection_sort_for_min_value(g)

# — — — — — — — — — — — — — — — — — — — — — — –

#Bubble Sort
#It is not a fast sorting algorithm because it uses nested loops.
#It is useful only for small data sets less than 10,000 records.
#O(n²)

def Bubble_sort_for(a):
 for i in range(0, len(a) — 1):
 for j in range(0, len(a) -1 — i):
 if a[j+1] < a[j]:
 a[j+1], a[j] = a[j], a[j+1]
 print(a)

#def Bubble_sort_while(a):

h = np.array([3, 5, 2, 1, 7, 4, 1, 3])
print(‘h = ‘, h)
Bubble_sort_for(h)

# — — — — — — — — — — — — — — — — — — — — — — –

#Merge Sort
#It is Recursive as it is a method which calls itself.
#It is also known as Divide-and-Conquer algorithm.
#Very Efficient for large datasets.
#O(nlogn)
#Merge Sort does log n merge steps because merge step doubles the list size.
#Eg: 8 -> 4 -> 2 -> 1 log(8) = 3 steps.
#It does n work for each merge step because it must look at every item.
#n + n + n = 3n => nlogn = n(3) = 3n

def merge_sort(a):
 merge_sort_split_2(a, 0, len(a) — 1)
 print(a)

def merge_sort_split_2(a, first, last):
 if first < last:
 middle = (first + last)//2
 merge_sort_split_2(a, first, middle)
 merge_sort_split_2(a, middle + 1, last)
 merge(a, first, middle, last)

def merge(a, first, middle, last):
 L = a[first:middle+1]
 R = a[middle+1:last+1]
 L.append(999999999) #Use length instead to know the end of the list.
 R.append(999999999)
 i = j = 0
 for k in range(first, last + 1):
 if L[i] <= R[j]:
 a[k] = L[i]
 i += 1
 else:
 a[k] = R[j]
 j += 1

i = [3, 5, 2, 1, 7, 4, 9, 6]
print(‘i = ‘, i)
merge_sort(i)

# — — — — — — — — — — — — — — — — — — — — — — –

#Quick Sort

#It is Recursive as it is a method which calls itself.
#It is also known as Divide-and-Conquer algorithm.
#Very Efficient for large datasets.
#Worst case: O(n²) Performance depends largely on pivot selection.
#Average case: O(nlogn)

#Pivot is the item we use for Comparing with other items.
#The items lower than pivot are put on the left side of pivot and the items larger than pivot are put on the right side of pivot.

#Selecting Pivot is the Key to Quick Sort.
#Often we select the first item for pivot but that’s not always the best choice.
#Sometimes the last item is choosen as pivot but that’s not always the best choice.
#Middle item is not a bad choice for pivot.
#We can also take the median of first, middle and last item
#We can also choose the pivot randomly to ensure O(nlogn) performance.

#Median of first, middle and last item.
#Bring your median to first position by swapping it with the value at first position.

#With Border we are refering to the index.
#Now we will consider a border value such that everything to left of the border is smaller than the pivot.
#After that we go on comparing this border value with the value at its right and then what we do is we swap those two values if we find the border value greater than the next value.
#Now we will get a new border and we move on until we get a border where all the left values are smaller than the pivot.

#17, 41, 5, 22, 54, 6, 29, 3, 13
#i) Choose pivot as the median of 17, 54, 13
#ii) Now put the 17 which is the median at the zeroth index. As it is already present there then that’s ok.
#iii) Now take the border as the first index 41 and swap it with 5 as it is <17.
#17, 5, 41, 22, 54, 6, 29, 3, 13
#iv) Now take the border as the second index 41 and swap it with 6 as it is <17.
#17, 5, 6, 22, 54, 41, 29, 3, 13
#v) Now take the border as the third index 22 and swap it with 3 as it is <17.
#17, 5, 6, 3, 54, 41, 29, 22, 13
#vi) Now take the border as the fourth index 54 and swap it with 13 as it is <17.
#17, 5, 6, 3, 13, 41, 29, 22, 54
#vii) Now as everything to the left of the border 13 is <17, Swap border with pivot.
#13, 5, 6, 3, 17, 41, 29, 22, 54
#viii) Then apply Recursion by calling quick sort on the left and right partitions.

def quick_sort(a):
 quick_sort_split(a, 0, len(a) — 1)

def quick_sort_2(a, low, high):
 if low < high:
 pivot = partition(a, low, high)
 quick_sort_split(a, low, pivot-1)
 quick_sort_split(a, pivot+1, high)

def partition(a, low, high):
 pivotIndex = get_pivot(a, low, high)
 pivot = a[pivotIndex]
 a[low], a[pivotIndex] = a[pivotIndex], a[low]
 border = low

for i in range(low, high + 1):
 if (a[i] < pivot):
 border += 1
 a[i], a[border] = a[border], a[i]
 a[low], a[border] = a[border], a[low]

return(border)

def get_pivot(a, low, hi):
 middle = (low + high)//2
 if a[low] < a[middle] and a[middle] < a[high]:
 pivot = middle
 elif a[low] < a[high]:
 pivot = low
 pivot = high
 return pivot

#One way to increase the performance of quick sort is by using selection sort if the number of values are less than a particular threshold.
#So as the process of quick sort goes on and the value decreases and after sometime it will reach below the threshold then we can use selection sort.
def quick_sort_2(a, low, high):
 if high — low < threshold and low < high:
 Selection_sort_for(a, low, high)
 elif low < high:
 pivot = partition(a, low, high)
 quick_sort_split(a, low, pivot-1)
 quick_sort_split(a, pivot+1, high)

# — — — — — — — — — — — — — — — — — — — — — — –