Tangible Computing
12. Sorting and Big O notation




12.1 Selection Sort

In class we implemented selection sort for lists of integers. In this sort we found the largest item, and swapped it with the last item in the list, then the second largest, swapping with the second last position, and so on. When we looked at the behaviour of our algorithm on lists of size n, we noted that the number of compare-and-swap operations was roughly n^2. That is, selection sort takes O(n^2) time to sort a list of n elements. The sort takes place inside the original list, so does not require any additional space (other than a few working cells).

12.2 Big O Notation

To talk about the complexity of an algorithm we introduced Big O Notation to talk about what class of functions contain a given function. So for example, we said that the sort time T(n) of section sort was O(n^2). By this we mean that T(n) ∈ O(n^2), where O(n^2) is the set of all the "quadratic" functions.

Here are some common containment relationships:
O(1) ⊆ O(log n) ⊆ O(n) ⊆ O(n log n) ⊆ O(n^2) ⊆ O(n^3)
For any positive k, O((log n)^k) ⊆ O(n) and O(n^k) ⊆ O(2^n)




12.3 Merge Sort

If you have two lists that are already in order, then they can be quickly merged into a third single ordered list, by repeatedly taking the smallest remaining item from among the unmerged lists. By "quickly" we mean O(n) where n is the total number of items in the lists.

This leads to an interesting observation. Suppose L is a list of n elements. If we divided L into 2 lists of n/2 elements each, sorted them, then merged, we could sort L.

Let L[i...j) be the sublist L[i], L[i+1], ..., L[j-1] of j-i elements.

Note: The [ ) notation is the interval that is closed on the left, and open on the right. So, a integer x is in [a, b) iff a <= x < b.

Then we can sort recursively as follows:
MSort(L[i ... j)) = L[i ... j) if i+1 = j
MSort(L[i ... j)) = Merge( MSort( L[i ... (i+j)/2 ) ), MSort( L[(i+j)/2 ... j) ) )
This is called a merge sort. We observed that when n is a power of 2, that is n=2^k for some k, then the merge sort naturally broke into a tree-like shape, where the size of the lists being merged were 2^(k-1), 2^(k-2), ..., 2^1. The total number of operations at each layer of the tree was O(n), and there are O(log n) layers, so the total amount of work to sort using merge sort is O(n log n). This is significantly better than the O(n^2) time used by selection sort, once n is bigger than 100. (Try it on the Arduino and find out the break even point)

We developed this algorithm in class. In the first form we allocated the temporary array on the stack.

code/Sorting/MergeSort/MergeSortStack/merge_sort_stack.cpp

    // example of merge sort where we allocate the working array on the stack
    #include <Arduino.h>
    #include <mem_syms.h>
     
    void merge(int16_t *Left, uint16_t Left_len, int16_t *Right, uint16_t Right_len, int16_t *S) {
        // position of next element to be processed
        uint16_t Left_pos = 0;
        uint16_t Right_pos = 0;
     
        // position of next element of S to be specified
        // note: S_pos = Left_pos+Right_pos
        uint16_t S_pos = 0;
     
        // false, take from right, true take from left
        uint8_t pick_from_left = 0;
     
        while ( S_pos < Left_len + Right_len ) {
     
        // pick the smallest element at the head of the lists
        // move smallest of Left[Left_pos] and Right[Right_pos] to S[S_pos] 
        if ( Left_pos >= Left_len ) {
            pick_from_left = 0;
            }
        else if ( Right_pos >= Right_len ) {
            pick_from_left = 1;
            }
        else if ( Left[Left_pos] <= Right[Right_pos] ) {
            pick_from_left = 1;
            }
        else {
            pick_from_left = 0;
            }
     
        if ( pick_from_left ) {
            S[S_pos] = Left[Left_pos];
            Left_pos++;
            S_pos++;
            }
        else {
            S[S_pos] = Right[Right_pos];
            Right_pos++;
            S_pos++;
            }
        
        }
    }
     
     
    // sort in place, i.e. A will be reordered
    void merge_sort(int16_t A[], uint16_t A_len) {
     
        // see if we have enough space to sort, keep 128 bytes for working,
        // maybe that's enough!
     
        if ( AVAIL_MEM < A_len * sizeof(int16_t)  + 128 ) {
            Serial.print("ACK, no stack available for sorting! Only have ");
            Serial.println(AVAIL_MEM);
            while ( 1 ) {}
            }
     
        if ( A_len < 2 ) {
            return;
            }
     
        if ( A_len == 2 ) {
            if ( A[0] > A[1] ) {
                int16_t temp = A[0];
                A[0] = A[1];
                A[1] = temp;
                }
            return;
            }
     
        // split A in half, sort left, sort right, then merge
        // A[0], ..., A[split_point-1]
        uint16_t split_point = A_len / 2;
     
        merge_sort(A, split_point);
     
        merge_sort(A+split_point, A_len-split_point);
     
        {
            // Now allocate our working array on the stack
            int16_t S[A_len];
     
            merge(A, split_point, A+split_point, A_len-split_point, S);
     
            // and copy back into the original array
            for (uint16_t i=0; i < A_len; i++) {
                A[i] = S[i];
                }
     
            // at this point S should be released from the stack
            }
     
        }
     
    void setup() {
        Serial.begin(9600);
     
        Serial.println("********* THIS IS THE BEGINNING *********");
        randomSeed(analogRead(0));
     
        uint16_t Test_len = 64;
        int16_t Test[Test_len];
     
        Serial.print("In: ");
        for (uint16_t i=0; i < Test_len; i++) {
            Test[i] = random(0, 100);
            if ( 1 ) {
                Serial.print(Test[i]);
                Serial.print(" ");
                }
            }
        Serial.println();
     
        merge_sort(Test, Test_len);
        
        if ( 1 ) {
            Serial.print("Out: ");
            for (uint16_t i=0; i < Test_len; i++) {
                if ( i < Test_len-1 && Test[i] > Test[i+1] ) {
                    Serial.print("Out of order!!");
                    }
     
                Serial.print(Test[i]);
                Serial.print(" ");
                }
            Serial.println();
            }
        }
     
    void loop() {
        }


In the second form we allocated the temporary array using malloc.

code/Sorting/MergeSort/MergeSortMalloc/merge_sort_malloc.cpp

    // example of merge sort where we allocate the working array with malloc
    #include <Arduino.h>
    #include <mem_syms.h>
     
    void merge(int16_t *Left, uint16_t Left_len, int16_t *Right, uint16_t Right_len, int16_t *S) {
        // position of next element to be processed
        uint16_t Left_pos = 0;
        uint16_t Right_pos = 0;
     
        // position of next element of S to be specified
        // note: S_pos = Left_pos+Right_pos
        uint16_t S_pos = 0;
     
        // false, take from right, true take from left
        uint8_t pick_from_left = 0;
     
        while ( S_pos < Left_len + Right_len ) {
     
        // pick the smallest element at the head of the lists
        // move smallest of Left[Left_pos] and Right[Right_pos] to S[S_pos] 
        if ( Left_pos >= Left_len ) {
            pick_from_left = 0;
            }
        else if ( Right_pos >= Right_len ) {
            pick_from_left = 1;
            }
        else if ( Left[Left_pos] <= Right[Right_pos] ) {
            pick_from_left = 1;
            }
        else {
            pick_from_left = 0;
            }
     
        if ( pick_from_left ) {
            S[S_pos] = Left[Left_pos];
            Left_pos++;
            S_pos++;
            }
        else {
            S[S_pos] = Right[Right_pos];
            Right_pos++;
            S_pos++;
            }
        
        }
    }
     
     
    // sort in place, i.e. A will be reordered
    void merge_sort(int16_t A[], uint16_t A_len) {
     
        // see if we have enough space to sort, keep 128 bytes for working,
        // maybe that's enough!
     
        if ( AVAIL_MEM < A_len * sizeof(int16_t)  + 128 ) {
            Serial.print("ACK, no stack available for sorting! Only have ");
            Serial.println(AVAIL_MEM);
            while ( 1 ) {}
            }
     
        if ( A_len < 2 ) {
            return;
            }
     
        if ( A_len == 2 ) {
            if ( A[0] > A[1] ) {
                int16_t temp = A[0];
                A[0] = A[1];
                A[1] = temp;
                }
            return;
            }
     
        // split A in half, sort left, sort right, then merge
        // A[0], ..., A[split_point-1]
        uint16_t split_point = A_len / 2;
     
        merge_sort(A, split_point);
     
        merge_sort(A+split_point, A_len-split_point);
     
        {
            // Now allocate our working array on the heap
            int16_t *S = (int16_t *) malloc( A_len * sizeof(int16_t) );
         
            if ( ! S ) {
                Serial.println("ARRGH, No heap memory left.");
                while(1) { }
                }
     
            merge(A, split_point, A+split_point, A_len-split_point, S);
     
            // and copy back into the original array
            for (uint16_t i=0; i < A_len; i++) {
                A[i] = S[i];
                }
     
            // at this point S can be released from the heap
            free(S);
            }
     
        }
     
    void setup() {
        Serial.begin(9600);
     
        Serial.println("********* THIS IS THE BEGINNING *********");
        randomSeed(analogRead(0));
     
        uint16_t Test_len = 64;
        int16_t Test[Test_len];
     
        Serial.print("In: ");
        for (uint16_t i=0; i < Test_len; i++) {
            Test[i] = random(0, 100);
            if ( 1 ) {
                Serial.print(Test[i]);
                Serial.print(" ");
                }
            }
        Serial.println();
     
        merge_sort(Test, Test_len);
        
        if ( 1 ) {
            Serial.print("Out: ");
            for (uint16_t i=0; i < Test_len; i++) {
                if ( i < Test_len-1 && Test[i] > Test[i+1] ) {
                    Serial.print("Out of order!!");
                    }
     
                Serial.print(Test[i]);
                Serial.print(" ");
                }
            Serial.println();
            }
        }
     
    void loop() {
        }


We also developed this algorithm in a previous class: code/Sorting/MergeSort/MergeSort01/MergeSort01.pde

    #include <bassert.h>
    #include <mem_syms.h>
     
    int * merge(int *list1, int len1, int *list2, int len2) {
     
    /* 
    Pre: list list1 of length len1, list list2 of length len2 
         both sorted in ascending order.
     
    Post: returns a pointer to a list (on heap) of length 
         len1+len2 in increasing order resulting from the ordered
         merge of len1 and len2
     
    Invariant: list list1 and list2 are not modified.
     
    Problem: who is responsible for freeing the merged list later when it is
    not needed?
    */
     
     
      int lenr = len1 + len2;
      int *result;
      int next1 = 0;    // index of next element to take from from list 1
      int next2 = 0;    // index of next element to take from from list 1
      int nextr = 0;    // index of next element to insert into the result list
      
      bassert( len1 > 0 || len2 > 0, 2);
     
      result = (int *) malloc( lenr * sizeof(int));
      bassert(result != 0, 5);
     
      while ( len1 + len2 > 0 ) {
        // select smaller of next element of list1 or list2 if there is one
     
          
        byte take_from_1 = 
            (len2 == 0) || (len1 > 0 && list1[next1] <= list2[next2]);
           
           if ( take_from_1 ) {
             result[nextr] = list1[next1];
             len1--; 
             next1++;
           } else {
             result[nextr] = list2[next2];
             len2--; 
             next2++;
           }
           nextr++;
        
      }
      return result; 
     
    }
     
    int * mergesort(int *l, int len) {
    /*
     
    Pre: l is a list of len elements.
     
    Post: returns a pointer to a new list of len elements that is l sorted in
        ascending order.
     
    Invariant: list l is not modified.
     
    Problem: Since we never free the intermediate storage we use for 
      the recursion we get a memory leak.  Also, how much storage do we 
      allocate?  Each level of the recursion allocates n (across the
      width of the recursion) so we allocate an extra O(n log n) of space.
    */
     
      // split the list roughly in half
      if ( len <= 1 ) { return l; }
      int len1 = len / 2;
      return merge(
            mergesort(l, len1), len1,
            mergesort(l+len1, len-len1), len-len1);
    }
     
     
    void setup() {
      Serial.begin(9600);
      
      int len = 32;
      int l[32];
      int *r;
      
      for (int i=0; i < len; i++ ) { l[i] = random(100); }
      
      Serial.println("In:");
      for (int i=0; i < len; i++ ) { 
        Serial.print(l[i]); Serial.print(" "); 
      }
      Serial.println("");
     
      Serial.println("Out:");
      r = mergesort(l, len);
      for (int i=0; i < len; i++ ) { 
        Serial.print(r[i]); Serial.print(" "); 
      }
     
    }
     
    void loop() {
    }
It has a serious problem. Since we never free the intermediate storage we use for the recursion we get a memory leak. And how much storage do we allocate? Each level of the recursion allocates n (across the width of the recursion) so we allocate an extra O(n log n) of space. We don't have this kind of space to waste on the Arduino.

We took our initial experiment and improved it. We solved the memory leak problem in the merge routine by having the caller supply the output list for the result of the merge. We also use pointer arithmetic which is faster than array indexing. Note use of post increment (*p++), which means use the value pointed to by p, then increment p.

Then we observed that the recursion is very regular. In fact, it was just a very compact way of saying take the original list, divide it into pairs of items. Merge the pairs, then the 4's, then the 8's until you are finished. We don't have to do this recursively at all.

Then we observed that merging requires as an extra list of the length we want to merge into. So if we were willing to modify the original list, we only need an additional temporary list of the same length as the list to be sorted. This can be allocated once on the stack at the start of the merge routine.

The resulting sort function looks complicated, but most of the complexity is caused by handling lists that are not a power of two in length. There is also the ping-ponging between the input list and the tempory list as to which is the source and which is the destination. This can require a final correcting copy at the end of the sort.

code/Sorting/MergeSort/MergeSort02/MergeSort02.pde

    #include <bassert.h>
    #include <mem_syms.h>
     
    void merge(int *lout, int *list1, int len1, int *list2, int len2) {
    /*
     
    Merge from 2 input lists into a user supplied output list.
     
    Pre: list list1 of length len1, list list2 of length len2 
         both sorted in ascending order.
     
         list lout is at least length len1+len2
     
    Post: merges list1 and list2 by increasing order into lout, which will
        have len1+len2 elements
     
    Invariant: list1 and list2 are unchanged.
     
    This version solves the memory leak problem.  It also uses pointer 
    arithmetic which is faster than array indexing.
    */
     
      while ( len1 > 0 || len2 > 0 ) {
          
        // pick from list 1 if list 2 is empty, or list 1 < list 2
        if ( (len2 == 0) || (len1 > 0 && *list1 <= *list2) ) {
          // note use of post increment, which performs the assignment, 
          // *lout = *list1, // then increments the pointers
          *lout++ = *list1++;
          len1--; 
        } else {
          *lout++ = *list2++;
          len2--; 
        }
      }
    }
     
    /*
        pre: listin is a pointer to a list of len elements
            needs sufficient stack space to allocate a temporary list, plus
            work variables.
     
        post: the contents of listin will be rearranged to be sorted in 
            ascending order.
     
        The merge sort works by breaking the input into blocks of length
        block_len, each of which is ordered.  This is the source of the 
        merger.  
     
        Then adjacent pairs of blocks are merged into a block of size 
        2*block_len that is placed in the destination.
     
        The source and destination are swapped and the process continues 
        util the block_len is bigger than the list to be sorted.
    */
     
    void mergesort(int *listin, int len) {
      int block_len; // current length of merge block
     
      // listin and tmp alternate roles as source and destination
      // 0 => destination is tmp, 1 => destination is listin
      byte buf_parity; 
     
      // a pair of adjacent blocks to be merged has a left and right block
     
      int *src;     // current position of left array in source
      int *dst;     // current position in destination
     
      int remain;   // elements remaining to be processed   
      int left_len; // length of left block to merge
      int right_len;  // length of right block to merge
      int merged_len; // length of destination merge
     
      Serial.print("Avail mem: "); Serial.println(AVAIL_MEM);
      bassert( AVAIL_MEM > len * sizeof(*listin) + 200, 7);
     
      int tmp[len];
     
      if ( len <= 1 ) { return; }
     
      /* go through the list working on blocks of length block_len = 2 ** k
     
        src - pointer to source data, in blocks of length block_len, except
            possibly for the last block which could be partial.
     
        dst - pointer to destination where merged blocks are placed
     
        buf_parity
            true => merging from listin into tmp
            false => merging from tmp into listin
      */
     
      // keep sorting so long as block size is less than list length
      // if = or greater, list is sorted
      buf_parity = 1;
      block_len = 1;
     
      while ( block_len < len ) {
     
        // merge from src into dst
        if ( buf_parity ) {
          src = listin;
          dst = tmp;
          }
        else {
          src = tmp;
          dst = listin;
          }
     
        // number of elements remaining to be merged
        remain = len;
     
        while ( remain > 0 ) {
     
          // merge the next two blocks together
          if ( remain <= block_len ) {
            // 1 block or less remaining
            left_len = remain;
            right_len = 0;
            }
          else if ( remain < block_len + block_len ) {
            // 1 full block, and a partial last block
            left_len = block_len;
            right_len = remain - block_len;
            }
          else {
            // 2 full blocks to merge
            left_len = block_len;
            right_len = block_len;
            }
     
          // left list starts at current src, right list at src+left_len, 
          // right list ends at src+left_len+right_len -1 
     
          merged_len = left_len + right_len;
        
          merge(dst, src, left_len, src+left_len, right_len);
     
          // move along both src and desitination to next pair of blocks
          dst += merged_len;
          src += merged_len;
          remain = remain - merged_len;
          }
     
        // swap src and dst
        buf_parity = ! buf_parity;
        block_len = block_len + block_len;
        }
     
      // if sorted list is in tmp, copy back into input list
      if ( ! buf_parity ) {
        int tlen = len;
        src = tmp;
        dst = listin;
        while ( tlen-- ) { *dst++ = *src++; }
        }
      return;
    }
     
    unsigned long start_time;
    unsigned long stop_time;
     
    void start_clock() {
        start_time = micros();
        }
     
    unsigned long int stop_clock() {
        unsigned long elapsed_time;
        stop_time = micros();
        elapsed_time = stop_time - start_time;
        Serial.print("Elapsed time ");
        Serial.print(elapsed_time);
        Serial.println(" uS");
        return elapsed_time;
        }
     
    byte is_sorted(int *l, int len) {
        for (int i=0; i < len-1; i++) {
            if ( l[i] > l[i+1] ) { return 0; }
            }
        return 1;
        }
     
    void display_list(int *l, int len) {
      for (int i=0; i < len; i++ ) { 
        Serial.print(l[i]); Serial.print(" "); 
      }
    }
     
    void setup() {
      Serial.begin(9600);
      
      for (int len = 100; len < 10000; len += 200) {
          Serial.print("Testing with len "); Serial.println(len);
     
          // make sure we have enough memory
          bassert(AVAIL_MEM > len * sizeof(int) + 256, 3);
     
          int l[len];
          
          for (int i=0; i < len; i++ ) { l[i] = random(100); }
          
          Serial.println("In:");
          // display_list(l, len);
          Serial.println("");
     
          Serial.println("Out:");
          
          start_clock();
          mergesort(l, len);
          stop_clock();
          if ( is_sorted(l, len) ) {
            Serial.println("Success: list is sorted");
            }
          else {
            Serial.println("Error: list not sorted!");
            display_list(l, len);
            }
     
          // display_list(l, len);
      }
    }
     
    void loop() {
    }


12.4 Quick Sort

Selection sort does not require an additional temporary list (it is in place), but is O(n^2). Merge sort is O(n log n) but requires a temporary list? Is there a sort that is that is in place and takes O(n log n)?

Not a simple one. But there is a sort that does not require a additional temporary list, and is almost always O(n log n). It is called quick sort.

A key idea behind quicksort is that it is easy to merge two sorted lists if all the elements of one list are not greater than the smallest element of the other list. You simply append the second list to the first. That is, the merge is trivially in-place.

The only problem is that you need to divide the input list into these sublists. If you knew what the median of the values of the list was, then you could partition the list into one with values less than the median, and one with values greater than (or equal) to the median.

But it takes time to find the median (the obvious way is to sort!) There are linear time, i.e. O(n), selection algorithms, but they are quite complex. A good strategy is to simply pick a random point in the list to be sorted and used that. Better strategies are described in the quicksort article.

Here is partially completed code for quicksort. code/Sorting/QuickSort/QuickSort01/QuickSort01.ino

    /*
     Outline of Quick Sort
     */
     
    /* partition
     
       pre: list is an array of len elements.
       post: list is rearranged such that (pivot is a randomly chosen element)
         list[0] ... list[pivot-1] < list[pivot]
         list[pivot+1] ... list[len-1] > list[pivot]
     
       returns the index of the pivot.
    */
     
    int partition(int *list, int len)
    {
      // select a pivot index and initalize the final_pivot index
      int final_pivot = 0;
     
      // move pivot index to end of the list
     
      // move items less than pivot_val to front of array and 
      // keep advancing the final_pivot point
     
      // move pivot to final place
     
      // return the final_pivot index
      return final_pivot;
    }
     
    /* quicksort
     
       pre: list is an array of len elements.
       post: list is in sorted (ascending) order
    */
     
    void quicksort(int *list, int len)
    {
      // If len is greater than one, recursively call quicksort
    }
     
    unsigned long start_time;
    unsigned long stop_time;
         
    void start_clock() {
      start_time = micros();
    }
         
    unsigned long int stop_clock() {
      unsigned long elapsed_time;
      stop_time = micros();
      elapsed_time = stop_time - start_time;
      Serial.print("Elapsed time ");
      Serial.print(elapsed_time);
      Serial.println(" uS");
      return elapsed_time;
    }
         
    byte is_sorted(int *l, int len) {
      for (int i=0; i < len-1; i++) {
        if ( l[i] > l[i+1] ) { return 0; }
      }
      return 1;
    }
         
    void display_list(int *l, int len) {
      for (int i=0; i < len; i++ ) { 
        Serial.print(l[i]); Serial.print(" "); 
      }
    }
         
    void setup() {
      Serial.begin(9600);
          
      for (int len = 100; len < 3000; len += 200) {
        Serial.print("Testing with len "); Serial.println(len);
         
        int l[len];
              
        for (int i=0; i < len; i++ ) { l[i] = random(100); }
              
        //Serial.println("In:");
        //display_list(l, len);
        //Serial.println("");
     
        start_clock();
        quicksort(l, len);
        stop_clock();
         
        //Serial.println("Out:");
        //display_list(l, len);
        //Serial.println("");
              
        if ( is_sorted(l, len) ) {
          Serial.println("Success: list is sorted");
        }
        else {
          Serial.println("Error: list not sorted!");
        }
         
      }
    }
         
    void loop() {
    }
     
The completed code is code/Sorting/QuickSort/QuickSort02/QuickSort02.ino

12.5 Stable Sorting

A stable sort is one in which the process of sorting preserves the original ordering of any equal elements in the list. This is important when you are sorting on two properties. For example, you first sort your mail inbox by date, then you sort by sender. All the mail from a given sender will be in the same order as when it was sorted by date.

Is our implementation of merge sort stable? What about quicksort? Quicksort can be made stable, but by doing so it cannot be done in-place. The problem is that the partitioning step can reorder equal elements. (Can you invent an example that illustrates this?)

12.6 Animated Sorting Algorithms

An excellend sorting animation page is http://www.sorting-algorithms.com/

12.7 The Sorting Library

The standard C library has generic routines for quicksort, mergesort, and another common sort called heapsort. In the Arduino C library, only qsort is available. The calling sequence is
void qsort(void *base, size_t nel, size_t width,
    int (*compar)(const void *, const void *));
The qsort() function sorts an array of nel objects, the initial member of which is pointed to by base. The size of each object is specified by width. The contents of the array base are sorted in ascending order according to a comparison function pointed to by compar, which requires two arguments pointing to the objects being compared.

The comparison function must return an integer less than, equal to, or greater than zero if the first argument is considered to be respectively less than, equal to, or greater than the second.
12. Sorting and Big O notation
Tangible Computing / Version 3.20 2013-03-25