Transcript lec4

Algorithms and Data Structures*
• Objective: To review the fundamental algorithms
and data structures that are commonly used in
programs. To see how to use and implement these
algorithms and data structures in different
languages and to see what language and library
support exists for them.
– Sorting and Searching
– Arrays and Vectors
– Lists
– Hash Tables
– Generic programming
*This material comes from chapter 2 of
Brian Kernighan and Rob Pike,
The Practice of Programming
Topics
• Binary Search
• Quicksort
• Big “Oh”
• Vectors
• Lists
• Hash Tables
• C, C++, Java, Perl
Linear Search
/* Nameval: one table entry pairing a name string with an integer value. */
typedef struct Nameval Nameval;
struct Nameval {
    char *name;     /* entry name (the lookup key) */
    int value;      /* value associated with name */
};
/* HTML characters, e.g. AElig is a ligature of A and E. */
/* values are Unicode/ISO10646 encoding. */
Nameval htmlchars[] = {
“Aelig”,
0x00c6,
“Aacute”, 0x00c1,
“Acirc”, 0x00c2,
/* … */
“zeta”,
0x03b6
};
Linear Search
/*
 * lookup: sequential search for name in tab.
 *
 * name: string to search for (compared with strcmp).
 * tab:  array of entries to scan, in any order.
 * ntab: number of entries in tab.
 *
 * Returns the index of the first entry whose name equals name,
 * or -1 if no entry matches.
 */
int lookup(char *name, Nameval tab[], int ntab)
{
    int i;

    for (i = 0; i < ntab; i++) {
        if (strcmp(name, tab[i].name) == 0)
            return i;
    }
    return -1; /* no match */
}
Binary Search
/*
 * lookup: binary search for name in tab.
 *
 * name: string to search for (compared with strcmp).
 * tab:  array of entries; must be sorted by name in ascending strcmp()
 *       order, otherwise the halving steps below may skip the match.
 * ntab: number of entries in tab.
 *
 * Returns the index of a matching entry, or -1 if not found.
 */
int lookup(char *name, Nameval tab[], int ntab)
{
    int low, high, mid, cmp;

    low = 0;
    high = ntab - 1;
    while (low <= high) {
        /* low + (high - low) / 2 cannot overflow, unlike (low + high) / 2 */
        mid = low + (high - low) / 2;
        cmp = strcmp(name, tab[mid].name);
        if (cmp < 0)
            high = mid - 1;     /* name sorts before tab[mid] */
        else if (cmp > 0)
            low = mid + 1;      /* name sorts after tab[mid] */
        else
            return mid;         /* found match */
    }
    return -1; /* no match */
}
Quicksort
• pick one element of the array (pivot)
• partition the other elements into two groups
– those less than the pivot
– those that are greater than or equal to the pivot
• recursively sort each group
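Partition
Start (pivot p moved to v[0]; last = 0, i = 1):
  | p |            unexamined             |
    0   1                              n-1
  last  i
During partitioning:
  | p |   < p   |   >= p   |  unexamined  |
    0   1    last           i          n-1
After restoring the pivot (swap v[0] and v[last]):
  |   < p   | p |          >= p           |
    0        last                      n-1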
Quicksort
/*
 * quicksort: sort v[0]..v[n-1] into increasing order.
 *
 * A randomly chosen pivot is moved to v[0]; the remaining elements are
 * partitioned so that v[1..boundary] are less than the pivot, and the pivot
 * is then swapped into v[boundary]. The left part is sorted by a recursive
 * call and the right part by continuing the loop on the shifted sub-array.
 */
void quicksort(int v[], int n)
{
    while (n > 1) {             /* 0 or 1 elements: nothing to do */
        int boundary = 0;       /* index of the last element known to be < pivot */
        int k;

        swap(v, 0, rand() % n); /* move pivot element to v[0] */

        for (k = 1; k < n; k++) {   /* partition */
            if (v[k] < v[0]) {
                boundary++;
                swap(v, boundary, k);
            }
        }
        swap(v, 0, boundary);   /* restore pivot */

        quicksort(v, boundary); /* sort the elements below the pivot */

        /* continue with the elements above the pivot */
        v += boundary + 1;
        n -= boundary + 1;
    }
}
Swap
/*
 * swap: exchange the elements at positions i and j of v.
 * No bounds checking is done; i and j may be equal.
 */
void swap(int v[], int i, int j)
{
    int *a = &v[i];
    int *b = &v[j];
    int saved = *a;

    *a = *b;
    *b = saved;
}
Libraries
• C: qsort
• C++: sort (algorithm library from STL)
• Java: java.util.Collections.sort
• Perl: sort
qsort (strings)
/* Example: sort the N string pointers in str with the library qsort, ordered by scmp. */
char *str[N];
qsort(str, N, sizeof(str[0]), scmp);
/*
 * scmp: qsort-style comparison of two strings.
 *
 * p1 and p2 each point to a char * element of the array being sorted, so
 * they are dereferenced once to reach the strings themselves.
 * Returns the strcmp result for the two strings (negative, zero, positive).
 */
int scmp(const void *p1, const void *p2)
{
    const char *left = *(char *const *) p1;
    const char *right = *(char *const *) p2;

    return strcmp(left, right);
}
qsort (int)
/* Example: sort the N ints in arr with the library qsort, ordered by icmp. */
int arr[N];
qsort(arr, N, sizeof(arr[0]), icmp);
/*
 * icmp: qsort-style comparison of two ints.
 *
 * p1 and p2 each point to an int element of the array being sorted.
 * Returns -1 if *p1 < *p2, 0 if they are equal, and 1 if *p1 > *p2.
 * Explicit comparisons are used instead of v1 - v2, which could overflow.
 */
int icmp(const void *p1, const void *p2)
{
    int v1 = *(const int *) p1;
    int v2 = *(const int *) p2;

    if (v1 < v2)
        return -1;
    if (v1 > v2)
        return 1;
    return 0;
}
Big “Oh”
Notation          Name          Example
$O(1)$            constant      array index
$O(\log n)$       logarithmic   binary search
$O(n)$            linear        string comparison
$O(n \log n)$     n log n       quicksort/mergesort
$O(n^2)$          quadratic     insertion sort
$O(n^3)$          cubic         matrix multiplication
$O(2^n)$          exponential   set partitioning
*It is more precise to use $\Theta$ for order classes
Timing
• Unix time command
– [jjohnson@ws56 lec1]$ time sign <
/usr/share/dict/words | sort | squash > temptime
– 0.11user 0.01system 0:00.27elapsed 43%CPU
(0avgtext+0avgdata 0maxresident)k
– 0inputs+0outputs (91major+13minor)pagefaults 0swaps
• clock()
• counting cycles
$n \log n$ vs. $n^2$
Growing Arrays
• Arrays provide O(1) access and insertion time
• Sorted arrays provide O(log n) search time and
O(n) insertion time [have to move elements]
• If the number of elements in an array is not known
ahead of time it may be necessary to resize the
array.
• Involves dynamic memory allocation and copying
• To minimize the cost it is best to resize in chunks
Growing Arrays in C
/* Nameval: one name-value pair stored in the growable table. */
typedef struct Nameval Nameval;
struct Nameval {
    char *name;
    int value;
};

/*
 * NVtab: a growable array of Nameval entries.
 * nvtab is the single table instance that addname() operates on; as a
 * file-scope object it starts zeroed, so nameval is NULL until the first
 * insertion.
 */
struct NVtab {
    int nval;           /* current number of values */
    int max;            /* allocated number of values */
    Nameval *nameval;   /* array of name-value pairs */
} nvtab;

/* NVINIT: initial allocation (in entries); NVGROW: growth multiplier. */
enum { NVINIT = 1, NVGROW = 2 };
Growing Arrays
/*
 * addname: append newname (copied by value) to the global table nvtab.
 *
 * The array is allocated with NVINIT entries on first use and enlarged by a
 * factor of NVGROW whenever it is full.
 *
 * Returns the index at which the entry was stored, or -1 if memory could
 * not be allocated (in which case the table is left unchanged).
 */
int addname(Nameval newname)
{
    Nameval *nvp;

    if (nvtab.nameval == NULL) {    /* first time */
        nvtab.nameval = (Nameval *) malloc(NVINIT * sizeof(Nameval));
        if (nvtab.nameval == NULL)
            return -1;
        nvtab.max = NVINIT;
        nvtab.nval = 0;
    } else if (nvtab.nval >= nvtab.max) {   /* grow */
        /* keep the old pointer until realloc succeeds so nothing is lost on failure */
        nvp = (Nameval *) realloc(nvtab.nameval, (NVGROW * nvtab.max) * sizeof(Nameval));
        if (nvp == NULL)
            return -1;
        nvtab.max *= NVGROW;
        nvtab.nameval = nvp;
    }
    nvtab.nameval[nvtab.nval] = newname;
    return nvtab.nval++;
}
Lists
• A sequence of elements
• Space is allocated for each new element and
consecutive elements are linked together
with a pointer.
• O(1) time to insert at front, O(n) to append
unless pointer to last element kept, O(n)
traversal time.
head -> [data 1] -> [data 2] -> [data 3] -> [data 4] -> NULL
Lists in C
/* Nameval: a singly linked list node holding a name-value pair. */
typedef struct Nameval {
    char *name;
    int value;
    struct Nameval *next;   /* following node in the list */
} Nameval;
/*
 * newitem: build a new, unlinked list node from name and value.
 *
 * The name pointer is stored as given (the string is not copied) and next
 * is set to NULL. Storage comes from emalloc; its result is used unchecked
 * here, so emalloc presumably does not return on failure (TODO confirm).
 * Returns the new node.
 */
Nameval *newitem(char *name, int value)
{
    Nameval *item = (Nameval *) emalloc(sizeof(Nameval));

    item->name = name;
    item->value = value;
    item->next = NULL;
    return item;
}
Lists in C
/*
 * addfront: link newp in ahead of the list that starts at listp.
 * Returns the new head of the list, which is always newp; the caller must
 * store the result, since its own head pointer is not modified.
 */
Nameval *addfront(Nameval *listp, Nameval *newp)
{
    Nameval *head = newp;

    head->next = listp;
    return head;
}
nvlist = addfront(nvlist, newitem(“smiley”, 0x263A));
Prepend Element in Front of List
/*
 * addend: add newp to end of listp.
 *
 * Returns the head of the resulting list: newp when listp is empty,
 * otherwise listp. newp->next is not modified here.
 */
Nameval *addend(Nameval *listp, Nameval *newp)
{
    Nameval *p;

    if (listp == NULL)
        return newp;
    /* stop ON the last node (p->next == NULL), not one past it */
    for (p = listp; p->next != NULL; p = p->next)
        ;
    p->next = newp;
    return listp;
}
Append Element to Back of List
/*
 * addend: add newp to end of listp.
 *
 * Returns the head of the resulting list: newp when listp is empty,
 * otherwise listp. newp->next is not modified here.
 */
Nameval *addend(Nameval *listp, Nameval *newp)
{
    Nameval *p;

    if (listp == NULL)
        return newp;
    /* stop ON the last node (p->next == NULL), not one past it */
    for (p = listp; p->next != NULL; p = p->next)
        ;
    p->next = newp;
    return listp;
}
Lookup Element in List
/*
 * lookup: sequential search for name in listp.
 * Returns the first node whose name equals name (per strcmp),
 * or NULL if there is no such node.
 */
Nameval *lookup(Nameval *listp, char *name)
{
    for ( ; listp != NULL; listp = listp->next) {
        if (strcmp(name, listp->name) == 0)
            return listp;
    }
    return NULL; /* no match */
}
Apply Function to Elements in
List
/*
 * apply: call fn once for every node of listp, front to back.
 * Each call receives the current node and the caller-supplied arg.
 * The node's next pointer is read after fn returns.
 */
void apply(Nameval *listp, void (*fn)(Nameval*, void*), void *arg)
{
    Nameval *node = listp;

    while (node != NULL) {
        fn(node, arg);      /* call the function */
        node = node->next;
    }
}
void (*fn)(Nameval*, void*) is a pointer to a void function of two
arguments – the first argument is a pointer to a Nameval and the
second is a generic pointer.
Print Elements of a List
/*
 * printnv: print one node's name and value.
 * arg is treated as a printf format string that consumes the name (a string)
 * followed by the value (an int); it is passed to printf as-is.
 */
void printnv(Nameval *p, void *arg)
{
    printf((char *) arg, p->name, p->value);
}
apply(nvlist, printnv, “%s: %x\n”);
Count Elements of a List
/*
 * inccounter: add one to the int that arg points to.
 * The node argument exists only so the function matches the callback
 * signature taken by apply; it is not used.
 */
void inccounter(Nameval *p, void *arg)
{
    int *counter = (int *) arg;

    (void) p;       /* p is unused */
    *counter += 1;
}
int n;
n = 0;
apply(nvlist, inccounter, &n);
printf(“%d elements in nvlist\n”, n);
Free Elements in List
/*
 * freeall: free all elements of listp.
 * Only the nodes themselves are released; the name strings are not.
 */
void freeall(Nameval *listp)
{
    Nameval *next;

    for ( ; listp != NULL; listp = next) {
        /* save the link first: listp must not be touched after free() */
        next = listp->next;
        /* assumes name is freed elsewhere */
        free(listp);
    }
}
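/* Counter-example (the "?" marks questionable code): this loop is wrong
 * because listp->next is read after listp has already been freed.
 * ?   for ( ; listp != NULL; listp = listp->next)
 * ?       free(listp);
 */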
Delete Element in List
/*
 * delitem: delete the first node named name from listp.
 *
 * The matching node is unlinked and freed (its name string is not freed).
 * Returns the head of the resulting list, which changes when the deleted
 * node was the first one. If no node matches, eprintf is called;
 * NOTE(review): the final return assumes eprintf does not return - confirm.
 */
Nameval *delitem(Nameval *listp, char *name)
{
    Nameval *p, *prev;

    prev = NULL;
    for (p = listp; p != NULL; p = p->next) {
        if (strcmp(name, p->name) == 0) {
            if (prev == NULL)
                listp = p->next;        /* deleting the head */
            else
                prev->next = p->next;   /* bypass p */
            free(p);
            return listp;
        }
        prev = p;
    }
    eprintf("delitem: %s not in list", name);
    return NULL; /* can't get here */
}
Trees
• A binary tree is either NULL or contains a node
with a left and right child which are themselves
trees.
• Nodes contain values
• In a binary search tree (BST) the values in the left
child are smaller than the value at the node and the
values in the right child are greater than the value
at the node.
• O(log n) expected search and insertion time
• in-order traversal provides elements in sorted
order.
Binary Search Tree
/* Nameval: a binary search tree node holding a name-value pair. */
typedef struct Nameval Nameval;
struct Nameval {
    char *name;
    int value;
    Nameval *left;      /* lesser */
    Nameval *right;     /* greater */
};
                      "smiley" 0x263A
                     /               \
          "Aacute" 0x00c1         "zeta" 0x03b6
           /            \            /      \
 "AElig" 0x00c6   "Acirc" 0x00c2  NULL     NULL
    /      \         /      \
  NULL    NULL     NULL    NULL
Binary Search Tree Lookup
/* lookup: look up name in tree treep *.
Nameval *lookup(Nameval *treep, char *name)
{
int cmp;
if (treep == NULL)
return NULL;
cmp = strcmp(name, tree->name);
if (cmp == 0)
return treep;
else if (cmp < 0)
return lookup(treep->left, name);
else
return lookup(treep->right, name);
}
Hash Tables
• Provides key lookup and insertion with constant
expected cost
• Used to create table lookup where it is not
possible to reserve a slot for each possible element
• hash function maps key to index (should evenly
distribute keys)
• duplicates stored in a chain (list) – other
mechanisms are possible.
Hash Table
/* Nameval: a name-value pair that also serves as a node of a hash chain. */
typedef struct Nameval {
    char *name;
    int value;
    struct Nameval *next;   /* in chain */
} Nameval;

/* symtab: a symbol table of NHASH chain heads. */
Nameval *symtab[NHASH];
NULL
NULL
NULL
NULL
NULL
NULL
name 1
data 1
name 2
data 2
NULL
name 3
data 3
Hash Table Lookup
/*
 * lookup: find name in symtab, with optional create.
 *
 * The chain selected by hash(name) is searched with strcmp. When there is no
 * match and create is non-zero, a new entry holding name and value is
 * allocated with emalloc and pushed on the front of that chain; the name
 * pointer is stored as given, not copied.
 *
 * Returns the existing or newly created entry, or NULL when the name is
 * absent and create is zero.
 */
Nameval *lookup(char *name, int create, int value)
{
    int bucket = hash(name);
    Nameval *entry = symtab[bucket];

    while (entry != NULL) {
        if (strcmp(name, entry->name) == 0)
            return entry;
        entry = entry->next;
    }

    if (!create)
        return NULL;

    entry = (Nameval *) emalloc(sizeof(Nameval));
    entry->name = name; /* assumed allocated elsewhere */
    entry->value = value;
    entry->next = symtab[bucket];
    symtab[bucket] = entry;
    return entry;
}
Hash Function
/* MULTIPLIER: factor applied to the running hash before each byte is added. */
enum { MULTIPLIER = 31 };

/*
 * hash: compute hash value of string.
 *
 * Each byte is read as an unsigned char, so the amount added is never
 * negative whatever the signedness of plain char; the unsigned accumulator
 * wraps on overflow.
 * Returns a value in the range 0 .. NHASH-1.
 */
unsigned int hash(char *str)
{
    unsigned int h;
    unsigned char *p;

    h = 0;
    for (p = (unsigned char *) str; *p != '\0'; p++)
        h = MULTIPLIER * h + *p;
    return h % NHASH;
}
Standard Template Library
• vector
– vector<string> V;
• list
– list<string> L;
• map
– map<string,int> symtab;
– symtab["smiley"] = 0x263A;
– symtab["zeta"] = 0x03b6;
• set
– set<string> S;
Iterators
• iterator
– list<string>::iterator pos;
– pos = L.begin();
– pos++;
– L.insert(pos, "before"); /* insert before iterator position */
– for (pos = L.begin(); pos != L.end(); pos++)
–     cout << *pos << endl;
perl
• lists
• hashes
Java Collection
• java.util.Collections
– list
• sort
• shuffle
– map
– set