We have seen a variety of basic data types in Python, including integers, strings, lists, tuples, and dictionaries.
We have also seen how object oriented programming allows us to define classes that have methods and properties to encapsulate data.
Now, we will use classes to define additional data structures. If you consider the primitive data types as atomic elements, then data structures can be viewed as molecules that are formed by combining various elements.
In this notebook, we shall define and discuss the following data types: stacks, queues, hash tables, heaps, binary search trees, and graphs.
A common use of classes is to implement data structures. Below is an example of a stack, which is a LIFO - last in first out - structure. It is a collection.
Items are added to the stack with push and removed with pop.
We will see that the python virtual machine for interpreting byte code is based on a stack architecture.
class stack:
    ''' LIFO (last in, first out) collection backed by a Python list.
    The top of the stack is the END of the list. '''

    def __init__(self, items = []):
        ''' Build a stack from an optional sequence of starting items.
        The argument is sliced, so the stack keeps its own copy; that is why
        we do not simply say self.items = items (the caller's list, and the
        shared default [], would otherwise be mutated by push and pop). '''
        self.items = items[:]
        self.size = len(items)

    def __repr__(self):
        ''' Text that evaluates back to an equal stack. '''
        return f"stack({list(self.items)})"

    def isempty(self):
        ''' True when the stack holds no items. '''
        return self.items == []

    def push(self, item):
        ''' Put item on top of the stack. '''
        self.size += 1
        self.items.append(item)

    def peek(self):
        ''' Return the top item without removing it.
        On an empty stack, print an error message and return None. '''
        if not self.isempty():
            return self.items[-1]
        print ("Error: stack is empty")

    def pop(self):
        ''' Remove the top item and return it.
        On an empty stack, print an error message and return None. '''
        if not self.isempty():
            self.size -= 1
            return self.items.pop()
        print ("Error: stack is empty")

    def rotate(self):
        ''' Exchange the two topmost items.
        With fewer than two items, print an error message instead. '''
        if self.size >= 2:
            top, below = self.items[-1], self.items[-2]
            self.items[-1], self.items[-2] = below, top
        else:
            print ("Error: stack has fewer than 2 elements")

    def __iter__(self):
        ''' Generate the items from the top of the stack down to the bottom.
        Used by for loops and list comprehensions. '''
        if self.isempty():
            return
        for position in range(self.size - 1, -1, -1):
            yield self.items[position]

    def __eq__(self, other):
        ''' Equality (==): same type and the same items in the same order. '''
        if type(other) != type(self):
            return False
        return self.items == other.items

    def copy(self):
        ''' Return a new stack holding a copy of this stack's items. '''
        return stack(self.items)
help(stack)
Help on class stack in module __main__: class stack(builtins.object) | stack(items=[]) | | A class for a stack data structure. It is LIFO - last in, first out. | | Methods defined here: | | __eq__(self, other) | equality predicate for stacks. (==) | | __init__(self, items=[]) | Constructor for a stack. Initialize the list of items and the size. | | __iter__(self) | Return iterator for the stack. Used in for loop or list comprehension. | | __repr__(self) | return a string that evaluates to the stack. | | copy(self) | copy constructor - clone the current instance. | | isempty(self) | predicate: is the stack empty? | | peek(self) | return the end of the stack, if not empty. | | pop(self) | Remove and return the item at the end of the stack. | If the stack is empty, print error message. | | push(self, item) | add an item to the end of the stack. | | rotate(self) | swap the top two items in the stack. | | ---------------------------------------------------------------------- | Data descriptors defined here: | | __dict__ | dictionary for instance variables (if defined) | | __weakref__ | list of weak references to the object (if defined) | | ---------------------------------------------------------------------- | Data and other attributes defined here: | | __hash__ = None
Let's take our stack out for a test drive.
s = stack()
s.push(1)
s.push(2)
s.push(3)
s.push(4)
s
stack([1, 2, 3, 4])
s.isempty()
False
s.peek()
4
s.pop()
4
s
stack([1, 2, 3])
s.rotate()
s
stack([1, 3, 2])
type(s) == stack
True
s2 = s.copy()
s == s2
True
s.items
[1, 3, 2]
s2.items
[1, 3, 2]
s.items == s2.items
True
s2.rotate()
s2
stack([1, 2, 3])
s == s2
False
s2.rotate()
s == s2
True
s
stack([1, 3, 2])
def test(s):
''' test for the iterator. '''
for i in s:
print (i)
return [x for x in s]
test(s)
2 3 1
[2, 3, 1]
s2.rotate()
test(s2)
3 2 1
[3, 2, 1]
def revstr(str):
    ''' Reverse a string by way of a stack.
    Every character is pushed, then the stack is popped until it is empty;
    the popped characters are joined into a new string, so the argument
    itself is left untouched. '''
    chars = stack()
    for ch in str:
        chars.push(ch)
    # chars.size is read once, before any pop, so this pops every pushed item.
    return ''.join([chars.pop() for _ in range(chars.size)])
revstr('hello world!')
'!dlrow olleh'
Write a procedure balanced(string) that reads string and determines whether its parentheses are "balanced."
Hint: for left delimiters, push onto stack; for right delimiters, pop from stack and check whether popped element matches right delimiter.
def balanced(string):
    ''' Exercise stub: decide whether the parentheses in string are balanced.

    Not implemented yet - the body is only `pass`, so every call currently
    returns None. The hint in the notes is to push left delimiters onto a
    stack and, for each right delimiter, pop and check for a match.
    The staff demonstration in this notebook shows True for '()' and '',
    and False for ')(' and 'abc('. '''
    pass
We will import the staff solution to demonstrate the functions.
import hw4a
hw4a.balanced('(()))')
False
hw4a.balanced('()')
True
hw4a.balanced('((())())')
True
hw4a.balanced('abcd(1234)dfg')
True
hw4a.balanced('')
True
hw4a.balanced('abcdef)')
False
hw4a.balanced('abc(')
False
hw4a.balanced(')(')
False
In the homework, we ask you to write the queue class.
Write a queue data structure, similar to the stack above. Whereas a stack is LIFO (last in first out), a queue is FIFO = first in, first out
See Steven Skiena, The Algorithm Design Manual, page 71.
class queue:
    ''' A queue data structure: First In, First Out (FIFO).

    Exercise skeleton: every method body below is `pass`, so each method
    currently returns None until it is implemented. The staff demonstration
    in this notebook reads the items through an attribute named `data`
    and shows a repr such as queue([1, 2, 3, 4]). '''
    def __init__(self, stuff=[]):
        ''' Constructor for a queue object.
        stuff: presumably the initial items of the queue - TODO confirm.
        NOTE(review): the default [] is a single shared list; an
        implementation should copy it rather than store it directly. '''
        pass
    def __str__(self):
        ''' Render queue instance as a string. (stub) '''
        pass
    def __repr__(self):
        ''' Render queue instance as a string that evaluates to the object. (stub) '''
        pass
    def isempty(self):
        ''' Is the queue empty? True or False. (stub) '''
        pass
    def enqueue(self, item):
        ''' Add an item to the queue. (stub) '''
        pass
    def dequeue(self):
        ''' Remove the next item from the queue. Error message if the queue
        is empty (the staff demonstration returns 'queue is empty'). (stub) '''
        pass
    def peek(self):
        ''' Return the next item without removing it.
        Error message if queue is empty. (stub) '''
        pass
    def __iter__(self):
        ''' Define the iterator for queue. Used in for or list comprehension,
        similar to the iterator for stack. (stub) '''
        pass
    def __eq__(self, other):
        ''' Overload the equality operator (==). (stub) '''
        pass
    def copy(self):
        ''' Copy constructor - clone the current instance. (stub) '''
        pass
d = hw4a.queue()
d.enqueue(9)
d.enqueue(1)
d.enqueue(2)
d == d.copy()
True
d.peek()
9
d.data
[9, 1, 2]
[x for x in d]
[9, 1, 2]
2 in d
True
5 in d
False
d.dequeue()
9
d.dequeue()
1
d.isempty()
False
d.dequeue()
2
d.isempty()
True
2 in d
False
d.dequeue()
'queue is empty'
Create a queue using two stacks: s1 and s2.
enqueue() pushes items on s1.
dequeue() pops s2, unless s2 is empty, in which case keep popping s1 onto s2 until s1 is empty. Then pop s2.
peek is similar to dequeue, except no final pop.
class queue2:
    ''' Queue implemented using two stacks, s1 and s2.

    Exercise skeleton: only __init__ and isempty are implemented; every
    other method body is `pass` and currently returns None. Per the notes,
    enqueue pushes onto s1 and dequeue pops s2, refilling s2 from s1
    whenever s2 is empty. '''
    def __init__(self, stuff1 = [], stuff2 = []):
        ''' Initialize the two stacks from copies of stuff1 and stuff2.
        The arguments are sliced before being handed to stack, so the
        caller's lists (and the shared default lists) are not stored. '''
        self.s1 = stack(stuff1[:])
        self.s2 = stack(stuff2[:])
    def __str__(self):
        ''' Render the queue as a string. (stub) '''
        pass
    def __repr__(self):
        ''' Render the queue as a string that evaluates to the object; the
        staff demonstration shows e.g. queue2(stack([9, 1, 2]), stack([])).
        (stub) '''
        pass
    def isempty(self):
        ''' Is the queue empty? True only when both stacks are empty. '''
        return self.s1.isempty() and self.s2.isempty()
    def enqueue(self, item):
        ''' Add an item to the queue. (stub) '''
        pass
    def dequeue(self):
        ''' Remove the next item. Error message if queue is empty. (stub) '''
        pass
    def peek(self):
        ''' Return the next item without removing it.
        Return an error message if the queue is empty. (stub) '''
        pass
    def __iter__(self):
        ''' Define the iterator for queue2. Used in for or list comprehension.
        HINT:
        convert stacks to lists.
        extend the stack 2 list with the reverse of the stack 1 list
        use a for loop to iterate through the extended list,
        yielding the item. (stub) '''
        pass
    def __eq__(self, other):
        ''' Overload the equality operator:
        true if both stacks are respectively equal.
        Use the convert-stacks-to-list method given above for __iter__. (stub) '''
        pass
    def copy(self):
        ''' Copy constructor for queue2. (stub) '''
        pass
d2 = hw4a.queue2()
d2.enqueue(9)
d2.enqueue(1)
d2.enqueue(2)
d2
queue2(stack([9, 1, 2]), stack([]))
d2 == d2.copy()
True
d2 == hw4a.queue2([9,1,2])
True
d2 == hw4a.queue2([1,2])
False
d2.peek()
9
d2
queue2(stack([]), stack([2, 1, 9]))
[x for x in d2]
[9, 1, 2]
2 in d2
True
5 in d2
False
d2.dequeue()
9
d2
queue2(stack([]), stack([2, 1]))
d2.isempty()
False
d2.dequeue()
1
d2.dequeue()
2
d2.isempty()
True
2 in d2
False
d2.dequeue()
'queue is empty'
Write a procedure to reverse a queue. It modifies the original queue! It should work with either q implementation. That is, the function should use the standard methods, enqueue and dequeue which are common to both implementations. This demonstrates the value of encapsulation.
def reverseq(q):
    ''' Exercise stub: reverse the queue q in place.

    Not implemented yet - the body is only `pass`, so the call returns None
    and leaves q unchanged. Per the notes it should rely only on enqueue
    and dequeue so that it works with either queue implementation; the
    staff demonstration also returns the reversed queue. '''
    pass
q = hw4a.queue()
q.enqueue(1)
q.enqueue(2)
q.enqueue(3)
q.enqueue(4)
q
queue([1, 2, 3, 4])
hw4a.reverseq(q)
queue([4, 3, 2, 1])
q
queue([4, 3, 2, 1])
q2 = hw4a.queue2()
q2.enqueue(1)
q2.enqueue(2)
q2.enqueue(3)
q2.enqueue(4)
hw4a.reverseq(q2)
queue2(stack([4, 3, 2, 1]), stack([]))
q2
queue2(stack([4, 3, 2, 1]), stack([]))
type(q2)
hw4a.queue2
type(q)
hw4a.queue
Python dicts are implemented as hash tables.
Reading: Skiena pages 89-93
Video: hash tables
Create a hash table. It will be a list of size buckets. Each bucket will itself contain a list. If two items fall in the same bucket, the respective list will contain both items.
See Skiena page 89
Create a hash function using the djb2 algorithm.
We will show you some bad hash functions below.
class myhash:
    ''' Hash table made of a list of buckets, each bucket itself a list.

    Exercise skeleton: only isempty has a body; every other method is
    `pass` and currently returns None. The staff demonstration in this
    notebook exposes the attributes `table`, `size` and `count`. '''
    def __init__(self, size = 20):
        ''' Construct a hash table of a given size,
        that is, with size buckets. (stub) '''
        pass
    def __repr__(self):
        ''' Short representation; the staff demonstration shows myhash(20). (stub) '''
        pass
    def __str__(self):
        ''' String rendering; the staff demonstration shows the full list of
        buckets, e.g. "myhash([[], ..., [('one', 1)], ...])". (stub) '''
        pass
    def isempty(self):
        ''' Is the hash table empty? True when self.count is 0.
        NOTE(review): self.count is not assigned anywhere in this skeleton,
        so this raises AttributeError until __init__ is implemented. '''
        return self.count == 0
    def put(self, key, value):
        ''' Add an item with the given key and value.
        If there is already an item with the given key, remove it:
        no duplicate keys. (stub) '''
        pass
    def get(self,key):
        ''' Retrieve the value for the given key. The staff demonstration
        returns False for a key that is not present. (stub) '''
        pass
    def remove(self,key):
        ''' Remove the item for the given key. (stub) '''
        pass
    def hashfun(self,key, debug=False):
        ''' Create a hash function using the djb2 algorithm
        http://www.cse.yorku.ca/~oz/hash.html
        If the optional debug parameter is true,
        print out the value of the hash. (stub)
        '''
        pass
    def __iter__(self):
        ''' Iterate through the buckets and their respective contents. (stub)
        '''
        pass
    def __eq__(self, other):
        ''' Overload the equality operator (==). (stub) '''
        pass
    def copy(self):
        ''' Copy constructor - clone the current instance. (stub) '''
        pass
h = hw4a.myhash(20)
h.put("one",1)
h.put("two",2)
h.put("three",3)
str(h)
"myhash([[], [], [], [], [], [], [], [('one', 1)], [], [], [], [], [], [], [], [], [], [('three', 3)], [], [('two', 2)]])"
h
myhash(20)
h == h.copy()
True
h2 = hw4a.myhash()
h2.put("one",1)
h2.put("two",2)
h2.put("three",3)
h == h2
True
h2.put("four",4)
h == h2
False
"one" in h
True
"zero" in h
False
[x for x in h]
['one', 'three', 'two']
[(x,h.get(x)) for x in h]
[('one', 1), ('three', 3), ('two', 2)]
[x for x in h2]
['one', 'four', 'three', 'two']
dir(h)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'copy', 'count', 'get', 'hashfun', 'isempty', 'put', 'remove', 'size', 'table']
h.table
[[], [], [], [], [], [], [], [('one', 1)], [], [], [], [], [], [], [], [], [], [('three', 3)], [], [('two', 2)]]
h.hashfun("one")
7
h.hashfun("four")
9
h.hashfun('three')
17
h.hashfun('two')
19
h.get('three')
3
h.get('four')
False
h.remove('three')
h.get('three')
False
dd = {}
dd['a']
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-42-872fa1bddbd4> in <module> ----> 1 dd['a'] KeyError: 'a'
dd.get('a')
h.hashfun('one', True)
'Hash of: one = 193501607 ==> 7'
h.hashfun('two', True)
'Hash of: two = 193507359 ==> 19'
Hash functions are common and useful in many programming applications. They are critical in many cryptographic systems. For example, Bitcoin depends on its hash function being (nearly) impossible to invert (one-way). We will return to the topic of cryptography in a few weeks.
A cryptographic hash function h(x) must provide the following: compression, efficiency, one-way behavior (it is hard to invert), and collision resistance.
Below are some bad hash functions.
def badhash(str):
    ''' A deliberately poor hash: the hash of a string is just its length,
    so all strings of equal length collide. '''
    bucket = len(str)
    return bucket
badhash('hello world!')
12
badhash("this is test")
12
This function achieves the first three objectives: compression, efficiency, and one-way. However, it is not collision resistant. Lots of strings will end up in the same bucket. You want a function that will spread the keys around to different buckets.
Dynamic hash tables can grow over time. The hash table will start with a table size of N, and once N/2 items have been inserted, the table will expand to 2N. Doing so ensures that the table does not fill up. Of course, this technique will not be effective if the hash function throws every key into the same handful of buckets.
Below is another bad function. This one sums the ASCII values of the characters in the string.
def badhash2(str):
    ''' Another poor hash: add up the code points (ord) of the characters.
    Any rearrangement of the same characters yields the same value. '''
    return sum(ord(ch) for ch in str)
ord('a')
97
ord('A')
65
badhash2('hello world!')
1149
badhash2("this is test")
1172
It is a slight improvement over the first hash. However, it still is deplorable.
x = ''.join(sorted('hello world!'))
x
' !dehllloorw'
badhash2(x)
1149
If you have the same characters in a different order, you get the same hash value. Let's try to fix that problem.
def badhash3(str):
    ''' Slightly better hash: for each character, add its code point to the
    running total and then double the total, so character order matters. '''
    total = 0
    for code in map(ord, str):
        total = (total + code) * 2
    return total
badhash3('hello world!')
845554
badhash3('this is test')
887900
x
' !dehllloorw'
badhash3(x)
406942
By inserting the multiplication step, we have reduced the collision problem.
The djb2 hash function follows this approach of combining addition with multiplication. Note that ((hash << 5) + hash) is the same as multiplying by 33. It is just faster, since multiplication is typically much slower than shifts and addition. Here is the C code for djb2.
unsigned long hash(unsigned char *str) { unsigned long hash = 5381; int c; while (c = *str++) hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ return hash; }
Use your hash function to implement remove duplicates for strings.
Hint: you want to use the hash table to answer the question: have I seen this character already?
def removedups(string):
    ''' Exercise stub: remove duplicate characters from string.

    Not implemented yet - the body is only `pass`, so every call currently
    returns None. The hint in the notes is to use the hash table to answer
    "have I seen this character already?"; the staff demonstration maps
    'abcabcabc' to 'abc' and 'cbacbacba' to 'cba'. '''
    pass
hw4a.removedups('abcabcabc')
'abc'
hw4a.removedups('cbacbacba')
'cba'
hw4a.removedups('abcabcabcdef')
'abcdef'
Video: heap sort
In computer science, a heap is a specialized tree-based data structure which is essentially an almost complete tree that satisfies the heap property: in a max heap, for any given node C, if P is a parent node of C, then the key (the value) of P is greater than or equal to the key of C. In a min heap, the key of P is less than or equal to the key of C. The node at the "top" of the heap (with no parents) is called the root node.
Below is a max heap.
We use Python's heapq module, which implements a min heap.
from heapq import *
heap = []
data = [1,3,5,7,9,2,4,6,8,0]
for item in data:
heappush(heap, item)
heap
[0, 1, 2, 6, 3, 5, 4, 7, 8, 9]
ordered = []
def h1():
    ''' Empty the global list heap with heappop, appending every popped item
    to the global list ordered. Returns None. '''
    while True:
        if not heap:
            return
        ordered.append(heappop(heap))
ordered
[]
h1()
ordered
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
data
[1, 3, 5, 7, 9, 2, 4, 6, 8, 0]
heapify(data)
data
[0, 1, 2, 6, 3, 5, 4, 7, 8, 9]
data.sort()
data
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
data == ordered
True
Reading: Skiena pages 109-115
Implement a min heap per the description in Skiena.
class heap:
    ''' Min heap kept in a list that is indexed from 1 (slot 0 is unused).

    Exercise skeleton: bubbleup, bubbleDown, parent and child are provided;
    the remaining methods are `pass` and return None until implemented.
    The provided helpers read self.data (the list of slots) and self.count
    (the number of items stored, occupying slots 1..count). '''
    def __init__(self, size = 10):
        ''' Construct a heap with room for size items. (stub)
        The staff demonstration shows size + 1 slots, all initially 0. '''
        pass
    def __str__(self):
        ''' String rendering; the staff demonstration shows e.g.
        'heap( [0, 4, 12, 8, 0, 0, 0, 0, 0, 0, 0] )'. (stub) '''
        pass
    def __repr__(self):
        ''' Short representation; the staff demonstration shows heap(10). (stub) '''
        pass
    def isempty(self):
        ''' Is the heap empty? (stub) '''
        pass
    def insert(self,item):
        ''' Add a new element to the heap and adjust as needed. (stub) '''
        pass
    def bubbleup(self, n):
        ''' Move the item in slot n up until its parent is no larger.
        This could be tricky. I am defining it for you. '''
        # Go through self rather than the global name `heap`: this notebook
        # also binds `heap` to a plain list in the heapq demonstration.
        p = self.parent(n)
        if p == -1:
            return
        if self.data[p] > self.data[n]:
            self.data[n], self.data[p] = self.data[p], self.data[n]
            self.bubbleup(p)
    def extractmin(self):
        ''' Remove the smallest element and adjust the heap. (stub) '''
        pass
    def bubbleDown(self,p):
        ''' Move the item in slot p down until neither child is smaller.
        This could be tricky. I am defining it for you. '''
        c = self.child(p)
        min_index = p
        # Look at the left child (c) and the right child (c + 1), skipping
        # any slot beyond the last stored item.
        for i in [0, 1]:
            if ((c + i) <= self.count):
                if self.data[min_index] > self.data[c + i]:
                    min_index = c+i
        if min_index != p:
            self.data[p], self.data[min_index] = self.data[min_index], self.data[p]
            self.bubbleDown(min_index)
    @staticmethod
    def parent(n):
        ''' Slot of the parent of slot n, or -1 for the root (slot 1).
        I define this for you. '''
        if (n == 1):
            return (-1)
        # Integer division: int(n/2) goes through a float and is wrong for
        # very large n.
        return n // 2
    @staticmethod
    def child(n):
        ''' Slot of the left child of slot n; the right child is one past it.
        I define this for you. '''
        return (2 * n)
    def __iter__(self):
        ''' Define the iterator for heap. Used in for or list comprehension. (stub) '''
        pass
    def __eq__(self, other):
        ''' Overload the equality operator (==). (stub) '''
        pass
    def copy(self):
        ''' Copy constructor - clone the current instance. (stub) '''
        pass
import hw4a
hh = hw4a.heap(10)
hh
heap(10)
str(hh)
'heap( [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] )'
hh.insert(12)
str(hh)
'heap( [0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0] )'
hh.insert(4)
str(hh)
'heap( [0, 4, 12, 0, 0, 0, 0, 0, 0, 0, 0] )'
hh.insert(8)
str(hh)
'heap( [0, 4, 12, 8, 0, 0, 0, 0, 0, 0, 0] )'
hh == hh.copy()
True
hh2 = hw4a.heap(10)
hh2.insert(12)
hh2.insert(4)
hh2.insert(8)
str(hh2)
'heap( [0, 4, 12, 8, 0, 0, 0, 0, 0, 0, 0] )'
hh == hh2
True
hh2.insert(40)
hh == hh2
False
str(hh2)
'heap( [0, 4, 12, 8, 40, 0, 0, 0, 0, 0, 0] )'
4 in hh
True
40 in hh
False
[x for x in hh]
[4, 12, 8]
[x for x in hh2]
[4, 12, 8, 40]
hh.child(1)
2
hh.child(2)
4
hh.parent(4)
2
hh.extractmin()
4
str(hh)
'heap( [0, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0] )'
hh.extractmin()
8
dir(hh)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'bubbleDown', 'bubbleup', 'child', 'copy', 'count', 'data', 'extractmin', 'insert', 'isempty', 'parent', 'size']
h.count
3
h.size
20
Write a function that takes in a list of positive integers of size n and returns a sorted list containing the n/2 smallest elements. Use a heap.
def smallest(lst = [4,2,5,6,8,11,99,6,77]):
    ''' Exercise stub: return a sorted list of the n/2 smallest elements of
    lst, a list of n positive integers, using a heap.

    Not implemented yet - the body is only `pass`, so every call currently
    returns None. The staff demonstration rounds n/2 down: [1] gives [] and
    the default list of nine items gives [2, 4, 5, 6]. '''
    pass
hw4a.smallest()
[2, 4, 5, 6]
hw4a.smallest([])
[]
hw4a.smallest([1])
[]
hw4a.smallest([1,2])
[1]
hw4a.smallest([3,4,5,6,2,3,4,5,6,7,88,22,11,33,22,44])
[2, 3, 3, 4, 4, 5, 5, 6]
## binary search tree
class bst:
    ''' One node of a binary search tree; the node also stands for the
    subtree rooted at it. Duplicate values are not stored.

    Attributes: value, left and right (child nodes or None), and parent
    (the node above, or None for a root). '''
    def __init__(self, value, parent = None):
        ''' Create a leaf holding value, optionally linked to its parent. '''
        self.left = None
        self.right = None
        self.value = value
        self.parent = parent
    def __repr__(self):
        return "bst({})".format(self.value)
    def insert(self, value):
        ''' Insert value below this node and return the node that holds it.
        No duplicates: if value is already present, the existing node is
        returned. '''
        if self.value == value:
            return self
        if self.value > value:
            if self.left is not None:
                return self.left.insert(value)
            self.left = bst(value, parent=self)
            return self.left
        if self.right is not None:
            return self.right.insert(value)
        self.right = bst(value, parent=self)
        return self.right
    def preorder(self, indent = 0):
        ''' Print the subtree in preorder (node, left, right); each node is
        prefixed by one '-' per level of depth. '''
        print ('-' * indent, self)
        if self.left is not None: self.left.preorder(indent+1)
        if self.right is not None: self.right.preorder(indent+1)
    def inorder(self, indent = 0):
        ''' Print the subtree in inorder (left, node, right), i.e. in
        ascending order of value. '''
        if self.left is not None: self.left.inorder(indent+1)
        print ('-' * indent, self)
        if self.right is not None: self.right.inorder(indent+1)
    def postorder(self, indent=0):
        ''' Print the subtree in postorder (left, right, node). '''
        if self.left is not None: self.left.postorder(indent+1)
        if self.right is not None: self.right.postorder(indent+1)
        print ('-' * indent, self)
    def find(self, value):
        ''' Binary search: return the node holding value, or False. '''
        if self.value == value:
            return self
        branch = self.left if self.value > value else self.right
        if branch is None:
            return False
        return branch.find(value)
    def successor(self):
        ''' Return the node with the next larger value, or False when this
        node holds the largest value reachable through its parent links. '''
        if self.right is not None:
            return self.right.min()
        # Climb while we are a right child; the first ancestor reached from
        # its left side is the successor. A root (parent None) has none.
        s = self
        p = self.parent
        while p is not None and p.right is s:
            s = p
            p = s.parent
        return p or False
    def min(self):
        ''' Return the node holding the smallest value in this subtree. '''
        if self.left is not None:
            return self.left.min()
        return self
    ## iterator uses inorder traversal
    def __iter__(self):
        ''' Yield the values of the subtree in ascending order. '''
        if self.left is not None:
            yield from self.left
        yield self.value
        if self.right is not None:
            yield from self.right
    def dfs(self, value, trace=False):
        ''' Depth-first search that ignores the ordering of the tree.
        Return the node holding value, or False. With trace=True, print
        every node that is visited without matching. '''
        if self.value == value:
            return self
        if trace:
            print (self)
        # Try the left subtree first, and fall back to the right subtree
        # when the left one does not contain the value.
        for child in (self.left, self.right):
            if child is not None:
                found = child.dfs(value, trace)
                if found is not False:
                    return found
        return False
    def height(self):
        ''' Get the height (or depth) of the tree: the number of nodes on the
        longest path from this node down to a leaf. '''
        left = right = 0
        if self.left is not None:
            left = self.left.height()
        if self.right is not None:
            right = self.right.height()
        return 1 + max(left, right)
    # predicate to indicate if bst is balanced
    def isbalanced(self):
        ''' True when, at every node of the subtree, the heights of the two
        child subtrees differ by at most one. '''
        left = right = True
        hleft = hright = 0
        if self.left is not None:
            left = self.left.isbalanced()
            hleft = self.left.height()
        if self.right is not None:
            right = self.right.isbalanced()
            hright = self.right.height()
        return left and right and abs(hleft - hright) <= 1
    # convert unbalanced tree to balanced tree
    def balance(self):
        ''' Return the root of a NEW balanced tree holding the same values.
        The tree rooted at self is not modified. '''
        # the iterator yields the values in ascending order
        nodes = list(self)
        # recursively divide list in half, adding to balanced tree
        return self.balanceutil(nodes,0,len(nodes)-1)
    def balanceutil(self,nodes,start,end):
        ''' Build a balanced subtree from the sorted values nodes[start..end]
        (inclusive) and return its root, or None for an empty range. '''
        if start > end:
            return None
        mid = (start + end)//2
        root = bst(nodes[mid])
        root.left = self.balanceutil(nodes,start,mid-1)
        root.right = self.balanceutil(nodes,mid+1,end)
        # Link the children back to their parent so that successor() also
        # works on the rebuilt tree.
        for child in (root.left, root.right):
            if child is not None:
                child.parent = root
        return root
bst(15)
x = bst(10)
x.insert(5)
x.insert(7)
x.insert(6)
x.insert(8)
x.insert(9)
x.insert(15)
bst(15)
## graph class
class node:
    ''' A vertex of an undirected graph.

    Every node created is recorded at class level, so the "graph" is the
    set of all nodes created so far: node.count is how many exist and
    node.nodelist holds them in creation order. '''
    count = 0
    nodelist = []
    def __init__(self,name,value):
        ''' Create a node with no neighbors and register it with the class. '''
        self.name = name
        self.value = value
        self.neighbors = []
        # Instance attribute: this node's creation index (0 for the first).
        # It shadows the class-level counter on the instance only.
        self.count = node.count
        node.count += 1
        node.nodelist.append(self)
    def __repr__(self):
        return "node({}, {})".format(self.name, self.value)
    # str() and repr() render a node the same way
    __str__ = __repr__
    def addneighbor(self, neighbor):
        ''' Connect self and neighbor in both directions (at most once). '''
        if neighbor not in self.neighbors:
            self.neighbors.append(neighbor)
            # The reverse call stops at once on the way back, because self
            # is by then already in neighbor.neighbors.
            neighbor.addneighbor(self)
    def connected(self):
        ''' True when every node created so far can be reached from self. '''
        count = self.connectedaux({}, 0)
        return count == node.count
    def connectedaux(self, visited, count):
        ''' Count the nodes reachable from self that are not yet in visited.
        visited is a dict of the nodes already counted; count is the running
        total, which is returned updated. '''
        if not self in visited:
            visited[self] = True
            count += 1
            for n in self.neighbors:
                count = n.connectedaux(visited, count)
        return count
    def dfs(self, value):
        ''' Depth-first search from self: return the first node found whose
        value equals value, or None. Prints a trace of the visit. '''
        return self.dfsaux(value, {})
    def dfsaux(self, value, visited):
        ''' Recursive helper for dfs; visited is a dict of the nodes seen. '''
        print (self, visited)
        if self in visited:
            return None
        visited[self] = True
        if self.value == value:
            print ("***")
            return self
        for n in self.neighbors:
            # Hand a hit back up the recursion instead of discarding it.
            found = n.dfsaux(value, visited)
            if found is not None:
                return found
        return None
    def bfs(self, value):
        ''' Breadth-first search from self: return the tuple (node, value)
        for the first node found whose value equals value, or None.
        Prints a trace of the visit. '''
        return self.bfsaux(value, {}, [])
    def bfsaux(self, value, visited, queue):
        ''' Recursive helper for bfs. visited is a dict of the nodes seen and
        queue is the list of nodes still waiting to be examined. '''
        print (self, visited, queue)
        if not self in visited:
            visited[self] = True
            if self.value == value:
                print ("***")
                return (self, value)
            queue.extend(self.neighbors)
        print (":::", queue)
        if queue != []:
            n = queue.pop(0)
            # Hand the result of the rest of the search back to the caller.
            return n.bfsaux(value, visited, queue)
        return None
    def astar(self, value):
        ''' A* search. Not implemented yet: the body is `pass`. '''
        pass
n1 = node('n1',1)
n2 = node('n2',2)
n3 = node('n3',3)
n4 = node('n4',4)
n1.addneighbor(n2)
n1.addneighbor(n3)
n3.addneighbor(n4)
n5 = node('n5',5)