#! /usr/bin/python3.6

import wordsegment
import numpy
import random
import itertools
import binascii 
from Crypto.Util.strxor import strxor


# ********************************************************
# CS 200 HW #7   DUE Wednesday, 12/11/23 at 11:59 pm
#                via gradescope
# ********************************************************
# Note: you will submit three files: hw7.py, encode.sh, and decode.sh
# ********************************************************
#
# ********************************************************
# Name:
# Email address:
# ********************************************************

# This file may be opened in python 3
# Lines beginning with hashes are comments.

# If you are asked to write a procedure, please make sure it has 
# the specified name, and the specified number and order of arguments.  
# The names of the formal arguments need not be the same
# as in the problem specification.

# For each problem, the intended inputs to your procedures
# are specified (for example, "positive integers") 
# and your procedures need not do anything reasonable 
# for other possible inputs.

# You may write auxiliary procedures in addition to
# the requested one(s) -- for each of your auxiliary 
# procedures, please include a comment explaining
# what it does, and giving an example or two.

# You may also use procedures you have written 
# elsewhere in this assignment or previous assignments.
# They only need to be defined once somewhere within this file.

# Reading
# See: http://zoo.cs.yale.edu/classes/cs200/lectures/crypto.html

# ********************************************************
# ** problem 0 ** (1 easy point) 
# Replace the number 0 in the definition below to indicate
# the number of hours you spent doing this assignment
# Decimal numbers (eg, 6.237) are fine.  Exclude time
# spent reading.

def hours():
    return 0


# ********************************************************
# ** problem 1 ** (19 points)

# load the bigrams, unigrams, etc.
S = wordsegment.Segmenter()
S.load()

'''
The Caesar Cypher

This is one of the oldest cyphers.  You simply shift each letter of
the alphabet by the same number.  For example, if N=2, then A is C, B is D, 
... X is Z, Y is A, and Z is B.

Write the following three functions:

cencode(s,n) which encodes string s by shifting n positions.  The
program converts s to lowercase and removes all non-alphabetic
characters.

cdecode(s,n) which decodes a string s that was encoded by cencode(s,n)

Thus, cdecode(cencode(s,n),n) == s

decodecaesar(s) which decodes a string s that was encoded using
cencode(s,n) but you don't know n.  The function should try all
possible decoding, and return those that are plausible English
phrases.  Use the wordsegment module to solve the string segmentation
problem.


Examples:

>>> cencode("this is an example of cencode", 3) => 'wklvlvdqhadpsohrifhqfrgh'
>>> cencode("this is an example of cencode", 20) => 'nbcmcmuhyrugjfyizwyhwixy'
>>> cencode("one, two, three, four", 55) => 'rqhwzrwkuhhirxu'

>>> cdecode(cencode("this is an example of cencode", 3),3) => 'thisisanexampleofcencode'
>>> cdecode(cencode("this is an example of cencode", 20), 20) => 'thisisanexampleofcencode'
>>> cdecode(cencode("one, two, three, four", 55),55) => 'onetwothreefour'

>>> decodecaesar(cencode("one, two, three, four", 55)) => ['one two three four']
>>> decodecaesar(cencode("this is an example of cencode", 20)) => ['this is an example of cen code']
>>> decodecaesar(cencode("now is the time for all good men to come to the aid of their country", 100)) => ['now is the time for all good men to come to the aid of their country']
>>> decodecaesar(cencode("to be or not to be, that is the question", 10)) => ['to be or not to be that is the question']
'''

def cencode(s, n):
    pass

def cdecode(s,n):
    pass

def decodecaesar(s):
    pass


# ********************************************************
# ** problem 2 ** (20 points)

'''
Transposition Cypher

A transposition cypher scrambles the letters of a message.  As discussed
in the text, we place the text in a matrix (using the arrays from the 
numpy module), and exchange the rows and the columns.

Write the following six functions 

- makematrix(s, shape) which creates a numpy array
from a string with the given shape, 

- flatten(array) which flattens a numpy array, converting it into a list

- permute_rows(array, neworder) which permutes the rows of a matrix.

- permute_cols(array, neworder) which permutes the columns of a matrix.

- transencode(s, shape) which converts a string into a matrix of the
  given shape, and random permutes both the rows and columns (using
  random.shuffle()).  It returns a flattened string.

- transdecode(s, shape, limit=4) which takes a string, permuted by
  transencode, and decodes it by brute force, trying all possible
  permutations, checking each one using wordsegment. It returns all
  possible strings with more than limit-1 words

'''

def makematrix(s,shape):
    pass

def flatten(array):
    pass

def permute_rows(arr, neworder):
    pass

def permute_cols(arr, neworder):
    pass

def transencode(s, shape):
    pass

def transdecode(s, shape, limit=4):
    pass

'''
Examples:

>>> x39 = makematrix(x, [3,9])
>>> x93 = makematrix(x, [9,3])
>>> x39
array([['o', 'n', 'e', 't', 'w', 'o', 't', 'h', 'r'],
       ['e', 'e', 'f', 'o', 'u', 'r', 'f', 'i', 'v'],
       ['e', 's', 'i', 'x', 's', 'e', 'v', 'e', 'n']], 
      dtype='<U1')
>>> x93
array([['o', 'n', 'e'],
       ['t', 'w', 'o'],
       ['t', 'h', 'r'],
       ['e', 'e', 'f'],
       ['o', 'u', 'r'],
       ['f', 'i', 'v'],
       ['e', 's', 'i'],
       ['x', 's', 'e'],
       ['v', 'e', 'n']], 
      dtype='<U1')
>>> permute_rows(x39, [2,0,1])
array([['e', 's', 'i', 'x', 's', 'e', 'v', 'e', 'n'],
       ['o', 'n', 'e', 't', 'w', 'o', 't', 'h', 'r'],
       ['e', 'e', 'f', 'o', 'u', 'r', 'f', 'i', 'v']], 
      dtype='<U1')
>>> permute_rows(x39, [1,2,0])
array([['e', 'e', 'f', 'o', 'u', 'r', 'f', 'i', 'v'],
       ['e', 's', 'i', 'x', 's', 'e', 'v', 'e', 'n'],
       ['o', 'n', 'e', 't', 'w', 'o', 't', 'h', 'r']], 
      dtype='<U1')
>>> permute_cols(x93, [2,0,1])
array([['e', 'o', 'n'],
       ['o', 't', 'w'],
       ['r', 't', 'h'],
       ['f', 'e', 'e'],
       ['r', 'o', 'u'],
       ['v', 'f', 'i'],
       ['i', 'e', 's'],
       ['e', 'x', 's'],
       ['n', 'v', 'e']], 
      dtype='<U1')
>>> permute_cols(x93, [1,2,0])
array([['n', 'e', 'o'],
       ['w', 'o', 't'],
       ['h', 'r', 't'],
       ['e', 'f', 'e'],
       ['u', 'r', 'o'],
       ['i', 'v', 'f'],
       ['s', 'i', 'e'],
       ['s', 'e', 'x'],
       ['e', 'n', 'v']], 
      dtype='<U1')

>>> q = 'onetwo'
>>> q1 = transencode(q, [2,3])
>>> q1
'wotneo'
>>> q2 = transdecode(q1, [2,3], 2)
>>> q2
['wot neo', 'wto noe', 'ow teno', 'ot we on', 'two one', 'to woen', 'neo wot', 'noew to', 'eno owt', 'eo not w', 'one two', 'oen to w']

'''

# ********************************************************
# ** problem 3 ** (10 points)

'''
From Stamp book, page 45, no. 15

The following message was encrypted with a double transposition (of
the type discussed in the text) using a matrix of 7 rows and 10
columns. Hint: The first word is "there." The last word is the name of
a European capital.

Decrypt the message and put the result in prob13answer below.

'''

prob13 = 'IAUTMOCSMNIMREBOTNELSTRHEREOAEVMWIHTSEEATMAEOHWHSYCEELTTEOHMUOUFEHTRFT'
prob13answer = ''

# ********************************************************
# ** problem 4 ** (10 points)

'''
XOR Encoding

Most of modern cryptography relies on the XOR function.  It has the
following marvelous property:

plaintext XOR key = code
code XOR key = plaintext

Thus, if someone knows the key, they can decode the text.  Absent the
key, an intruder is pretty much out of luck.

Write the following two functions:

- encodexorbase64(string, key) which takes a string and key, each in
  byte (b') format, and does the following:

   * converts each to strings of equal length (if the string is
   shorter than the key, the key is truncated; if the key is shorter
   than the string, it is replicated to be of equal length)

   * converts those strings to base64 encoding (use binascii module)

   * xor's the base64 strings (use strxor from Crypto)

   * returns a tuple with the xor'd string and the base64 encoded key 


- decodexorbase64(string, key) which takes the output of the previous
  function (xor'd string and base64 encoded key) and returns the
  original string, in b' format.
'''

def encodexorbase64(string, key):
    pass

def decodexorbase64(code, keyb64):
    pass

'''
Examples:

>>> str = b'string'
>>> key = b'kermit'
>>> code = encodexorbase64(str, key)
>>> code
(b'\x02\x01\x04\x00\x03\x00Y^\x00', b'a2VybWl0\n')
>>> decodexorbase64(code[0], code[1])
b'string'
>>> str2 = b'stringstringmore'
>>> code2 = encodexorbase64(str2, key)
>>> code2
(b'\x02\x01\x04\x00\x03\x00Y^\x02\x01\x04\x00\x03\x00Y^\x03eo\x008\x00\x00\x00\x00', b'a2VybWl0a2VybWl0a2VybQ==\n')
>>> decodexorbase64(code2[0], code2[1])
b'stringstringmore'


'''


# ********************************************************
# ** problem 5 ** (20 points)

'''
In the directory, /c/cs200/hws/hw7, there is a file

encode.sh

which is the skeleton of a bash shell script.  It uses the UNIX
openssl command to encode files given as command line arguments.  It
uses the "enc -e" option, along with one of the following encoding
schemes, chosen at random (using $RANDOM)

aes-128-cbc aes-128-ecb des des3 des-cbc rc5 idea

The password for the encryption is in the file password.  (It is
"YELLOW SUBMARINE")

Here is an example output:

bash-4.4$ ./encode.sh poem*
ARGS: poem1 poem2 poem3 poem4
encoding: poem1 as poem1.4.out
encoding: poem2 as poem2.5.out
encoding: poem3 as poem3.0.out
encoding: poem4 as poem4.4.out

That is, each file, poem1, poem2, etc., is encoded to a respective
output file.  The numbers are the index into the array of encoding
schemes.  That is, 0 is aes-128-cbc, 6 is idea.

If we run it again, we get different results.

bash-4.4$ ./encode.sh poem*
ARGS: poem1 poem2 poem3 poem4
encoding: poem1 as poem1.3.out
encoding: poem2 as poem2.6.out
encoding: poem3 as poem3.4.out
encoding: poem4 as poem4.4.out
bash-4.4$ 

Write the bash shell script encode.sh which produces these results.
'''

# ********************************************************
# ** problem 6 ** (20 points)

'''
In the directory, /c/cs200/hws/hw7, there is a file

decode.sh

which is the skeleton of a bash shell script.  It uses the UNIX
openssl command to decode files given as command line arguments.  It
uses the "enc -d" option, along with one of the following encoding
schemes:

aes-128-cbc aes-128-ecb des des3 des-cbc rc5 idea

The password for the encryption is in the file password.  (It is
"YELLOW SUBMARINE")

Since it does not know how the file was encoded, it will try ALL of the
schemes, and delete the output files for those that did not work.  That is, if
openssl enc -d fails, it will set the status variable ($?) to 1.

Error output should be redirected to /dev/null

Here is an example output:

bash-4.4$ ./decode.sh poem1.3.out
ARGS: poem1.3.out
decoding: poem1.3.out
decoding with aes-128-cbc 0
status: 1 . Deleting file: poem1.3.out.0
decoding with aes-128-ecb 1
status: 1 . Deleting file: poem1.3.out.1
decoding with des 2
status: 1 . Deleting file: poem1.3.out.2
decoding with des3 3
status: 0 . Saving file: poem1.3.out.3
decoding with des-cbc 4
status: 1 . Deleting file: poem1.3.out.4
decoding with rc5 5
status: 1 . Deleting file: poem1.3.out.5
decoding with idea 6
status: 1 . Deleting file: poem1.3.out.6
bash-4.4$ diff poem1 poem1.3.out.3 
bash-4.4$ 

Note that the decoded file is the same as the original
poem1 == poem1.3.out.3

Write the bash shell script decode.sh which produces these results.

'''


# ********************************************************
# ********************************************************

### test function from google python course
### =======================================
# Provided simple test() function used in main() to print
# what each function returns vs. what it's supposed to return.
def test(got, expected):
    if (hasattr(expected, '__call__')):
        OK = expected(got)
    else:
        OK = (got == expected)
    if OK:
        prefix = ' OK '
    else:
        prefix = '  X '
    print ('%s got: %s expected: %s' % (prefix, repr(got), repr(expected)))


# Provided main() calls the above functions with interesting inputs,
# using test() to check if each result is correct or not.
def main():
  print ('hours')
  print('# is it greater than 0?')
  test(hours(), lambda x: x > 0)

  print ('cencode')
  test(cencode("this is an example of cencode", 3), 'wklvlvdqhadpsohrifhqfrgh')
  test(cencode("this is an example of cencode", 20), 'nbcmcmuhyrugjfyizwyhwixy')
  test(cencode("one, two, three, four", 55), 'rqhwzrwkuhhirxu')

  print ('cdecode')
  test(cdecode(cencode("this is an example of cencode", 3),3),'thisisanexampleofcencode')
  test(cdecode(cencode("this is an example of cencode", 20), 20), 'thisisanexampleofcencode')
  test(cdecode(cencode("one, two, three, four", 55),55), 'onetwothreefour')

  print ('decodecaesar')  
  test(decodecaesar(cencode("one, two, three, four", 55)), ['one two three four'])
  test(decodecaesar(cencode("this is an example of cencode", 20)), ['this is an example of cen code'])
  test(decodecaesar(cencode("now is the time for all good men to come to the aid of their country", 100)), ['now is the time for all good men to come to the aid of their country'])
  test(decodecaesar(cencode("to be or not to be, that is the question", 10)), ['to be or not to be that is the question'])

  print ('makematrix + flatten')  
  x = 'onetwothreefourfivesixseven'
  x39 = makematrix(x, [3,9])
  x93 = makematrix(x, [9,3])
  
  test(flatten(makematrix(x, [9,3])), flatten(numpy.array([['o', 'n', 'e'],
       ['t', 'w', 'o'],
       ['t', 'h', 'r'],
       ['e', 'e', 'f'],
       ['o', 'u', 'r'],
       ['f', 'i', 'v'],
       ['e', 's', 'i'],
       ['x', 's', 'e'],
       ['v', 'e', 'n']], 
      dtype='<U1')))

  print ('permute_rows')  
  test(flatten(permute_rows(x39, [2,0,1])), 
       flatten(numpy.array([['e', 's', 'i', 'x', 's', 'e', 'v', 'e', 'n'],
       ['o', 'n', 'e', 't', 'w', 'o', 't', 'h', 'r'],
       ['e', 'e', 'f', 'o', 'u', 'r', 'f', 'i', 'v']], 
      dtype='<U1')))

  test(flatten(permute_rows(x39, [1,2,0])), flatten(numpy.array([['e', 'e', 'f', 'o', 'u', 'r', 'f', 'i', 'v'],
       ['e', 's', 'i', 'x', 's', 'e', 'v', 'e', 'n'],
       ['o', 'n', 'e', 't', 'w', 'o', 't', 'h', 'r']], 
      dtype='<U1')))

  print ('permute_cols')  
  test(flatten(permute_cols(x93, [2,0,1])), flatten(numpy.array([['e', 'o', 'n'],
       ['o', 't', 'w'],
       ['r', 't', 'h'],
       ['f', 'e', 'e'],
       ['r', 'o', 'u'],
       ['v', 'f', 'i'],
       ['i', 'e', 's'],
       ['e', 'x', 's'],
       ['n', 'v', 'e']], 
      dtype='<U1')))

  test(flatten(permute_cols(x93, [1,2,0])), flatten(numpy.array([['n', 'e', 'o'],
       ['w', 'o', 't'],
       ['h', 'r', 't'],
       ['e', 'f', 'e'],
       ['u', 'r', 'o'],
       ['i', 'v', 'f'],
       ['s', 'i', 'e'],
       ['s', 'e', 'x'],
       ['e', 'n', 'v']], 
      dtype='<U1')))
  

  print ('transencode + transdecode')  
  q = 'onetwo'
  test(transencode(q, [2,3]), 'wotneo')
  q1 = 'wotneo'
  print ('(may not match since random.shuffle is, well, random)')
  
  test(transdecode(q1, [2,3], 2), ['wot neo', 'wto noe', 'ow teno', 'ot we on', 'two one', 'to woen', 'neo wot', 'noew to', 'eno owt', 'eo not w', 'one two', 'oen to w'])
   
  print ('encodexorbase64 + decodexorbase64')

  str = b'string'
  key = b'kermit'
  code = encodexorbase64(str, key)
  test(code, (b'\x02\x01\x04\x00\x03\x00Y^\x00', b'a2VybWl0\n'))
  test(decodexorbase64(code[0], code[1]), b'string')
  str2 = b'stringstringmore'
  code2 = encodexorbase64(str2, key)
  test(code2, (b'\x02\x01\x04\x00\x03\x00Y^\x02\x01\x04\x00\x03\x00Y^\x03eo\x008\x00\x00\x00\x00', b'a2VybWl0a2VybWl0a2VybQ==\n'))
  test(decodexorbase64(code2[0], code2[1]), b'stringstringmore')


if __name__ == '__main__':
     main()