#!/usr/bin/python
import mechanize
import sys
import csv
import time

# Parse the response from IBDB, returning the category with the most "votes"
# (i.e. occurrences)
def read_response(resp):
    """Guess a show's category from the text of an IBDB search result page.

    Each category name is counted as a case-insensitive substring of
    ``resp``. A fixed per-category baseline is then subtracted to net out
    occurrences that are present in the page regardless of the show. The
    adjusted counts are written to stderr as a list, one line per call.

    Returns the category with the highest adjusted count, or "NA" when no
    adjusted count is positive. Ties go to the category listed first
    (musical, then play, then special).
    """
    labels = ["musical", "play", "special"]
    # Base-rate occurrences of each label in the page, in the same order
    # as `labels`.
    base_rates = [1, 8, 0]

    page = resp.lower()
    votes = [page.count(label) - base
             for label, base in zip(labels, base_rates)]

    sys.stderr.write(str(votes) + "\n")

    top = max(votes)
    if top <= 0:
        return "NA"

    # list.index returns the first match, so earlier labels win ties
    return labels[votes.index(top)]

# Both file names are required; fail with a usage message rather than a
# bare IndexError when one is missing.
if len(sys.argv) < 3:
    sys.stderr.write("Usage: %s SHOWS_CSV OUTPUT_CSV\n" % sys.argv[0])
    sys.exit(1)

in_fn = sys.argv[1] # List of shows
out_fn = sys.argv[2] # Output file

# Initialize a browser that looks like Firefox
br = mechanize.Browser()
# The pieces of the User-agent value are joined by implicit string literal
# concatenation, so each piece must carry its own trailing space.
br.addheaders = [('User-agent',
                  'Mozilla/5.0 (X11; U; Linux i686; en-US; '
                  'rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 '
                  'Firefox/3.0.1')]

# Read in the list of shows
shows = {}      # show name -> 1; a dict used as a set to drop duplicates
show_data = {}  # show name -> category; filled in by the query loop

# The first column of each row is taken as the show name. The file is
# closed as soon as it has been read.
with open(in_fn, "rU") as f:
    for r in csv.reader(f):
        # Blank lines parse to an empty row; skip them, along with rows
        # whose name cell is empty, since there is nothing to search for.
        if not r or not r[0].strip():
            continue
        shows[r[0]] = 1

# Query each show
for s in shows:
    # Progress prefix; read_response() completes the line on stderr with
    # the adjusted category counts.
    sys.stderr.write(s + ": ")

    # Load the search page afresh for every show, fill in the show name in
    # the first form on the page, submit it, and then parse the result
    br.open("http://www.ibdb.com/advSearchShows.php")
    br.select_form(nr=0)
    br.form["ShowProperName"] = s
    br.submit()
    show_data[s] = read_response(br.response().read())

    time.sleep(1) # To avoid overloading the IBDB server


# Write the output
# "wb" is the mode the Python 2 csv module expects for output files. The
# with-block closes the file even if writing a row fails.
with open(out_fn, "wb") as f:
    writer = csv.writer(f)

    writer.writerow(["Show", "Category"])
    for s, category in show_data.items():
        writer.writerow([s, category])
        # Echo each row to stdout as it is written, flushing so the line
        # appears immediately.
        sys.stdout.write(",".join([s, category]) + "\n")
        sys.stdout.flush()