#!/usr/bin/env python

# Copyright (C) 2009 Cornell University
# All rights reserved

from __future__ import with_statement

import sys
import socket
import cPickle
import StringIO
import random
import hashlib
import Image
import os.path
from Crypto.Cipher import DES
from Crypto.Cipher import XOR

# Byte lengths used throughout the client.
KEY_LEN = 8  # length of the random DES key generated for each upload
HASH_LEN = 20  # length of one SHA-1 digest, i.e. of a dohash() result
SALT_LEN = 20  # length of the random salt generated for each captcha

alpha = 10**5  # number of SHA-1 rounds applied to keywords and bindings
receive = 1024 * 1024  # maximum number of bytes requested per recv() call

# (host, port) endpoints of the three servers this client talks to.
indexserver_addr = ('127.0.0.1', 7000)
contentserver_addr = ('127.0.0.1', 7001)
captcha_addr = ('127.0.0.1', 7002)

def dohash(val, num):
    """Return the result of applying SHA-1 to ``val`` ``num`` times in a row.

    val -- byte string to start from
    num -- number of hashing rounds; when it is zero or negative ``val``
           is returned unchanged

    Each round feeds the raw 20-byte digest of the previous round back
    into SHA-1.
    """
    digest = val
    rounds_left = num
    while rounds_left > 0:
        digest = hashlib.sha1(digest).digest()
        rounds_left -= 1
    return digest

def randombytes(bytes):
    """Return a string of ``bytes`` random bytes.

    bytes -- number of random bytes wanted; zero or a negative number
             gives an empty string

    The result is used for encryption keys and salts, so it is drawn from
    the operating system's cryptographic random source rather than from
    the predictable generator in the ``random`` module.
    """
    # ``os`` is bound by the file's existing ``import os.path``.
    return os.urandom(max(bytes, 0))

def solvecaptcha(captcha):
    """Show a captcha image to the user and return the typed solution.

    captcha -- the encoded image data as a string

    The image is opened from an in-memory buffer and displayed with the
    imaging library's viewer; the answer is then read from standard input.
    """
    picture = Image.open(StringIO.StringIO(captcha))
    picture.show()
    answer = raw_input('Captcha solution: ')
    return answer
                                                        
def encrypt(val, key):
    """Encrypt the string ``val`` with DES in ECB mode under ``key``.

    The plaintext is padded to a multiple of 8 bytes before encryption.
    Between 1 and 8 bytes are always appended: spaces followed by one
    final byte holding the total number of bytes added, which is what
    decrypt() reads to strip the padding again.
    """
    cipher = DES.new(key, DES.MODE_ECB)
    padlen = 8 - (len(val) % 8)
    filler = ' ' * (padlen - 1)
    padded = val + filler + chr(padlen)
    return cipher.encrypt(padded)

def decrypt(cipher, key):
    """Decrypt ``cipher`` with DES in ECB mode under ``key`` and unpad it.

    The last byte of the decrypted data is taken as the number of padding
    bytes (as written by encrypt()) and that many bytes are cut off the
    end.  The padding is not validated: if the last byte is zero, e.g.
    after decrypting with a wrong key, the result is an empty string.
    """
    plain = DES.new(key, DES.MODE_ECB).decrypt(cipher)
    padlen = ord(plain[-1])
    # Drop the padding, including the trailing length byte itself.
    return plain[:-padlen]

def sendtoserver(data, addr):
    """Send ``data`` to the server at ``addr`` and return its whole reply.

    data -- request string to transmit
    addr -- (host, port) tuple of the server

    A fresh TCP connection is opened for every request.  After sending,
    the write side is shut down so the server sees end-of-request, and the
    reply is read until the server closes the connection.  Socket errors
    propagate to the caller; the socket is closed in every case.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect(addr)
        s.sendall(data)
        s.shutdown(socket.SHUT_WR)
        # Collect the pieces and join once instead of repeatedly
        # re-copying a growing string.
        chunks = []
        while True:
            chunk = s.recv(receive)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        # Release the socket even when connect/send/recv raised.
        s.close()
    return ''.join(chunks)

def fetchcaptchas(num):
    """Ask the captcha server for ``num`` captchas and return its reply.

    The request is the pickled number; the reply is unpickled and returned
    as is.  add() unpacks every item of the reply as a 2-tuple.
    """
    request = cPickle.dumps(num)
    reply = sendtoserver(request, captcha_addr)
    # NOTE(review): unpickling data received over the network is only safe
    # if the server is trusted; a malicious reply can execute code.
    return cPickle.loads(reply)

def addtoindexserver(alphas, captchas, captchasalts, bindhashes):
    """Send a 'put' request to the index server.

    alphas       -- lookup keys; add() passes the iterated keyword hashes
    captchas     -- one captcha per key
    captchasalts -- one salt per captcha
    bindhashes   -- one binding hash per key

    The pickled request is ('put', keys, entries), where entries pairs up
    the captchas, salts and binding hashes position by position.  The
    server's reply is discarded.
    """
    keys = tuple(alphas)
    entries = zip(tuple(captchas), tuple(captchasalts), tuple(bindhashes))
    request = cPickle.dumps(('put', keys, entries))
    sendtoserver(request, indexserver_addr)

def addtocontentserver(cipher, bvals, captchasols):
    """Send a 'multiput' request to the content server.

    cipher      -- the encrypted file
    bvals       -- the wrapped file keys, one per keyword
    captchasols -- the salted captcha solution hashes the record is
                   stored under

    The pickled request is ('multiput', solutions, (cipher, bvals)).  The
    server's reply is discarded.
    """
    solutions = tuple(captchasols)
    record = (cipher, tuple(bvals))
    request = cPickle.dumps(('multiput', solutions, record))
    sendtoserver(request, contentserver_addr)

def lookupindexserver(alphas):
    """Send a 'get' request for ``alphas`` to the index server.

    Returns the unpickled reply.  The callers in this file unpack every
    item of it as ((captcha, sign), salt, bindhash).
    """
    request = cPickle.dumps(('get', tuple(alphas)))
    reply = sendtoserver(request, indexserver_addr)
    # NOTE(review): unpickling data received over the network is only safe
    # if the server is trusted; a malicious reply can execute code.
    return cPickle.loads(reply)

def lookupcontentserver(captchasols):
    """Send a 'get' request for ``captchasols`` to the content server.

    Returns the unpickled reply.  search() unpacks every item of it as a
    (cipher, bvals) pair.
    """
    request = cPickle.dumps(('get', tuple(captchasols)))
    reply = sendtoserver(request, contentserver_addr)
    # NOTE(review): unpickling data received over the network is only safe
    # if the server is trusted; a malicious reply can execute code.
    return cPickle.loads(reply)

def add(filename, keywords):
    """Encrypt the file ``filename`` and publish it under each keyword.

    filename -- path of the file to upload
    keywords -- list of keyword strings (at most 255)

    Outline:
      1. Fetch one captcha per keyword and register, for every keyword,
         its iterated hash together with the captcha, a fresh random salt
         and the binding hash of keyword + captcha at the index server.
      2. Read the entries back from the index server and verify each
         binding hash (presumably the server may answer with a captcha
         that was bound to the keyword earlier -- confirm against the
         server implementation).
      3. For every keyword, wrap the random file key by XOR-ing it with
         the binding hash, and let the user solve the captcha.
      4. Encrypt [count byte][captcha hashes][file content] with the file
         key and store it at the content server under the salted hashes
         of the captcha solutions.

    Exits with status 1 if there are too many keywords or if a binding
    hash returned by the index server does not verify.
    """
    # The number of captcha hashes is stored in a single byte of the
    # encrypted record, so fail before any server state is modified.
    if len(keywords) > 255:
        print('too many keywords: at most 255 are supported')
        sys.exit(1)
    captchas = fetchcaptchas(len(keywords))
    # so collisions in the captcha solutions don't cause key collisions
    captchasalts = [randombytes(SALT_LEN) for _ in captchas]
    alphas = []
    bindhashes = []
    for k, (cap, sign) in zip(keywords, captchas):
        bindhashes.append(dohash(k + cap, alpha))
        alphas.append(dohash(k, alpha))
    key = randombytes(KEY_LEN)
    # Binary mode: the content is encrypted byte for byte, so it must not
    # go through newline translation on platforms that perform it.
    with open(filename, 'rb') as fin:
        content = fin.read()
    addtoindexserver(alphas, captchas, captchasalts, bindhashes)
    realcaptchas = lookupindexserver(alphas)
    captchasols = []
    captchahashes = []
    bvals = []
    for k, ((cap, sign), capsalt, bindhash) in zip(keywords, realcaptchas):
        # The binding hash is expensive (alpha rounds); compute it once
        # and use it both for the check and for wrapping the key.
        alpha_1 = dohash(k + cap, alpha)
        if bindhash != alpha_1:
            print('binding hash error: bad captcha returned from index server')
            sys.exit(1)
        bvals.append(XOR.new(key).encrypt(alpha_1)[:KEY_LEN])
        sol = solvecaptcha(cap)
        captchasols.append(dohash(sol + capsalt, 1))
        captchahashes.append(dohash(cap, 1))
    cipher = encrypt(chr(len(captchahashes)) + ''.join(captchahashes) + content, key)
    addtocontentserver(cipher, bvals, captchasols)

def search(savedir, keywords):
    """Look up ``keywords`` and save every matching file into ``savedir``.

    savedir  -- existing directory; results are written to it as
                'result0', 'result1', ...
    keywords -- list of keyword strings (at least one)

    For every keyword the captcha entry is fetched from the index server,
    its binding hash is verified and the user solves the captcha.  The
    salted hashes of the solutions are then used to query the content
    server.  Each result carries the encrypted file and a list of wrapped
    keys; every wrapped key is tried with the binding hash of the first
    keyword until the decrypted header lists the hash of the first
    keyword's captcha.

    Exits with status 1 if a binding hash does not verify or if a result
    cannot be decrypted with any of its wrapped keys.
    """
    alphas = [dohash(k, alpha) for k in keywords]
    captchas = lookupindexserver(alphas)
    captchasols = []
    bindings = []
    for k, ((cap, sign), capsalt, bindhash) in zip(keywords, captchas):
        binding = dohash(k + cap, alpha)
        if bindhash != binding:
            print('binding hash error: bad captcha returned from index server')
            sys.exit(1)
        bindings.append(binding)
        sol = solvecaptcha(cap)
        captchasols.append(dohash(sol + capsalt, 1))
    results = lookupcontentserver(captchasols)
    if len(results) > 0:
        # Both values are the same for every result, so compute them once.
        # The first keyword's binding hash was already derived above.
        alpha_1 = bindings[0]
        firstcaphash = dohash(captchas[0][0][0], 1)
    for i, r in enumerate(results):
        done = False
        cipher, bvals = r
        for b in bvals:
            key = XOR.new(alpha_1).encrypt(b)[:KEY_LEN]
            plain = decrypt(cipher, key)
            if not plain:
                # A wrong trial key yields garbage whose bogus padding can
                # strip everything; move on to the next wrapped key.
                continue
            # Layout: one count byte, that many HASH_LEN-byte captcha
            # hashes, then the file content.
            numhashes = ord(plain[0])
            hashes_end = 1 + numhashes * HASH_LEN
            captchahashes = set(plain[pos:pos + HASH_LEN]
                                for pos in xrange(1, hashes_end, HASH_LEN))
            if firstcaphash in captchahashes:
                # Binary mode so the content is written back unmodified.
                with open(os.path.join(savedir, 'result' + str(i)), 'wb') as fout:
                    fout.write(plain[hashes_end:])
                done = True
                break
        if not done:
            print('error decrypting search result')
            sys.exit(1)

def usage():
    """Print the general command-line synopsis to standard output."""
    prog = sys.argv[0]
    print('Usage: %s < upload | search > parameters' % prog)

def usageadd():
    """Print the synopsis of the 'upload' command to standard output."""
    prog = sys.argv[0]
    print('Usage: %s upload filename keyword1 [ keyword2 [ keyword3 [ ... ] ] ]' % prog)

def usagesearch():
    """Print the synopsis of the 'search' command to standard output."""
    prog = sys.argv[0]
    print('Usage: %s search savedir keyword1 [ keyword2 [ keyword3 [ ... ] ] ]' % prog)

def parsecmd():
    """Parse ``sys.argv`` and return ``(op, filename, keywords)``.

    op       -- 'upload' or 'search'
    filename -- the argument following the operation (the file to upload,
                or the directory to save search results in)
    keywords -- list of all remaining arguments (at least one)

    Prints the matching usage text and exits with status 2 when the
    operation is missing or unknown, or when fewer than two parameters
    follow it.
    """
    args = sys.argv
    if len(args) < 2 or args[1] not in ('upload', 'search'):
        usage()
        sys.exit(2)
    op = args[1]
    if len(args) < 4:
        # Show the help text that belongs to the requested operation.
        showhelp = usageadd if op == 'upload' else usagesearch
        showhelp()
        sys.exit(2)
    return (op, args[2], args[3:])

def main():
    """Parse the command line and run the requested operation.

    'upload' calls add(); 'search' calls search().  Both receive the first
    parameter (file name or save directory) and the keyword list.
    """
    op, target, keywords = parsecmd()
    handler = add if op == 'upload' else search
    handler(target, keywords)

# Run the command-line client only when executed as a script, not on import.
if __name__ == '__main__':
    main()
