import utils import os, copy from utils import * # Similarity parameters. alpha = 4 # Indel penalty. beta = 1 # Extension penalty. # Weight table for matching up genetic symbols "actg...". # The set of 16 symbols has been mapped in the preprocessing # of the data to the 16 characters beginning with ' '. # Masking the characters with 0xF yields indices for this table # in the range 0-15. */ match_weights = [ [ 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ], [ -1, 3, -1, 2, -1, 2, -1, 1, -1, 2, -1, 1, -1, 1, -1, 0 ], [ -1, -1, 3, 2, -1, -1, 2, 1, -1, -1, 2, 1, -1, -1, 1, 0 ], [ -1, 2, 2, 2, -1, 1, 1, 1, -1, 1, 1, 1, -1, 0, 0, 0 ], [ -1, -1, -1, -1, 3, 2, 2, 1, -1, -1, -1, -1, 2, 1, 1, 0 ], [ -1, 2, -1, 1, 2, 2, 1, 1, -1, 1, -1, 1, 1, 1, 0, 0 ], [ -1, -1, 2, 1, 2, 1, 2, 1, -1, -1, 1, 0, 1, 0, 1, 0 ], [ -1, 1, 1, 1, 1, 1, 1, 1, -1, 0, 0, 0, 0, 0, 0, 0 ], [ -1, -1, -1, -1, -1, -1, -1, -1, 3, 2, 2, 1, 2, 1, 1, 0 ], [ -1, 2, -1, 1, -1, 1, -1, 0, 2, 2, 1, 1, 1, 1, 0, 0 ], [ -1, -1, 2, 1, -1, -1, 1, 0, 2, 1, 2, 1, 1, 0, 1, 0 ], [ -1, 1, 1, 1, -1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 ], [ -1, -1, -1, -1, 2, 1, 1, 0, 2, 1, 1, 0, 2, 1, 1, 0 ], [ -1, 1, -1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0 ], [ -1, -1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 ], [ -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], ] conv={' ':0, '!':1, '"':2, '#':3, '$':4, '%':5, '&':6, '\'':7, '(':8, ')':9, '*':10, '+':11, ',':12, '-':13, '.':14, '/':15} def char2index(c): return conv[c] def MW(c1, c2): return match_weights[char2index(c1)][char2index(c2)] def similarity(top_side, left_side, maxVal, cols=None, top_edge=None, top_offset=0, left_offset=0): 'Compute similarity matrix for sub-block.' if not cols: maxentry=len(left_side)+1 cols = [[Entry() for i in range(maxentry)] for i in range(2)] for bi in xrange(len(top_side)): b = top_side[bi] # Rotate column buffers. temp = cols[0] cols[0] = cols[1] cols[1] = temp if top_edge: cols[0][0] = top_edge[bi] else: cols[0][0] = utils.Entry() # Compute p,q,d values for current column. # Note we need to refer to two entries in each column, # hence the "++"s in the body of the loop. mwr = match_weights[char2index(b)] # TODO cp=cols[0]; dp=cols[1] ci=0; di=0 for ai in xrange(len(left_side)): a = left_side[ai] d = dp[di].d + mwr[char2index(a)] # TODO if (d < 0): d = 0 di+=1 p = dp[di].d - alpha t = dp[di].p - beta if (p < t): p = t q = cp[ci].d - alpha t = cp[ci].q - beta if (q < t): q = t # Maximize d. if (p > d): d = p if (q > d): d = q ci+=1 cp[ci].d = d cp[ci].p = p cp[ci].q = q # Remember overall max. if (d > maxVal.max): maxVal.max = d maxVal.max_i = ai+left_offset maxVal.max_j = bi+top_offset if top_edge: top_edge[bi] = copy.copy(cp[ci]);