#!/usr/bin/python
# You may need to change this to /usr/local/bin/python

program_name = 'xpm2frameshift'

# Name: 
#    xpm2frameshift
# Version:
#    v0.2,  2007-8-03
# Author:
#    Andrew Jewett, Shea Group, University of California at Santa Barbara
# License:
#    GNU LGPL and GPL (General Public Licence)
# Comments:
#    Be forgiving.  It's my first python program
#
# Description below:

docs = 'syntax (example):\n' \
+ '\n' \
+ '   '+program_name+' hbond.ndx groups1.ndx groups2.ndx [DA,AD] < hbmap.xpm\n' \
+ '\n' \
+ 'Description:\n' \
+ '\n' \
+ ' This program reads .xpm files generated by \"g_hbond -hbm ...\".\n' \
+ ' for a system of two polymers and determines the \"frame-shift\" between\n'\
+ ' them, by examining the the hydrogen bond pattern.\n' \
+ ' This .xpm file should correspond to a trajectory of two biopolymers\n' \
+ ' interacting with eachother.  For each frame in the trajectory,\n' \
+ ' '+program_name+' calclates which residues are hydrogen-bonded together.\n' \
+ ' From this information, it calculates the average value of (i-j) where\n' \
+ ' i and j denote the location in the sequence of two residues from\n'\
+ ' either molecule which are hydrogen bonded together, averaged over all\n'\
+ ' of the hydrogen bonds connecting the two molecules.\n' \
+ '    As an input this program requires a pair of files (\"groups1.ndx\" and \n'\
+ ' \"groups2.ndx\" in this example), containing a list of which atoms beloning\n' \
+ ' to each of the residues from either molecule. (One group per residue.\n'\
+ ' The two files correspond to the two molecules.)\n' \
+ '\n' \
+ 'Input:\n' \
+ '\n' \
+ ' hbmap.xpm  An .xpm file created by \"g_hbond -hbm\" that you want to analyze\n' \
+ '            (See documentation for g_hbond.)\n' \
+ '\n' \
+ 'Arguments:\n' \
+ '\n' \
+ ' hbond.ndx    The ndx file created by \"g_hbond -hbn ...\"\n' \
+ ' groups1.ndx     The number of hydrogen bonds is counted between every pair\n' \
+ ' groups2.ndx     of groups in these two index files: groups1.ndx & groups2.ndx.\n' \
+ ' DA or AD     The DA/AD argument is optional (see below).\n' \
+ '\n' \
+ '        If the optional 4th argument \"DA\" is specified, then we will\n' \
+ '        only consider donor atoms from the first group (in groups1.ndx)\n' \
+ '        and acceptor atoms from the second group (in groups2.ndx)\n' \
+ '        (The reverse is true for AD.)\n' \
+ '\n'\
+ 'Performance Issues:\n' \
+ '\n'\
+ ' This program can consume lots of memory, due to the order that data is\n' \
+ ' saved to the xpm file, especially for long trajectories.  If the program\n'\
+ ' is performing very slowly (running out of memory), you can get around the\n'\
+ ' problem by running ghbond multiple times using the \"-b\" and \"-e\" flags,\n'+ ' in order to generate multiple smaller .xpm files.  You can run' \
+program_name+'\n'\
+ ' multiple times and concatenate the results together.\n'


def TotNumBonds(numbonds):
	tot_num_bonds = 0
	for i in range(0,len(numbonds)):
		for j in range(0,len(numbonds[0])):
			tot_num_bonds += numbonds[i][j]
	return tot_num_bonds

def FrameShift(numbonds):
	tot_num_bonds = 0
	frame_shift = 0.
	for i in range(0,len(numbonds)):
		for j in range(0,len(numbonds[0])):
			frame_shift += (i-j)*numbonds[i][j]
			tot_num_bonds += numbonds[i][j]
	if tot_num_bonds == 0:
		return 0.
	frame_shift /= tot_num_bonds
	return frame_shift


#  Read_hbond2atompairs(file)
# Read in the "hbond.ndx" file generated by "g_hbond -hbn hbond.ndx":
# Convert this information into a dictionary of tuples.
# (A simple list of tuples would also work, and be simpler.  Oh well.)
# Later, when we read in the body of the .xpm file, this lookup table
# will tell how the position in the .xpm file tells us
# which pair of (heavy) atoms are connected by the hydrogen bond?


def Read_hbond2atompairs(file):
	missing = -1
	lines = file.readlines()
	hb2ap = [] # <-which (DA) pairs of atoms correspond to each h-bond?
	redundant = [] # <-if we have read that pair already indicate that here
	               # (To avoid counting the same donor/acceptor pair twice)
	pairs = {} # <-used to keep track of which pairs we have read in so far

	for i in range(len(lines)-1, 0, -1):
		if (lines[i].find('[') == -1):
			tokens = lines[i].split()
			if len(tokens) == 3:
				[D, H, A] = lines[i].split()
				hb2ap.append((int(D), int(A)))
				if pairs.get((int(D),int(A)),missing)==missing:
					redundant.append(False)
					pairs[(int(D),int(A))] = True
				else:
					redundant.append(True)

			elif len(tokens) > 0:
				print '\n ---- Format error: ----\n' \
				'Where: the .ndx file that should have been created by \"g_hbond -hbn\"\n' \
				'Explanation: Most likely, you have specified your files in the wrong order.\n\n' \
				' ---- error details: ---- \n' \
				'You have the wrong number of entries on line ' \
				+str(i+1)+'\n'+'of the .ndx file.\n'
				'This file is supposed to contain h-bond triplets (DHA) in the last group.\n' \
				'Consequently, the last group should contain 3 atoms per line.'
				sys.exit(-2)
		else:
			break

	# We started this loop at the end of the list and read it in in reverse
	# order. I would think we need to reverse it again:  Consequently:
	#hb2ap.reverse()     # <-- oops.  NO!
	#redundant.reverse() # <-- oops.  no
	#COMMENTING PREVIOUS LINES: Don't reverse this list! g_hbond apparently
	#   creates the hbond.ndx list in the OPPOSITE order as the hbmap.xpm
	#   ..So just leave hb2ap the way it is (reversed).

	return hb2ap,redundant  # <-- Return a list of pairs of atoms, hb2ap
	                        #  The caller is warned if a pair is redundant


# Scan through the .xpm file to find the dimensions of the map to find:
#   The number of save points in the trajectory (= number of columns), and
#   the number of possible h-bonds (= number of rows).
#   Note: Table must be pre-allocated.

def ReadTableDimensions(file):
	for line in file:
		if line.find('*') == -1:
			dimensions = line.replace('\"', "").split()
			return int(dimensions[0]), int(dimensions[1])

# TranposeTable() reads in all the lines of a file. After removing the quotes
# the remaining data is assumed to be a 2-dimensional array of characters.
# We copy this two dimensional array of characters to a table (a 2-D list)
# however we transpose it as we copy it.
# (Why do we transpose the data?
#  Because g_hbond saves creates a .xpm file with the data saved
#  in a way which the array in a strange format where the data from 
#  different times, appears in different COLUMNS (not rows).
#  This is the opposite order that we need to access the data, 
#  so we transpose it now.)

def TransposeTable(file, table):
	j = 0
	for line in file:
		if (line.find('*') == -1) and (line.find('"') != -1):
			line_no_quotes = line.replace('\"','') # get rid of "
			for i in range(0,len(line_no_quotes)):
				c = line_no_quotes[i]
				if ((c != '\n') and (c != ',')):
					#print 'i,j,\'c\' = '+str(i)+','+str(j)+'\''+c+'\''
					table[i][j] = c #copy & transpose
			j += 1


# Read the list of atoms belonging to every group
# (By "group", I mean the groups that the user wants us to keep track of when
#  we count the number of h-bonds between groups. These two lists of groups are
#  indicated by the 2nd and 3rd command line arguments to the main program.

def Read_atoms2groups(file):
	a2g = {}
	g_count = bracket_count = 0
	for line in file:
		if (line.find('[') == -1):
			for a in line.split():
				a2g[int(a)] = g_count
		else:
			bracket_count += 1
			if ((len(a2g) != 0) or (bracket_count > 1)):
				g_count+=1
	return a2g, g_count+1 # return the lookup table & number of groups


def PrintBondTable(numbonds):
	for g1 in range(0,len(numbonds)):
		for g2 in range(0,len(numbonds[g1])):
			sys.stdout.write(str(numbonds[g1][g2]))
			if g2+1 < len(numbonds[g1]):
				sys.stdout.write(' ')
			else:
				sys.stdout.write('\n')
	sys.stdout.write('\n')


#  main program:
import sys
if (len(sys.argv) < 4) or (len(sys.argv) > 5):
	print '\nError:\n       Expected at least 3 arguments.\n\n' + docs
	sys.exit(-1)

hb2ap_filename = sys.argv[1]
a2g_filename1 = sys.argv[2]
a2g_filename2 = sys.argv[3]

g1donors_g2acceptors = False
g1acceptors_g2donors = False
if len(sys.argv) > 4:
	if sys.argv[4] == 'DA':
		g1donors_g2acceptors = True
	elif sys.argv[4] == 'AD':
		g1acceptors_g2donors = True

hb2ap,redundant = Read_hbond2atompairs(open(hb2ap_filename,'r'))
a2g1,numgroups1 = Read_atoms2groups(open(a2g_filename1,'r'))
a2g2,numgroups2 = Read_atoms2groups(open(a2g_filename2,'r'))

#print hb2ap
#print redundant
#print a2g1
#print a2g2

# Read in the dimensions of the data in the .xpm file
# This includes the number of bonds, and the duration of the simulation.
# This should agree with the number of rows and columns in the .xpm file.

(sim_duration, numbonds_hb2ap) = ReadTableDimensions(sys.stdin)


# Now that we know how big it is, we create a large array 
# to store the data in the .xpm file

lines = [[' ' for j in range(0,numbonds_hb2ap)] for i in range(0,sim_duration)]

TransposeTable(sys.stdin, lines)

if numbonds_hb2ap != len(hb2ap):
	print '\nError: the number of hbonds [='+str(numbonds_hb2ap)+'] (Donor-Hydrogen-Acceptor triplets)\n'+\
	'       from the file \"'+hb2ap_filename+'\", (created by running \"g_hbond -hbn\")\n'+\
	'       does not match the number of rows of data contained in the\n'+\
	'       .xpm file [='+str(len(hb2ap))+'] (created by running \"g_hbond -hbm\").  Assuming you\n'+\
	'       did not make a mistake entering the filenames, you are probably using\n'+\
	'       buggy version of g_hbond.  The version of g_hbond which ships with\n'+\
	'      gromacs 3.3.1 has this bug.  (But it looks like they fixed it in the\n'+\
	'       CVS version.)  You need to obtain a newer version of g_hbond,\n'+\
	'       and/or download and compile the CVS version.\n'+\
	'       Then you must use it to regenerate your .xpm and .ndx files.\n'
	sys.exit(-3)

# Our goal:  For each moment in time during the simulation,
#            we want to calculate the following:

# numbondsDA[g1][g2] 
# counts the number of times a donor from group g1 bonds to an acceptor from g2

# numbondsAD[g1][g2] 
# counts the number of times an acceptor from group g1 bonds to a donor from g2

# numbonds[g1][g2] 
# counts the total number of hydrogen bonds between groups g1 and g2
#   that is:
#   numbonds[g1][g2] = numbondsDA[g1][g2] + numbondsAD[g1][g2]

missing = -1

for line in lines:
	# preallocate the bond-counter tables to size: numgroups1 x numbroups2:
	numbonds = [[0 for g2 in range(0,numgroups2)] \
	for g1 in range(0,numgroups1)]
	numbondsDA=[[0 for g2 in range(0,numgroups2)] \
	for g1 in range(0,numgroups1)]
	numbondsAD=[[0 for g2 in range(0,numgroups2)] \
	for g1 in range(0,numgroups1)]

	# Loop over all of the possible bonds at this moment in time
	for b in range(0,len(hb2ap)):

		# If the bond exists,update the relevant bond counters:
		if ((line[b] == 'o') and (not redundant[b])):
			D,A = hb2ap[b] # lookup the donor and acceptor atoms
			g1=a2g1.get(D,missing)#ifD belongs to a group in g1.ndx
			g2=a2g2.get(A,missing)#& A belongs to a group in g2.ndx
			#print str(D)+','+str(A)+'{g'+str(g1+1)+',g'+str(g2+1)
			if ((g1!=missing) and (g2!=missing)):# then add 1 to
				numbondsDA[g1][g2] += 1      # the relevant
				numbonds[g1][g2] += 1        # bond counters

			g1=a2g1.get(A,missing)#ifA belongs to a group in g1.ndx
			g2=a2g2.get(D,missing)#& D belongs to a group in g2.ndx
			#print '  and {g'+str(g1+1)+',g'+str(g2+1)+'}'
			if ((g1!=missing) and (g2!=missing)):# then add 1 to
				numbondsAD[g1][g2] +=1       # the relevant
				numbonds[g1][g2] +=1         # bond counters


	if g1donors_g2acceptors:
		#print ' -----DA:---- '
		print str(FrameShift(numbondsDA))
	elif g1acceptors_g2donors:
		#print '     -AD:- '
		print str(FrameShift(numbondsAD))
	else:
		#print '   -total:- '
		print str(FrameShift(numbonds))