#!/usr/bin/python # You may need to change this to /usr/local/bin/python program_name = 'xpm2frameshift' # Name: # xpm2frameshift # Version: # v0.2, 2007-8-03 # Author: # Andrew Jewett, Shea Group, University of California at Santa Barbara # License: # GNU LGPL and GPL (General Public Licence) # Comments: # Be forgiving. It's my first python program # # Description below: docs = 'syntax (example):\n' \ + '\n' \ + ' '+program_name+' hbond.ndx groups1.ndx groups2.ndx [DA,AD] < hbmap.xpm\n' \ + '\n' \ + 'Description:\n' \ + '\n' \ + ' This program reads .xpm files generated by \"g_hbond -hbm ...\".\n' \ + ' for a system of two polymers and determines the \"frame-shift\" between\n'\ + ' them, by examining the the hydrogen bond pattern.\n' \ + ' This .xpm file should correspond to a trajectory of two biopolymers\n' \ + ' interacting with eachother. For each frame in the trajectory,\n' \ + ' '+program_name+' calclates which residues are hydrogen-bonded together.\n' \ + ' From this information, it calculates the average value of (i-j) where\n' \ + ' i and j denote the location in the sequence of two residues from\n'\ + ' either molecule which are hydrogen bonded together, averaged over all\n'\ + ' of the hydrogen bonds connecting the two molecules.\n' \ + ' As an input this program requires a pair of files (\"groups1.ndx\" and \n'\ + ' \"groups2.ndx\" in this example), containing a list of which atoms beloning\n' \ + ' to each of the residues from either molecule. (One group per residue.\n'\ + ' The two files correspond to the two molecules.)\n' \ + '\n' \ + 'Input:\n' \ + '\n' \ + ' hbmap.xpm An .xpm file created by \"g_hbond -hbm\" that you want to analyze\n' \ + ' (See documentation for g_hbond.)\n' \ + '\n' \ + 'Arguments:\n' \ + '\n' \ + ' hbond.ndx The ndx file created by \"g_hbond -hbn ...\"\n' \ + ' groups1.ndx The number of hydrogen bonds is counted between every pair\n' \ + ' groups2.ndx of groups in these two index files: groups1.ndx & groups2.ndx.\n' \ + ' DA or AD The DA/AD argument is optional (see below).\n' \ + '\n' \ + ' If the optional 4th argument \"DA\" is specified, then we will\n' \ + ' only consider donor atoms from the first group (in groups1.ndx)\n' \ + ' and acceptor atoms from the second group (in groups2.ndx)\n' \ + ' (The reverse is true for AD.)\n' \ + '\n'\ + 'Performance Issues:\n' \ + '\n'\ + ' This program can consume lots of memory, due to the order that data is\n' \ + ' saved to the xpm file, especially for long trajectories. If the program\n'\ + ' is performing very slowly (running out of memory), you can get around the\n'\ + ' problem by running ghbond multiple times using the \"-b\" and \"-e\" flags,\n'+ ' in order to generate multiple smaller .xpm files. You can run' \ +program_name+'\n'\ + ' multiple times and concatenate the results together.\n' def TotNumBonds(numbonds): tot_num_bonds = 0 for i in range(0,len(numbonds)): for j in range(0,len(numbonds[0])): tot_num_bonds += numbonds[i][j] return tot_num_bonds def FrameShift(numbonds): tot_num_bonds = 0 frame_shift = 0. for i in range(0,len(numbonds)): for j in range(0,len(numbonds[0])): frame_shift += (i-j)*numbonds[i][j] tot_num_bonds += numbonds[i][j] if tot_num_bonds == 0: return 0. frame_shift /= tot_num_bonds return frame_shift # Read_hbond2atompairs(file) # Read in the "hbond.ndx" file generated by "g_hbond -hbn hbond.ndx": # Convert this information into a dictionary of tuples. # (A simple list of tuples would also work, and be simpler. Oh well.) # Later, when we read in the body of the .xpm file, this lookup table # will tell how the position in the .xpm file tells us # which pair of (heavy) atoms are connected by the hydrogen bond? def Read_hbond2atompairs(file): missing = -1 lines = file.readlines() hb2ap = [] # <-which (DA) pairs of atoms correspond to each h-bond? redundant = [] # <-if we have read that pair already indicate that here # (To avoid counting the same donor/acceptor pair twice) pairs = {} # <-used to keep track of which pairs we have read in so far for i in range(len(lines)-1, 0, -1): if (lines[i].find('[') == -1): tokens = lines[i].split() if len(tokens) == 3: [D, H, A] = lines[i].split() hb2ap.append((int(D), int(A))) if pairs.get((int(D),int(A)),missing)==missing: redundant.append(False) pairs[(int(D),int(A))] = True else: redundant.append(True) elif len(tokens) > 0: print '\n ---- Format error: ----\n' \ 'Where: the .ndx file that should have been created by \"g_hbond -hbn\"\n' \ 'Explanation: Most likely, you have specified your files in the wrong order.\n\n' \ ' ---- error details: ---- \n' \ 'You have the wrong number of entries on line ' \ +str(i+1)+'\n'+'of the .ndx file.\n' 'This file is supposed to contain h-bond triplets (DHA) in the last group.\n' \ 'Consequently, the last group should contain 3 atoms per line.' sys.exit(-2) else: break # We started this loop at the end of the list and read it in in reverse # order. I would think we need to reverse it again: Consequently: #hb2ap.reverse() # <-- oops. NO! #redundant.reverse() # <-- oops. no #COMMENTING PREVIOUS LINES: Don't reverse this list! g_hbond apparently # creates the hbond.ndx list in the OPPOSITE order as the hbmap.xpm # ..So just leave hb2ap the way it is (reversed). return hb2ap,redundant # <-- Return a list of pairs of atoms, hb2ap # The caller is warned if a pair is redundant # Scan through the .xpm file to find the dimensions of the map to find: # The number of save points in the trajectory (= number of columns), and # the number of possible h-bonds (= number of rows). # Note: Table must be pre-allocated. def ReadTableDimensions(file): for line in file: if line.find('*') == -1: dimensions = line.replace('\"', "").split() return int(dimensions[0]), int(dimensions[1]) # TranposeTable() reads in all the lines of a file. After removing the quotes # the remaining data is assumed to be a 2-dimensional array of characters. # We copy this two dimensional array of characters to a table (a 2-D list) # however we transpose it as we copy it. # (Why do we transpose the data? # Because g_hbond saves creates a .xpm file with the data saved # in a way which the array in a strange format where the data from # different times, appears in different COLUMNS (not rows). # This is the opposite order that we need to access the data, # so we transpose it now.) def TransposeTable(file, table): j = 0 for line in file: if (line.find('*') == -1) and (line.find('"') != -1): line_no_quotes = line.replace('\"','') # get rid of " for i in range(0,len(line_no_quotes)): c = line_no_quotes[i] if ((c != '\n') and (c != ',')): #print 'i,j,\'c\' = '+str(i)+','+str(j)+'\''+c+'\'' table[i][j] = c #copy & transpose j += 1 # Read the list of atoms belonging to every group # (By "group", I mean the groups that the user wants us to keep track of when # we count the number of h-bonds between groups. These two lists of groups are # indicated by the 2nd and 3rd command line arguments to the main program. def Read_atoms2groups(file): a2g = {} g_count = bracket_count = 0 for line in file: if (line.find('[') == -1): for a in line.split(): a2g[int(a)] = g_count else: bracket_count += 1 if ((len(a2g) != 0) or (bracket_count > 1)): g_count+=1 return a2g, g_count+1 # return the lookup table & number of groups def PrintBondTable(numbonds): for g1 in range(0,len(numbonds)): for g2 in range(0,len(numbonds[g1])): sys.stdout.write(str(numbonds[g1][g2])) if g2+1 < len(numbonds[g1]): sys.stdout.write(' ') else: sys.stdout.write('\n') sys.stdout.write('\n') # main program: import sys if (len(sys.argv) < 4) or (len(sys.argv) > 5): print '\nError:\n Expected at least 3 arguments.\n\n' + docs sys.exit(-1) hb2ap_filename = sys.argv[1] a2g_filename1 = sys.argv[2] a2g_filename2 = sys.argv[3] g1donors_g2acceptors = False g1acceptors_g2donors = False if len(sys.argv) > 4: if sys.argv[4] == 'DA': g1donors_g2acceptors = True elif sys.argv[4] == 'AD': g1acceptors_g2donors = True hb2ap,redundant = Read_hbond2atompairs(open(hb2ap_filename,'r')) a2g1,numgroups1 = Read_atoms2groups(open(a2g_filename1,'r')) a2g2,numgroups2 = Read_atoms2groups(open(a2g_filename2,'r')) #print hb2ap #print redundant #print a2g1 #print a2g2 # Read in the dimensions of the data in the .xpm file # This includes the number of bonds, and the duration of the simulation. # This should agree with the number of rows and columns in the .xpm file. (sim_duration, numbonds_hb2ap) = ReadTableDimensions(sys.stdin) # Now that we know how big it is, we create a large array # to store the data in the .xpm file lines = [[' ' for j in range(0,numbonds_hb2ap)] for i in range(0,sim_duration)] TransposeTable(sys.stdin, lines) if numbonds_hb2ap != len(hb2ap): print '\nError: the number of hbonds [='+str(numbonds_hb2ap)+'] (Donor-Hydrogen-Acceptor triplets)\n'+\ ' from the file \"'+hb2ap_filename+'\", (created by running \"g_hbond -hbn\")\n'+\ ' does not match the number of rows of data contained in the\n'+\ ' .xpm file [='+str(len(hb2ap))+'] (created by running \"g_hbond -hbm\"). Assuming you\n'+\ ' did not make a mistake entering the filenames, you are probably using\n'+\ ' buggy version of g_hbond. The version of g_hbond which ships with\n'+\ ' gromacs 3.3.1 has this bug. (But it looks like they fixed it in the\n'+\ ' CVS version.) You need to obtain a newer version of g_hbond,\n'+\ ' and/or download and compile the CVS version.\n'+\ ' Then you must use it to regenerate your .xpm and .ndx files.\n' sys.exit(-3) # Our goal: For each moment in time during the simulation, # we want to calculate the following: # numbondsDA[g1][g2] # counts the number of times a donor from group g1 bonds to an acceptor from g2 # numbondsAD[g1][g2] # counts the number of times an acceptor from group g1 bonds to a donor from g2 # numbonds[g1][g2] # counts the total number of hydrogen bonds between groups g1 and g2 # that is: # numbonds[g1][g2] = numbondsDA[g1][g2] + numbondsAD[g1][g2] missing = -1 for line in lines: # preallocate the bond-counter tables to size: numgroups1 x numbroups2: numbonds = [[0 for g2 in range(0,numgroups2)] \ for g1 in range(0,numgroups1)] numbondsDA=[[0 for g2 in range(0,numgroups2)] \ for g1 in range(0,numgroups1)] numbondsAD=[[0 for g2 in range(0,numgroups2)] \ for g1 in range(0,numgroups1)] # Loop over all of the possible bonds at this moment in time for b in range(0,len(hb2ap)): # If the bond exists,update the relevant bond counters: if ((line[b] == 'o') and (not redundant[b])): D,A = hb2ap[b] # lookup the donor and acceptor atoms g1=a2g1.get(D,missing)#ifD belongs to a group in g1.ndx g2=a2g2.get(A,missing)#& A belongs to a group in g2.ndx #print str(D)+','+str(A)+'{g'+str(g1+1)+',g'+str(g2+1) if ((g1!=missing) and (g2!=missing)):# then add 1 to numbondsDA[g1][g2] += 1 # the relevant numbonds[g1][g2] += 1 # bond counters g1=a2g1.get(A,missing)#ifA belongs to a group in g1.ndx g2=a2g2.get(D,missing)#& D belongs to a group in g2.ndx #print ' and {g'+str(g1+1)+',g'+str(g2+1)+'}' if ((g1!=missing) and (g2!=missing)):# then add 1 to numbondsAD[g1][g2] +=1 # the relevant numbonds[g1][g2] +=1 # bond counters if g1donors_g2acceptors: #print ' -----DA:---- ' print str(FrameShift(numbondsDA)) elif g1acceptors_g2donors: #print ' -AD:- ' print str(FrameShift(numbondsAD)) else: #print ' -total:- ' print str(FrameShift(numbonds))