#!/usr/bin/python # You may need to change this to /usr/local/bin/python program_name = 'xpm2matrix' # Name: # xpm2matrix # Version: # v0.2, 2007-8-03 # Author: # Andrew Jewett, Shea Group, University of California at Santa Barbara # License: # GNU LGPL and GPL (General Public Licence) # Comments: # Be forgiving. It's my first python program # (I think one of the reasons it is so slow is because the program has to # read in the entire .xpm file and transpose the data before it can begin. # This seems to be where the program is slowing down. However it could also # be because I am using memory inefficienly (making temporary copies). # Regardless, for really long trajectories, or large systems, # the arrays inside the xpm files can be quite large. # I hope the authors of g_hbond will later provide an option to save # the xpm data with the x and y axis exchanged. # That way, I could just read in and process one line at a time.) # # Description below: docs = 'syntax (example):\n' \ + '\n' \ + ' '+program_name+' hbond.ndx groups1.ndx groups2.ndx [DA,AD] < hbmap.xpm > con.mat\n' \ + '\n' \ + 'Description:\n' \ + '\n' \ + ' This program reads .xpm files generated by \"g_hbond -hbm ...\",\n' \ + ' and, for each frame in the trajectory, prints out a 2-dimensional table\n' \ + ' indicating which pairs of atoms (or residues, or groups of atoms)\n' \ + ' are connected together (a connectivity matrix). This table may be easier\n' \ + ' to interpret than the original .xpm file. As an input it requires a pair\n' \ + ' of files (\"groups1.ndx\" and \"groups2.ndx\" in this example) and each file\n' \ + ' contains a list of groups. (Each group typically represents the atoms\n' \ + ' belonging to each amino acid or nucleotide in a molecule.) Since there\n' \ + ' are two lists of groups, you can look at the connectivity matrix between\n'\ + ' two different molecules. The the two lists of groups do not have to\n' \ + ' correspond to residues from different molecules. They can be identical.\n' \ + ' The atoms in each group are not required to be donors or acceptors\n'\ + ' (other atoms will be ignored). It is possible to tell '+program_name+'\n'\ + ' to only consider the donor atoms from one group, and the acceptor atoms\n'\ + ' from another group. (See \"DA\" / \"AD\" below).\n' \ + ' Again, groups do not have to correspond to residues. Of course, there\n' \ + ' can be multiple donors and acceptors in each group, and more than one h-bond\n' \ + ' connecting each pair of groups. For example, one of list of groups\n' \ + ' (say \"groups2.ndx\") might contain only a single group consisting of all\n' \ + ' of the solvent atoms. In that case, for every frame in the trajectory\n' \ + ' '+program_name+' would simply report the number of water molecules bonded to \n' \ + ' every residue/group in the molecule.\n' \ + '\n' \ + 'Output (stored in the \"con.mat\" file in the example above):\n' \ + '\n' \ + ' For each frame in the trajectory used to generate the xpm file, this\n' \ + ' program generates a connectivity matrix (delimited by spaces and newlines):\n' \ + '\n' \ + 'c11 c12 c13 ... c1n\n' \ + 'c21 c22 c23 ... c2n\n' \ + 'c31 c32 c33 ... c3n\n' \ + ' : : : :\n' \ + 'cm1 cm2 cm3 ... cmn\n' \ + '\n' \ + ' where cij = the number of hydrogen bonds connecting group i (from the\n' \ + ' first list of groups) with group j (from the second list)\n' \ + '\n' \ + 'Input:\n' \ + '\n' \ + ' hbmap.xpm An .xpm file created by \"g_hbond -hbm\" that you want to analyze\n' \ + ' (See documentation for g_hbond.)\n' \ + '\n' \ + 'Arguments:\n' \ + '\n' \ + ' hbond.ndx The ndx file created by \"g_hbond -hbn ...\"\n' \ + ' groups1.ndx The number of hydrogen bonds is counted between every pair\n' \ + ' groups2.ndx of groups in these two index files: groups1.ndx & groups2.ndx.\n' \ + ' DA or AD The 4th argument DA / AD is optional.\n' \ + '\n' \ + ' By using the \"DA\" and \"AD\" arguments, the user can limit\n' \ + ' the atoms that the program considers from groups1.ndx\n'\ + ' and groups2.ndx. The DA command line argument causes\n' \ + ' '+program_name+' to ignore all atoms from the first list of\n' \ + ' groups which are not potential donor atoms, and all atoms\n' \ + ' from the second list which are not potential acceptors.\n' \ + ' (The reverse is true for the AD command.)\n' \ + '\n'\ + 'Performance Issues:\n' \ + '\n'\ + ' This program can consume lots of memory, due to the order that data is\n' \ + ' saved to the xpm file, especially for long trajectories. If the program\n'\ + ' is performing very slowly (running out of memory), you can get around the\n'\ + ' problem by running g_hbond multiple times using the \"-b\" and \"-e\" flags,\n'+ ' in order to generate multiple smaller .xpm files. You can run ' \ +program_name+'\n'\ + ' multiple times and concatenate the results together.\n\n' # Read_hbond2atompairs(file) # Read in the "hbond.ndx" file generated by "g_hbond -hbn hbond.ndx": # Convert this information into a dictionary of tuples. # (A simple list of tuples would also work, and be simpler. Oh well.) # Later, when we read in the body of the .xpm file, this lookup table # will tell how the position in the .xpm file tells us # which pair of (heavy) atoms are connected by the hydrogen bond? def Read_hbond2atompairs(file): missing = -1 lines = file.readlines() hb2ap = [] # <-which (DA) pairs of atoms correspond to each h-bond? redundant = [] # <-if we have read that pair already indicate that here # (To avoid counting the same donor/acceptor pair twice) pairs = {} # <-used to keep track of which pairs we have read in so far for i in range(len(lines)-1, 0, -1): if (lines[i].find('[') == -1): tokens = lines[i].split() if len(tokens) == 3: [D, H, A] = lines[i].split() hb2ap.append((int(D), int(A))) if pairs.get((int(D),int(A)),missing)==missing: redundant.append(False) pairs[(int(D),int(A))] = True else: redundant.append(True) elif len(tokens) > 0: print '\n ---- Format error: ----\n' \ 'Where: the .ndx file that should have been created by \"g_hbond -hbn\"\n' \ 'Explanation: Most likely, you have specified your files in the wrong order.\n\n' \ ' ---- error details: ---- \n' \ 'You have the wrong number of entries on line ' \ +str(i+1)+'\n'+'of the .ndx file.\n' 'This file is supposed to contain h-bond triplets (DHA) in the last group.\n' \ 'Consequently, the last group should contain 3 atoms per line.' sys.exit(-2) else: break # We started this loop at the end of the list and read it in in reverse # order. I would think we need to reverse it again: Consequently: #hb2ap.reverse() # <-- oops. NO! #redundant.reverse() # <-- no #COMMENTING PREVIOUS LINES: Don't reverse this list! g_hbond apparently # creates the hbond.ndx list in the OPPOSITE order as the hbmap.xpm # ..So just leave hb2ap the way it is (reversed). return hb2ap,redundant # <-- Return a list of pairs of atoms, hb2ap # The caller is warned if a pair is redundant # Scan through the .xpm file to find the dimensions of the map to find: # The number of save points in the trajectory (= number of columns), and # the number of possible h-bonds (= number of rows). # Note: Table must be pre-allocated. def ReadTableDimensions(file): for line in file: if line.find('*') == -1: dimensions = line.replace('\"', "").split() return int(dimensions[0]), int(dimensions[1]) # TranposeTable() reads in all the lines of a file. After removing the quotes # the remaining data is assumed to be a 2-dimensional array of characters. # We copy this two dimensional array of characters to a table (a 2-D list) # however we transpose it as we copy it. # (Why do we transpose the data? # Because g_hbond saves creates a .xpm file with the data saved # in a way which the array in a strange format where the data from # different times, appears in different COLUMNS (not rows). # This is the opposite order that we need to access the data, # so we transpose it now.) def TransposeTable(file, table): j = 0 for line in file: if (line.find('*') == -1) and (line.find('"') != -1): line_no_quotes = line.replace('\"','') # get rid of " for i in range(0,len(line_no_quotes)): c = line_no_quotes[i] if ((c != '\n') and (c != ',')): #print 'i,j,\'c\' = '+str(i)+','+str(j)+'\''+c+'\'' table[i][j] = c #copy & transpose j += 1 # Read the list of atoms belonging to every group # (By "group", I mean the groups that the user wants us to keep track of when # we count the number of h-bonds between groups. These two lists of groups are # indicated by the 2nd and 3rd command line arguments to the main program. def Read_atoms2groups(file): a2g = {} g_count = bracket_count = 0 for line in file: if (line.find('[') == -1): for a in line.split(): a2g[int(a)] = g_count else: bracket_count += 1 if ((len(a2g) != 0) or (bracket_count > 1)): g_count+=1 return a2g, g_count+1 # return the lookup table & number of groups def PrintBondTable(numbonds): for g1 in range(0,len(numbonds)): for g2 in range(0,len(numbonds[g1])): sys.stdout.write(str(numbonds[g1][g2])) if g2+1 < len(numbonds[g1]): sys.stdout.write(' ') else: sys.stdout.write('\n') sys.stdout.write('\n') # main program: import sys if (len(sys.argv) < 4) or (len(sys.argv) > 5): print '\nError:\n Expected at least 3 arguments.\n\n' + docs sys.exit(-1) hb2ap_filename = sys.argv[1] a2g_filename1 = sys.argv[2] a2g_filename2 = sys.argv[3] g1donors_g2acceptors = False g1acceptors_g2donors = False if len(sys.argv) > 4: if sys.argv[4] == 'DA': g1donors_g2acceptors = True elif sys.argv[4] == 'AD': g1acceptors_g2donors = True hb2ap,redundant = Read_hbond2atompairs(open(hb2ap_filename,'r')) a2g1,numgroups1 = Read_atoms2groups(open(a2g_filename1,'r')) a2g2,numgroups2 = Read_atoms2groups(open(a2g_filename2,'r')) #print hb2ap #print redundant #print a2g1 #print a2g2 # Read in the dimensions of the data in the .xpm file # This includes the number of bonds, and the duration of the simulation. # This should agree with the number of rows and columns in the .xpm file. (sim_duration, numbonds_hb2ap) = ReadTableDimensions(sys.stdin) # Now that we know how big it is, we create a large array # to store the data in the .xpm file lines = [[' ' for j in range(0,numbonds_hb2ap)] for i in range(0,sim_duration)] TransposeTable(sys.stdin, lines) if numbonds_hb2ap != len(hb2ap): print '\nError: the number of hbonds [='+str(numbonds_hb2ap)+'] (Donor-Hydrogen-Acceptor triplets)\n'+\ ' from the file \"'+hb2ap_filename+'\", (created by running \"g_hbond -hbn\")\n'+\ ' does not match the number of rows of data contained in the\n'+\ ' .xpm file [='+str(len(hb2ap))+'] (created by running \"g_hbond -hbm\"). Assuming you\n'+\ ' did not make a mistake entering the filenames, you are probably using\n'+\ ' buggy version of g_hbond. The version of g_hbond which ships with\n'+\ ' gromacs 3.3.1 has this bug. (But it looks like they fixed it in the\n'+\ ' CVS version.) You need to obtain a newer version of g_hbond,\n'+\ ' and/or download and compile the CVS version.\n'+\ ' Then you must use it to regenerate your .xpm and .ndx files.\n' sys.exit(-3) # Our goal: For each moment in time during the simulation, # we want to calculate the following: # numbondsDA[g1][g2] # counts the number of times a donor from group g1 bonds to an acceptor from g2 # numbondsAD[g1][g2] # counts the number of times an acceptor from group g1 bonds to a donor from g2 # numbonds[g1][g2] # counts the total number of hydrogen bonds between groups g1 and g2 # that is: # numbonds[g1][g2] = numbondsDA[g1][g2] + numbondsAD[g1][g2] missing = -1 for line in lines: # preallocate the bond-counter tables to size: numgroups1 x numbroups2: numbonds = [[0 for g2 in range(0,numgroups2)] \ for g1 in range(0,numgroups1)] numbondsDA=[[0 for g2 in range(0,numgroups2)] \ for g1 in range(0,numgroups1)] numbondsAD=[[0 for g2 in range(0,numgroups2)] \ for g1 in range(0,numgroups1)] # Loop over all of the possible bonds at this moment in time for b in range(0,len(hb2ap)): # If the bond exists,update the relevant bond counters: if ((line[b] == 'o') and (not redundant[b])): D,A = hb2ap[b] # lookup the donor and acceptor atoms g1=a2g1.get(D,missing)#ifD belongs to a group in g1.ndx g2=a2g2.get(A,missing)#& A belongs to a group in g2.ndx #print str(D)+','+str(A)+'{g'+str(g1+1)+',g'+str(g2+1) if ((g1!=missing) and (g2!=missing)):# then add 1 to numbondsDA[g1][g2] += 1 # the relevant numbonds[g1][g2] += 1 # bond counters g1=a2g1.get(A,missing)#ifA belongs to a group in g1.ndx g2=a2g2.get(D,missing)#& D belongs to a group in g2.ndx #print ' and {g'+str(g1+1)+',g'+str(g2+1)+'}' if ((g1!=missing) and (g2!=missing)):# then add 1 to numbondsAD[g1][g2] +=1 # the relevant numbonds[g1][g2] +=1 # bond counters if g1donors_g2acceptors: #print ' -----DA:---- ' PrintBondTable(numbondsDA) elif g1acceptors_g2donors: #print ' -AD:- ' PrintBondTable(numbondsAD) else: #print ' -total:- ' PrintBondTable(numbonds)