diff --git a/source/scripts/python/public/molfile_to_params_polymer.py b/source/scripts/python/public/molfile_to_params_polymer.py index 51117ea661..470550cace 100755 --- a/source/scripts/python/public/molfile_to_params_polymer.py +++ b/source/scripts/python/public/molfile_to_params_polymer.py @@ -141,6 +141,11 @@ def main(argv): help = "modifier for the polymer flag, " + "adjusts PDB style naming to be correct for peptoids" ) + parser.add_argument("--keep-names", + default=False, + action="store_true", + help="leaves atom names untouched except for duplications" + ) parser.add_argument('--partial_charges', default=None, help="file that contains the partial charges of each atom in the input file", @@ -211,11 +216,16 @@ def main(argv): assign_rigid_ids(m.atoms) num_frags = fragment_ligand(m) build_fragment_trees(m) - assign_internal_coords(m) if args.polymer: print("Preforming polymer modifications") - polymer_assign_pdb_like_atom_names_to_sidechain( m.atoms, m.bonds, args.peptoid ) - polymer_assign_backbone_atom_names( m.atoms, m.bonds, args.peptoid ) + if args.keep_names: + for a in m.atoms: + a.pdb_name = a.name + else: + polymer_assign_pdb_like_atom_names_to_sidechain( m.atoms, m.bonds, args.peptoid ) + polymer_assign_backbone_atom_names( m.atoms, m.bonds, args.peptoid ) + # Connections get renamed even if we don't rename other atoms + polymer_assign_connection_atom_names( m.atoms, m.bonds, args.peptoid ) # if instructed to write out all pdb files if not args.no_pdb : for molfile in molfiles[1:]: @@ -223,6 +233,7 @@ def main(argv): polymer_reorder_atoms(molfile) polymer_reorder_atoms(m) #uniquify_atom_names(m.atoms) + assign_internal_coords(m) if not args.no_param: for i in range(num_frags): if num_frags == 1: param_file = "%s.params" % args.name @@ -273,7 +284,7 @@ def main(argv): if not args.all_in_one_pdb: pdb_file_name = "%s_%04i.pdb" % (args.pdb, i+1) pdb_file = open(pdb_file_name, 'w') - if not args.clobber and os.path.exists(pdb_file): + if not args.clobber and os.path.exists(pdb_file_name): print( "File %s already exists -- skip!" % pdb_file ) print( "Use --clobber to overwrite existing files." ) break diff --git a/source/scripts/python/public/molfile_to_params_polymer/IO_functions.py b/source/scripts/python/public/molfile_to_params_polymer/IO_functions.py index 238712071e..0f7c5e4027 100644 --- a/source/scripts/python/public/molfile_to_params_polymer/IO_functions.py +++ b/source/scripts/python/public/molfile_to_params_polymer/IO_functions.py @@ -303,6 +303,25 @@ def write_icoords(a): # if close_file: f.close() +def get_bonded_distance_map(atoms, bonds, root_atom): + bonded_distance_map = {} + bonded_distance_map[ root_atom ] = 0 + + def update(ref, new): + '''Helper function to do the update -- will attempt to find the minimum distance''' + if ref in bonded_distance_map: + d = bonded_distance_map[ ref ] + 1 + bonded_distance_map[ new ] = min( d, bonded_distance_map.get(new, 9999999999) ) + + for ii in range(len(atoms)): # At worst, we should add one atom per cycle + for b in bonds: + update( b.a1, b.a2 ) + update( b.a2, b.a1 ) + if len(bonded_distance_map) == len(atoms): + break # Early out if we've assigned everything. + + return bonded_distance_map + def write_poly_param_file(f, molfile, name, frag_id, peptoid, parent): '''Writes a Molfile object to a file. f may be a file name or file handle. @@ -336,6 +355,14 @@ def write_poly_param_file(f, molfile, name, frag_id, peptoid, parent): assert(len(root_atoms) == 1) root_atom = root_atoms[0] + # Reorder atoms based on (bonded) distance to root + bonded_distance_map = get_bonded_distance_map(atoms, bonds, root_atom) + atoms.sort( key=lambda a: bonded_distance_map[a] ) + atoms.sort( key=lambda a: a.poly_backbone, reverse=True ) # Put backbone atoms in front + atoms.sort( key=lambda a: a.is_H ) # Put hydrogens to the back + # Reorder bonds based on the atom ordering + bonds.sort( key=lambda b: max( atoms.index(b.a1), atoms.index(b.a2) ) ) + # write atoms for atom in atoms: if atom.poly_lower != True and atom.poly_upper != True: @@ -357,9 +384,22 @@ def write_poly_param_file(f, molfile, name, frag_id, peptoid, parent): f.write("BOND_TYPE %-4s %-4s %-4s\n" % (bond.a1.pdb_name, bond.a2.pdb_name, bond.order)) #f.write("BOND %-4s %-4s\n" % (bond.a1.pdb_name, bond.a2.pdb_name)) + lower_connects = [] + upper_connects = [] + for bond in bonds: + if bond.a1.poly_lower: lower_connects.append( bond.a2.pdb_name ) + if bond.a2.poly_lower: lower_connects.append( bond.a1.pdb_name ) + if bond.a1.poly_upper: upper_connects.append( bond.a2.pdb_name ) + if bond.a2.poly_upper: upper_connects.append( bond.a1.pdb_name ) + + if len(lower_connects) != 1: + print("WARNING: One and only one non-ignored atoms should be bonded to the POLY_LOWER atom. Found:", lower_connects) + if len(upper_connects) != 1: + print("WARNING: One and only one non-ignored atoms should be bonded to the POLY_UPPER atom. Found:", upper_connects) # write upper and lower connect - f.write("LOWER_CONNECT N\nUPPER_CONNECT C\n") + f.write("LOWER_CONNECT "+lower_connects[0].strip()+"\n") + f.write("UPPER_CONNECT "+upper_connects[0].strip()+"\n") # Define chi angles # Iterating over the bonds is non-trivial and we need multiple passes, so we define a generator: @@ -394,6 +434,9 @@ def rot_bond_iter(bonds): # So, use first child of c that's not b: # ... again, unless C is root and the O of an ether bond (no other kids) d = [k for k in c.children if k != b][0] + # If we connect backbone to backbone across the bond, then we're not a proper "rotatable" bond + if (d.poly_backbone == True or c.poly_backbone == True) and (b.poly_backbone == True or a.poly_backbone == True): + continue yield (bond, a, b, c, d) def is_sp2_proton(a, b, c, d): '''Crude guestimate of H-bonding geometry''' @@ -406,10 +449,12 @@ def is_sp2_proton(a, b, c, d): # hacky check to see if chis in correct order else revese the order if not peptoid: - for bond, a, b, c, d in rot_bond_iter(sorted_bonds): + rotatable_bonds = list( rot_bond_iter(sorted_bonds) ) + rotatable_bonds.reverse() + for bond, a, b, c, d in rotatable_bonds: if a.poly_upper == False and b.poly_upper == False and c.poly_upper == False and d.poly_upper == False and a.poly_lower == False and b.poly_lower == False and c.poly_lower == False and d.poly_lower == False: #print( d.poly_n_bb, d.pdb_name, c.pdb_name, b.pdb_name, a.pdb_name ) - if not d.poly_n_bb: + if d.poly_n_bb: sorted_bonds.reverse() print( "REVERSED!!!" ) break @@ -421,9 +466,8 @@ def is_sp2_proton(a, b, c, d): if a.poly_upper == False and b.poly_upper == False and c.poly_upper == False and d.poly_upper == False and \ a.poly_lower == False and b.poly_lower == False and c.poly_lower == False and d.poly_lower == False: - # Do not include backbone chi (d and c cant be backbone atoms - if d.poly_backbone == True and c.poly_backbone == True and \ - b.poly_backbone == True and a.poly_backbone == True: + # Do not include backbone chi (where "both sides" of the bond are backbone) + if (d.poly_backbone == True or c.poly_backbone == True) and (b.poly_backbone == True or a.poly_backbone == True): continue # in peptoid case, there should C backbone should not be in any CHI angles if peptoid: @@ -442,8 +486,16 @@ def is_sp2_proton(a, b, c, d): else: f.write("PROTON_CHI %i SAMPLES 3 60 -60 180 EXTRA %s\n" % (num_chis, extra)) + # determine first side chain atom order of atoms should be n ca c o upper lower [side chain heavys] [hydrogens] + non_bb_heavy_atoms = [a for a in atoms if a.poly_backbone == False and a.poly_lower == False and a.poly_upper == False and a.poly_ignore == False and a.elem != 'H'] + if len(non_bb_heavy_atoms) > 0: + first_sidechain_atom = non_bb_heavy_atoms[0] + else: + first_sidechain_atom = None + # choose neighbor atom first CB else CA nbr_list = [i for i,a in enumerate(molfile.atoms) if a.pdb_greek_dist == 'B'] + nbr_atom = None if len(nbr_list) >= 1: nbr_atom_index = nbr_list[0] nbr_atom = atoms[nbr_atom_index] @@ -453,8 +505,20 @@ def is_sp2_proton(a, b, c, d): nbr_atom_index = i nbr_atom = a break + if nbr_atom is None and first_sidechain_atom is not None: + # We kinda assume that this is attached to the backbone -- perhaps we should be more explicit about it + nbr_atom = first_sidechain_atom + nbr_atom_index = [ i for i, a in enumerate(atoms) if a.pdb_name == nbr_atom.name ][ 0 ] # Should just be the one + if nbr_atom is None: + print("WARNING: Residue does not have a sidechain -- picking arbitrary neighbor atom from backbone") + # Attempt to pick an atom in the "middle" of the backbone + backbone_atoms = [ a for a in atoms if a.poly_backbone and not a.is_H and not a.poly_upper and not a.poly_lower ] + nbr_atom = backbone_atoms[ len(backbone_atoms) // 2 ] + nbr_atom_index = [ i for i, a in enumerate(atoms) if a.pdb_name == nbr_atom.name ][ 0 ] # Should just be the one + # There's probably a better way of dealing with this edge case + f.write("NBR_ATOM %s\n" % nbr_atom.pdb_name) - + #Using atoms here instead of molfile.atoms to ensure poly_ignore stays ignored na = len(atoms) # Number of Atoms all_all_dist = [ [1e100] * na for i in range(na) ] @@ -465,7 +529,7 @@ def is_sp2_proton(a, b, c, d): all_all_dist[i] = dijkstra( start = atoms[i], nodes = atoms, nbr = lambda a: nbrs[a], dist = r3.distance ) nbr_dist = max(all_all_dist[nbr_atom_index]) f.write("NBR_RADIUS %f\n" % nbr_dist) - + #Write formal charge formal_charge = int(round(sum(a.partial_charge for a in atoms if not (a.poly_upper or a.poly_lower)))) if formal_charge != 0: @@ -474,10 +538,8 @@ def is_sp2_proton(a, b, c, d): else: f.write("NET_FORMAL_CHARGE +%d\n"%formal_charge) - # determine first side chain atom order of atoms should be n ca c o upper lower [side chain heavys] [hydrogens] - non_bb_heavy_atoms = [a for a in atoms if a.poly_backbone == False and a.poly_lower == False and a.poly_upper == False and a.poly_ignore == False and a.elem != 'H'] - if len(non_bb_heavy_atoms) > 0: - f.write("FIRST_SIDECHAIN_ATOM %s\n" % non_bb_heavy_atoms[0].pdb_name) + if first_sidechain_atom is not None: + f.write("FIRST_SIDECHAIN_ATOM %s\n" % first_sidechain_atom.pdb_name) else: f.write("FIRST_SIDECHAIN_ATOM NONE\n") @@ -500,65 +562,17 @@ def is_sp2_proton(a, b, c, d): for p in properties_list: f.write(" %s" % p) f.write("\n") - # hacky hardcoding of stubs and icoords for backbone atoms and upper lower - nbb = cabb = cbb = obb = upper = lower = 0 - for a in atoms: - if a.poly_n_bb == True: nbb = a - if a.poly_ca_bb == True: cabb = a - if a.poly_c_bb == True: cbb = a - if a.poly_o_bb == True: obb = a - if a.poly_upper == True: upper = a - if a.poly_lower == True: lower = a - if a.bonds[0].a2.poly_n_bb == True: hbb = a - #print(nbb) - - # For writing the pdb names of atoms used to compute internal coords - nbb.input_stub1, nbb.input_stub2, nbb.input_stub3 = nbb, cabb, cbb - cabb.input_stub1, cabb.input_stub2, cabb.input_stub3 = nbb, cabb, cbb - cbb.input_stub1, cbb.input_stub2, cbb.input_stub3 = cabb, nbb, cbb - obb.input_stub1, obb.input_stub2, obb.input_stub3 = cbb, cabb, upper - upper.input_stub1, upper.input_stub2, upper.input_stub3 = cbb, cabb, nbb - lower.input_stub1, lower.input_stub2, lower.input_stub3 = nbb, cabb, cbb - if "hbb" in locals(): #If backbone conjugation occurs, hbb does not exist - hbb.input_stub1, hbb.input_stub2, hbb.input_stub3 = nbb, cabb, lower - - # Calculate ideal values for icoords - # NOTE - these values are conformation-dependent, which means that unless the NCAAs are prepared identically - # between param generation steps, there may be differences in ideal values attributable to preparation; - # e.g., NCAAs may have different UPPER phi dihedral icoor values than the standard CAAs depending on how the - # dipeptide is prepared - nbb.d, nbb.theta, nbb.phi = calc_internal_coords(nbb, nbb.input_stub1, nbb.input_stub2, nbb.input_stub3) - cabb.d, cabb.theta, cabb.phi = calc_internal_coords(cabb, cabb.input_stub1, cabb.input_stub2, cabb.input_stub3) - cbb.d, cbb.theta, cbb.phi = calc_internal_coords(cbb, cbb.input_stub1, cbb.input_stub2, cbb.input_stub3) - obb.d, obb.theta, obb.phi = calc_internal_coords(obb, obb.input_stub1, obb.input_stub2, obb.input_stub3) - upper.d, upper.theta, upper.phi = calc_internal_coords(upper, upper.input_stub1, upper.input_stub2, upper.input_stub3) - lower.d, lower.theta, lower.phi = calc_internal_coords(lower, lower.input_stub1, lower.input_stub2, lower.input_stub3) - if "hbb" in locals(): #If backbone conjugation occurs, hbb does not exist - hbb.d, hbb.theta, hbb.phi = calc_internal_coords(hbb, hbb.input_stub1, hbb.input_stub2, hbb.input_stub3) + # Reporting + print("WRITING ICOOR_INTERNAL: ") def write_icoords(a): - if not a.poly_n_bb and not a.poly_ca_bb and not a.poly_c_bb and not a.poly_upper and not a.poly_o_bb: - #print("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s" % (a.pdb_name, a.phi, a.theta, a.d, a.input_stub1.pdb_name, a.input_stub2.pdb_name, a.input_stub3.pdb_name)) - f.write("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s\n" % (a.pdb_name, a.phi, a.theta, a.d, a.input_stub1.pdb_name, a.input_stub2.pdb_name, a.input_stub3.pdb_name)); + #print("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s" % (a.pdb_name, a.phi, a.theta, a.d, a.input_stub1.pdb_name, a.input_stub2.pdb_name, a.input_stub3.pdb_name)) + f.write("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s\n" % (a.pdb_name, a.phi, a.theta, a.d, a.input_stub1.pdb_name, a.input_stub2.pdb_name, a.input_stub3.pdb_name)); for child in a.children: if not child.poly_ignore: write_icoords(child) - # Reporting - print("WRITING ICOOR_INTERNAL: ") - #print("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s" % (nbb.pdb_name, nbb.phi, nbb.theta, nbb.d, nbb.input_stub1.pdb_name, nbb.input_stub2.pdb_name, nbb.input_stub3.pdb_name)) - #print("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s" % (cabb.pdb_name, cabb.phi, cabb.theta, cabb.d, cabb.input_stub1.pdb_name, cabb.input_stub2.pdb_name, cabb.input_stub3.pdb_name)) - #print("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s" % (cbb.pdb_name, cbb.phi, cbb.theta, cbb.d, cbb.input_stub1.pdb_name, cbb.input_stub2.pdb_name, cbb.input_stub3.pdb_name)) - #print("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s" % (upper.pdb_name, upper.phi, upper.theta, upper.d, upper.input_stub1.pdb_name, upper.input_stub2.pdb_name, upper.input_stub3.pdb_name)) - #print("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s" % (obb.pdb_name, obb.phi, obb.theta, obb.d, obb.input_stub1.pdb_name, obb.input_stub2.pdb_name, obb.input_stub3.pdb_name)) - - # Writing - f.write("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s\n" % (nbb.pdb_name, nbb.phi, nbb.theta, nbb.d, nbb.input_stub1.pdb_name, nbb.input_stub2.pdb_name, nbb.input_stub3.pdb_name)); - f.write("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s\n" % (cabb.pdb_name, cabb.phi, cabb.theta, cabb.d, cabb.input_stub1.pdb_name, cabb.input_stub2.pdb_name, cabb.input_stub3.pdb_name)); - f.write("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s\n" % (cbb.pdb_name, cbb.phi, cbb.theta, cbb.d, cbb.input_stub1.pdb_name, cbb.input_stub2.pdb_name, cbb.input_stub3.pdb_name)); - f.write("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s\n" % (upper.pdb_name, upper.phi, upper.theta, upper.d, upper.input_stub1.pdb_name, upper.input_stub2.pdb_name, upper.input_stub3.pdb_name)); - f.write("ICOOR_INTERNAL %-4s %11.6f %11.6f %11.6f %-4s %-4s %-4s\n" % (obb.pdb_name, obb.phi, obb.theta, obb.d, obb.input_stub1.pdb_name, obb.input_stub2.pdb_name, obb.input_stub3.pdb_name)); - write_icoords(nbb) + write_icoords( root_atom ) # End if close_file: f.close() diff --git a/source/scripts/python/public/molfile_to_params_polymer/atom_functions.py b/source/scripts/python/public/molfile_to_params_polymer/atom_functions.py index 388bce1677..9c3bc838f9 100644 --- a/source/scripts/python/public/molfile_to_params_polymer/atom_functions.py +++ b/source/scripts/python/public/molfile_to_params_polymer/atom_functions.py @@ -131,7 +131,7 @@ def is_saturated(atom): num_aro_C = count_bonded(a, lambda x: (x.elem == "C" and is_aromatic(x)) or x.ros_type == "aroC") num_NO = count_bonded(a, lambda x: x.elem == "N" or x.elem == "O") num_S = count_bonded(a, lambda x: x.elem == "S") - + if any(bond.a2.poly_n_bb for bond in a.bonds): a.ros_type = "HNbb" elif num_S >= 1: a.ros_type = "HS " elif num_NO >= 1: a.ros_type = "Hpol" @@ -155,10 +155,10 @@ def is_saturated(atom): if bond.a2.elem != "O": num_aro_nonO += 1 if bond.a2.elem == "N": num_aro_N += 1 # really if, not elif #print( i+1, a.name, num_aro_nonO, num_dbl_nonO, num_aro_N ) - if num_aro_nonO >= 2: + if num_aro_nonO >= 2: if num_H > 0: a.ros_type = "aroC" else: a.ros_type = "CH0 " - elif num_dbl_nonO >= 1: + elif num_dbl_nonO >= 1: if num_H > 0: a.ros_type = "aroC" else: a.ros_type = "CH0 " elif num_aro_N >= 1: a.ros_type = "CNH2" @@ -205,7 +205,7 @@ def is_saturated(atom): # This is a non-standard rule introduced by IWD, agreed to by KWK: elif bonded_to_C_to_N: a.ros_type = "ONH2" else: a.ros_type = "OOC " - elif "S" == a.elem: + elif "S" == a.elem: num_H = count_bonded(a, lambda x: x.is_H) if num_H == 1: a.ros_type = "SH1 " else: a.ros_type = "S " @@ -633,7 +633,7 @@ def is_charmm_SS(atom): assert(len(a.bonds) == 1) # hydrogen should only be attached to one atom at = a.bonds[0].a2 #HC comes first because of delocalized/aromatic NH2 - if is_charmm_HC(a, at): a.mm_type = "HC" + if is_charmm_HC(a, at): a.mm_type = "HC" elif is_charmm_H(a, at): a.mm_type = "H" elif is_charmm_HS(a, at): a.mm_type = "HS" elif is_charmm_HB(a, at, peptoid): a.mm_type = "HB" @@ -727,12 +727,10 @@ def assign_partial_charges(atoms, partial_charges, net_charge=0.0): '''Assigns Rosetta standard partial charges, then corrects them so they sum to the desired net charge. Correction is distributed equally among all atoms. - - If non-zero partial charges were already assigned, no change is made. ''' # If the partial charges file is not provided if partial_charges == None: - + std_charges = { # from Rosetta++ aaproperties_pack.cc "CNH2" : 0.550, "COO " : 0.620, @@ -782,7 +780,7 @@ def assign_partial_charges(atoms, partial_charges, net_charge=0.0): for a in atoms: a.partial_charge = std_charges[ a.ros_type ] null_charge = [a for a in atoms if a.partial_charge is None] - if 0 < len(null_charge): + if 0 < len(null_charge): if len(null_charge) < len(atoms): raise ValueError("Only some partial charges were assigned -- must be all or none.") else: diff --git a/source/scripts/python/public/molfile_to_params_polymer/fragment_functions.py b/source/scripts/python/public/molfile_to_params_polymer/fragment_functions.py index 46b88dccf3..7470e4ed5a 100644 --- a/source/scripts/python/public/molfile_to_params_polymer/fragment_functions.py +++ b/source/scripts/python/public/molfile_to_params_polymer/fragment_functions.py @@ -26,6 +26,17 @@ def all(itr): if not el: return False return True +def atom_num( desig, m ): + try: + return int(desig) - 1 + except ValueError: + # Atom name? + for ii, a in enumerate(m.atoms): + if desig == a.name: + return ii + + raise ValueError("Cannot find atom designation {}, should be integer or one of {}.".format( desig, [a.name for a in m.atoms] ) ) + def assign_rigid_ids(atoms): '''Groups atoms that are connected in rigid units, i.e. no rotatable bonds.''' # Iterate through atoms, assigning them to rigids via depth first search @@ -55,8 +66,8 @@ def fragment_ligand(molfile): for line in molfile.footer: if not line.startswith("M SPLT"): continue fields = line.split() - atom1 = molfile.atoms[int(fields[2]) - 1] - atom2 = molfile.atoms[int(fields[3]) - 1] + atom1 = molfile.atoms[ atom_num(fields[2], molfile) ] + atom2 = molfile.atoms[ atom_num(fields[3], molfile) ] bond_to_remove = None for b in remaining_bonds: if((b.a1 == atom1 and b.a2 == atom2) @@ -146,9 +157,9 @@ def build_fragment_trees(molfile): # Assign root atoms based on instructions in the molfile for line in molfile.footer: # Standard MDL style is with a space, but KWK has used "MROOT" in the past. Babel uses 2 spaces. - if line.startswith("M ROOT"): molfile.atoms[ int(line.split()[2]) - 1 ].is_root = True - elif line.startswith("M ROOT"): molfile.atoms[ int(line.split()[2]) - 1 ].is_root = True - elif line.startswith("MROOT"): molfile.atoms[ int(line.split()[1]) - 1 ].is_root = True + if line.startswith("M ROOT"): molfile.atoms[ atom_num(line.split()[2], molfile) ].is_root = True + elif line.startswith("M ROOT"): molfile.atoms[ atom_num(line.split()[2], molfile) ].is_root = True + elif line.startswith("MROOT"): molfile.atoms[ atom_num(line.split()[1], molfile) ].is_root = True for frag_id in set([a.fragment_id for a in molfile.atoms]): # If we want to have a default way of choosing the root atom, this is the place: root_atoms = [a for a in molfile.atoms if a.fragment_id == frag_id and a.is_root] @@ -173,10 +184,16 @@ def get_atom_num(elem): return 99 # Want to visit non-H children first tmp_children = [b.a2 for b in parent.bonds] - #Sort first by identity of this atom, then identity of bonded atoms, then number of non-H bonds - tmp_children.sort(key= lambda a: get_atom_num(a.elem) + - 0.01*max([get_atom_num(b.a2.elem) for b in a.bonds]) + - 0.001*len([b for b in a.bonds if not b.a2.is_H]), reverse=True) + def sort_key(a): + # Priority to higher valued items (reverse sort) + return (not a.poly_ignore, # Priority to non-ignore + not ( a.poly_upper or a.poly_lower ), # Priority to non-connect + a.poly_backbone, # Priority to backbone + get_atom_num(a.elem), # Priority to higher atom number + max([get_atom_num(b.a2.elem) for b in a.bonds]), # Priority to bonded to heavier MW + len([b for b in a.bonds if not b.a2.is_H]), ) # Number of non-hydrogen bonds + tmp_children.sort(key=sort_key, reverse=True) + for child in tmp_children: if child.fragment_id != parent.fragment_id: continue if child.parent is not None or child.is_root: continue @@ -201,6 +218,36 @@ def get_atom_num(elem): # parent.children.sort(lambda a,b: cmp(a.is_H, b.is_H)) # Every atom should have a parent OR be a root assert(len([a for a in molfile.atoms if not a.is_root and a.parent is None]) == 0) + +def handle_polymer_special_cases(child, molfile): + # There should be a better way of doing this + nbb = cabb = cbb = obb = upper = lower = None + for a in molfile.atoms: + if a.poly_n_bb == True: nbb = a + if a.poly_ca_bb == True: cabb = a + if a.poly_c_bb == True: cbb = a + if a.poly_o_bb == True: obb = a + if a.poly_upper == True: upper = a + if a.poly_lower == True: lower = a + + # For alpha amino acids + if child.poly_n_bb: + child.input_stub1, child.input_stub2, child.input_stub3 = nbb, cabb, cbb + elif child.poly_ca_bb: + child.input_stub1, child.input_stub2, child.input_stub3 = nbb, cabb, cbb + elif child.poly_c_bb: + child.input_stub1, child.input_stub2, child.input_stub3 = cabb, nbb, cbb + elif child.poly_o_bb: + child.input_stub1, child.input_stub2, child.input_stub3 = cbb, cabb, upper + elif child.poly_upper and None not in (cbb, cabb, nbb): + child.input_stub1, child.input_stub2, child.input_stub3 = cbb, cabb, nbb + elif child.poly_lower and None not in (nbb, cabb, cbb): + child.input_stub1, child.input_stub2, child.input_stub3 = nbb, cabb, cbb + elif child.bonds[0].a2.poly_n_bb: # hbb + child.input_stub1, child.input_stub2, child.input_stub3 = nbb, cabb, lower + else: + return + def assign_internal_coords(molfile): '''Sets up stubs/input_stubs and d,theta,phi for all atoms.''' for frag_id in set([a.fragment_id for a in molfile.atoms]): @@ -230,7 +277,9 @@ def assign_stubs(me): prev_sibling = me.stub3 parent = me # rename to make logic clearer # Have to store input_stub atoms b/c they're written to params file - for child in parent.children: + + # Descend into backbone atoms first, then sidechain + for child in sorted( parent.children, key=lambda a: a.poly_backbone, reverse=True ): child.input_stub1 = parent.stub1 child.input_stub2 = parent.stub2 child.input_stub3 = prev_sibling # for first child, this is parent.stub3 @@ -238,6 +287,9 @@ def assign_stubs(me): if parent.is_root and prev_sibling == parent.stub2: #print( "activate second child case! stub3 =", parent.stub3.name ) child.input_stub3 = parent.stub3 + + handle_polymer_special_cases(child, molfile) + #print( "input_stubs", [x.name for x in (child, child.parent, child.input_stub1, child.input_stub2, child.input_stub3)] ) # Now actually calculate spherical internal coordinates child.d, child.theta, child.phi = calc_internal_coords(child, child.input_stub1, child.input_stub2, child.input_stub3) @@ -246,11 +298,14 @@ def assign_stubs(me): # Child is now previous sibling for next child in this loop prev_sibling = child # end assign_stubs() + assign_stubs(root_atom) + # Root has to have dummy input stub atoms to fill space in params file root_atom.input_stub1 = root_atom.stub1 root_atom.input_stub2 = root_atom.stub2 root_atom.input_stub3 = root_atom.stub3 + handle_polymer_special_cases(root_atom, molfile) # In case root is a special case (which it probably is) # Root has dummy values for d, theta, phi root_atom.d = 0.0 root_atom.theta = 0.0 diff --git a/source/scripts/python/public/molfile_to_params_polymer/polymer_functions.py b/source/scripts/python/public/molfile_to_params_polymer/polymer_functions.py index 5a16a0b15b..9d6c6ef13c 100644 --- a/source/scripts/python/public/molfile_to_params_polymer/polymer_functions.py +++ b/source/scripts/python/public/molfile_to_params_polymer/polymer_functions.py @@ -31,23 +31,32 @@ def polymer_assign_backbone_atom_types(m): # first get POLY flags from molfile for line in m.footer: if line.startswith("M POLY_N_BB"): - m.atoms[ int(line.split()[2]) - 1 ].poly_backbone = True - m.atoms[ int(line.split()[2]) - 1 ].poly_n_bb = True + num = atom_num( line.split()[2], m ) + m.atoms[ num ].poly_backbone = True + m.atoms[ num ].poly_n_bb = True if line.startswith("M POLY_CA_BB"): - m.atoms[ int(line.split()[2]) - 1 ].poly_backbone = True - m.atoms[ int(line.split()[2]) - 1 ].poly_ca_bb = True + num = atom_num( line.split()[2], m ) + m.atoms[ num ].poly_backbone = True + m.atoms[ num ].poly_ca_bb = True if line.startswith("M POLY_O_BB"): - m.atoms[ int(line.split()[2]) - 1 ].poly_backbone = True - m.atoms[ int(line.split()[2]) - 1 ].poly_o_bb = True + num = atom_num( line.split()[2], m ) + m.atoms[ num ].poly_backbone = True + m.atoms[ num ].poly_o_bb = True if line.startswith("M POLY_C_BB"): - m.atoms[ int(line.split()[2]) - 1 ].poly_backbone = True - m.atoms[ int(line.split()[2]) - 1 ].poly_c_bb = True + num = atom_num( line.split()[2], m ) + m.atoms[ num ].poly_backbone = True + m.atoms[ num ].poly_c_bb = True if line.startswith("M POLY_UPPER"): - m.atoms[ int(line.split()[2]) - 1 ].poly_backbone = True - m.atoms[ int(line.split()[2]) - 1 ].poly_upper = True + num = atom_num( line.split()[2], m ) + m.atoms[ num ].poly_backbone = True + m.atoms[ num ].poly_upper = True if line.startswith("M POLY_LOWER"): - m.atoms[ int(line.split()[2]) - 1 ].poly_backbone = True - m.atoms[ int(line.split()[2]) - 1 ].poly_lower = True + num = atom_num( line.split()[2], m ) + m.atoms[ num ].poly_backbone = True + m.atoms[ num ].poly_lower = True + if line.startswith("M POLY_BACKBONE"): + for num in [ atom_num(s, m) for s in line.split()[2:] ]: + m.atoms[ num ].poly_backbone = True def polymer_assign_backbone_atom_names(atoms, bonds, peptoid): ''' modifies the rosetta atom type of the hydorgen attached to the alpha carbon and backbone nitrogen to be the correct @@ -69,12 +78,7 @@ def polymer_assign_backbone_atom_names(atoms, bonds, peptoid): elif atom.poly_c_bb: atom.ros_type = "CObb" atom.pdb_name = " C " - elif atom.poly_upper: - atom.ros_type = "X" - atom.pdb_name = "UPPER" - elif atom.poly_lower: - atom.ros_type = "X" - atom.pdb_name = "LOWER" + # Upper & Lower handled in a different function. # alpha carbon hydrogen(s) ca_h_bonds = [] index = 0; @@ -109,13 +113,23 @@ def polymer_assign_backbone_atom_names(atoms, bonds, peptoid): bond.a1.ros_type = "HNbb" bond.a1.pdb_name = " H " +def polymer_assign_connection_atom_names(atoms, bonds, peptoid): + ''' modifies the names of the upper and lower connections ''' + for atom in atoms: + if atom.poly_upper: + atom.ros_type = "X" + atom.pdb_name = "UPPER" + elif atom.poly_lower: + atom.ros_type = "X" + atom.pdb_name = "LOWER" + + def polymer_assign_ignored_atoms_bonds(m): ''' sets the ignore boolean for each atom in the list and for each bond with at least one atom in the list''' ignore_list = [] for line in m.footer: if line.startswith("M POLY_IGNORE"): - ignore_list = line.split()[2:] - ignore_list = [int(i)-1 for i in ignore_list] + ignore_list = [ atom_num(d, m) for d in line.split()[2:] ] #atoms for i,a in enumerate(m.atoms): if i in ignore_list: @@ -218,7 +232,7 @@ def compare_atom_num(x,y): for t in temp: while str(k) in used_index: k += 1 - if atoms[t].pdb_postfix_num == " ": + if atoms[t].pdb_postfix_num == " ": atoms[t].pdb_postfix_num = "%d" % k used_index.append(str(k)) diff --git a/source/scripts/python/public/rosetta_py/io/mdl_molfile.py b/source/scripts/python/public/rosetta_py/io/mdl_molfile.py index b9fd8bb4eb..29a6ac57e2 100644 --- a/source/scripts/python/public/rosetta_py/io/mdl_molfile.py +++ b/source/scripts/python/public/rosetta_py/io/mdl_molfile.py @@ -429,6 +429,7 @@ def read_mol2_lines(f): elif line.startswith("@"): mode = line molfile.footer.append(line+"\n") + molfile.footer.append(line[1:]+"\n") elif mode == "@MOLECULE": linecnt += 1 if linecnt == 1: molfile.title = line.strip() diff --git a/tests/integration/tests/molfile_to_params_polymer/CRO.mol2 b/tests/integration/tests/molfile_to_params_polymer/CRO.mol2 new file mode 100644 index 0000000000..9911a6b27b --- /dev/null +++ b/tests/integration/tests/molfile_to_params_polymer/CRO.mol2 @@ -0,0 +1,92 @@ +@MOLECULE +CRO_ideal.pdb +39 40 1 0 0 +SMALL +USER_CHARGES + + +@ATOM + 1 N1 -0.8870 -2.4570 -1.6710 N.3 1 <1> -0.3156 + 2 H1 -1.1158 -2.8637 -2.5667 H 1 <1> 0.0000 + 3 CA1 -1.7960 -1.6130 -0.8830 C.3 1 <1> 0.1357 + 4 CB1 -2.0510 -2.2680 0.4760 C.3 1 <1> 0.0788 + 5 CG1 -2.5690 -3.6930 0.2670 C.3 1 <1> -0.0369 + 6 OG1 -0.8320 -2.3090 1.2210 O.3 1 <1> -0.3899 + 7 C1 -1.1710 -0.2570 -0.6780 C.2 1 <1> 0.3169 + 8 N2 0.1000 -0.0540 -0.5230 N.2 1 <1> -0.0365 + 9 N3 -1.8710 0.9160 -0.6370 N.am 1 <1> -0.1413 + 10 C2 -0.9960 1.9270 -0.4390 C.2 1 <1> 0.3789 + 11 O2 -1.2480 3.1150 -0.3470 O.2 1 <1> -0.2374 + 12 CA2 0.3270 1.2850 -0.3620 C.2 1 <1> 0.2625 + 13 CA3 -3.3220 1.0540 -0.7830 C.3 1 <1> 0.1851 + 14 C3 -3.9740 0.9600 0.5720 C.2 1 <1> 0.3456 + 15 O3 -3.2970 0.7940 1.5590 O.2 1 <1> -0.2467 + 16 CB2 1.5430 1.9040 -0.1690 C.2 1 <1> 0.0039 + 17 CG2 2.7570 1.1010 -0.0080 C.ar 1 <1> -0.0205 + 18 CD1 2.7090 -0.2900 -0.1800 C.ar 1 <1> -0.0510 + 19 CD2 3.9730 1.7210 0.3130 C.ar 1 <1> -0.0510 + 20 CE1 3.8500 -1.0350 -0.0330 C.ar 1 <1> -0.0196 + 21 CE2 5.1070 0.9640 0.4630 C.ar 1 <1> -0.0196 + 22 CZ 5.0540 -0.4150 0.2850 C.ar 1 <1> 0.1169 + 23 OH 6.1800 -1.1580 0.4290 O.3 1 <1> -0.5068 + 24 OXT -5.3080 1.0600 0.6820 O.3 1 <1> -0.4780 + 25 HA1 -2.7410 -1.5010 -1.4150 H 1 <1> 0.0610 + 26 HB1 -2.7930 -1.6890 1.0260 H 1 <1> 0.0612 + 27 HG11 -1.8270 -4.2720 -0.2830 H 1 <1> 0.0255 + 28 HG12 -2.7500 -4.1590 1.2350 H 1 <1> 0.0255 + 29 HG13 -3.4990 -3.6610 -0.3010 H 1 <1> 0.0255 + 30 HOG1 -0.1250 -2.8100 0.7910 H 1 <1> 0.2099 + 31 HA31 -3.5510 2.0210 -1.2310 H 1 <1> 0.0850 + 32 HA32 -3.7000 0.2580 -1.4250 H 1 <1> 0.0850 + 33 HB2 1.6000 2.9820 -0.1380 H 1 <1> 0.0669 + 34 HD1 1.7750 -0.7720 -0.4270 H 1 <1> 0.0625 + 35 HD2 4.0170 2.7920 0.4420 H 1 <1> 0.0625 + 36 HE1 3.8160 -2.1060 -0.1700 H 1 <1> 0.0654 + 37 HE2 6.0440 1.4400 0.7100 H 1 <1> 0.0654 + 38 HOH 6.6820 -1.2740 -0.3890 H 1 <1> 0.2921 + 39 C 0.3035 -2.6174 -1.0333 C.3 1 <1> 0.0000 +@BOND + 1 1 2 1 + 2 1 3 1 + 3 1 39 1 + 4 3 4 1 + 5 3 7 1 + 6 3 25 1 + 7 4 5 1 + 8 4 6 1 + 9 4 26 1 + 10 5 27 1 + 11 5 28 1 + 12 5 29 1 + 13 6 30 1 + 14 7 8 2 + 15 7 9 1 + 16 8 12 1 + 17 9 10 am + 18 9 13 1 + 19 10 11 2 + 20 10 12 1 + 21 12 16 2 + 22 13 14 1 + 23 13 31 1 + 24 13 32 1 + 25 14 15 2 + 26 14 24 1 + 27 16 17 1 + 28 16 33 1 + 29 17 18 ar + 30 17 19 ar + 31 18 20 ar + 32 18 34 1 + 33 19 21 ar + 34 19 35 1 + 35 20 22 ar + 36 20 36 1 + 37 21 22 ar + 38 21 37 1 + 39 22 23 1 + 40 23 38 1 +@M ROOT N1 +@M POLY_UPPER OXT +@M POLY_LOWER C +@M POLY_BACKBONE N1 CA1 C1 N3 CA3 C3 O3 diff --git a/tests/integration/tests/molfile_to_params_polymer/command b/tests/integration/tests/molfile_to_params_polymer/command index 6a6d6f92f2..e61ff8b478 100644 --- a/tests/integration/tests/molfile_to_params_polymer/command +++ b/tests/integration/tests/molfile_to_params_polymer/command @@ -7,3 +7,8 @@ for f in *.sdf; do test "${PIPESTATUS[0]}" != '0' && exit 1 || true # Check if the first executable in pipe line return error and exit with error code if so done +for f in *.mol2; do + %(python)s %(pyapps)s/public/molfile_to_params_polymer.py --clobber --all-in-one-pdb --keep-names --name $(basename $f .mol2) -i $f --use-pdb-rotamers >> log 2>&1 + + test "${PIPESTATUS[0]}" != '0' && exit 1 || true # Check if the first executable in pipe line return error and exit with error code if so +done