From 55eff50f2f64c3cdb1f382cf172a85e17bcfc4ab Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Fri, 3 Oct 2025 12:32:48 -0500 Subject: [PATCH 1/7] Add mmCIF as output option to the restype_converter. --- source/src/apps/public/restype_converter.cc | 9 + source/src/core.2.src.settings | 1 + source/src/core/chemical/mmCIF/mmCIFWriter.cc | 157 ++++++++++++++++++ .../core/chemical/mmCIF/mmCIFWriter.fwd.hh | 28 ++++ source/src/core/chemical/mmCIF/mmCIFWriter.hh | 68 ++++++++ 5 files changed, 263 insertions(+) create mode 100644 source/src/core/chemical/mmCIF/mmCIFWriter.cc create mode 100644 source/src/core/chemical/mmCIF/mmCIFWriter.fwd.hh create mode 100644 source/src/core/chemical/mmCIF/mmCIFWriter.hh diff --git a/source/src/apps/public/restype_converter.cc b/source/src/apps/public/restype_converter.cc index 96c39c8a09..2797f66617 100644 --- a/source/src/apps/public/restype_converter.cc +++ b/source/src/apps/public/restype_converter.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,8 @@ void register_options() { option.add_relevant( out::pdb ); option.add_relevant( restype_convert::params_out ); option.add_relevant( restype_convert::sdf_out ); + option.add_relevant( out::mmCIF ); + } @@ -254,6 +257,12 @@ output_residue_types( utility::vector1< core::chemical::ResidueTypeCOP > const & files_outputted = true; } + if ( option[ out::mmCIF ]() ) { + mmCIF::mmCIFWriter writer; + writer.write_file( determine_output_name( name, "cif" ), *restype ); + files_outputted = true; + } + } if ( ! 
files_outputted ) { diff --git a/source/src/core.2.src.settings b/source/src/core.2.src.settings index 9f581de842..c51c54c5f2 100644 --- a/source/src/core.2.src.settings +++ b/source/src/core.2.src.settings @@ -144,6 +144,7 @@ sources = { ], "core/chemical/mmCIF": [ "mmCIFParser", + "mmCIFWriter", ], "core/conformation": [ "AbstractRotamerTrie", diff --git a/source/src/core/chemical/mmCIF/mmCIFWriter.cc b/source/src/core/chemical/mmCIF/mmCIFWriter.cc new file mode 100644 index 0000000000..9e8db5fe46 --- /dev/null +++ b/source/src/core/chemical/mmCIF/mmCIFWriter.cc @@ -0,0 +1,157 @@ +// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +// vi: set ts=2 noet: +// +// (c) Copyright Rosetta Commons Member Institutions. +// (c) This file is part of the Rosetta software suite and is made available under license. +// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +// (c) For more information, see http://www.rosettacommons.org. Questions about this can be +// (c) addressed to University of Washington CoMotion, email: license@uw.edu. 
+ +/// @file src/core/chemical/mmCIF/mmCIFWriter.cc +/// @author Rocco Moretti (rmorettiase@gmail.com) + +#include + +#include +#include + +#include + +//Utility functions +#include +#include +#include +#include +#include + +//external CIF includes +#include +#include +#include // for as_number + +namespace core { +namespace chemical { +namespace mmCIF { + + +//Load up the tracer for this class +static basic::Tracer TR( "core.io.mmCIF.mmCIFWriter" ); +/// @brief Write restype to the file file_name as mmCIF (via write_stream); throws if the file cannot be opened. +void +mmCIFWriter::write_file(std::string const & file_name, core::chemical::ResidueType const & restype) { + utility::io::ozstream outfile; + outfile.open(file_name.c_str(), std::ios::out); + if ( !outfile ) { + throw CREATE_EXCEPTION(utility::excn::FileNotFound, "Cannot open file "+file_name); + } + write_stream(outfile,restype); + outfile.close(); +} +/// @brief Write restype to output_stream as a single CIF block built by generate_block(). +void +mmCIFWriter::write_stream(std::ostream & output_stream, core::chemical::ResidueType const & restype) { + gemmi::cif::WriteOptions options; + options.prefer_pairs = true; + options.misuse_hash = true; + + gemmi::cif::write_cif_block_to_stream( output_stream, generate_block(restype), options ); +} + +gemmi::cif::Block +mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { + using utility::gemmi_add_table; + using utility::gemmi_add_row; + + gemmi::cif::Block block; + block.name = restype.name(); + + block.set_pair( "_chem_comp.id", restype.name() ); + block.set_pair( "_chem_comp.name", restype.name() ); + block.set_pair( "_chem_comp.one_letter_code", std::string(1, restype.name1() ) ); + block.set_pair( "_chem_comp.three_letter_code", restype.name3() ); + if ( restype.is_polymer() ) { + if ( restype.is_l_aa() ) { + block.set_pair( "_chem_comp.type", "L-PEPTIDE LINKING" ); + } else if ( restype.is_d_aa() ) { + block.set_pair( "_chem_comp.type", "D-PEPTIDE LINKING" ); + } else if ( restype.is_RNA() ) { + block.set_pair( "_chem_comp.type", "RNA LINKING" ); + } else if ( restype.is_DNA() ) { + block.set_pair( "_chem_comp.type", "DNA LINKING" );
+ } + } else { + block.set_pair( "_chem_comp.type", "NON-POLYMER" ); + } + + + gemmi::cif::Loop & atom_comp = gemmi_add_table(block, "_chem_comp_atom", { + "comp_id", + "atom_id", + "type_symbol", + "charge", + "model_Cartn_x", + "model_Cartn_y", + "model_Cartn_z", + "pdbx_ordinal", + } ); + + for ( core::Size ii(1); ii <= restype.natoms(); ++ii ) { + std::vector< std::string > vec; + vec.push_back( restype.name() ); + vec.push_back( restype.atom_name(ii) ); + vec.push_back( restype.element_type(ii)->get_chemical_symbol() ); + vec.push_back( std::to_string( restype.formal_charge(ii) ) ); + core::Vector pos = restype.ideal_xyz(ii); + vec.push_back( std::to_string(pos.x()) ); + vec.push_back( std::to_string(pos.y()) ); + vec.push_back( std::to_string(pos.z()) ); + vec.push_back( std::to_string(ii) ); + + gemmi_add_row( atom_comp, vec ); + } + + gemmi::cif::Loop & bond_comp = gemmi_add_table(block, "_chem_comp_bond", { + "comp_id", + "atom_id_1", + "atom_id_2", + "value_order", + "pdbx_aromatic_flag", + "pdbx_ordinal" + } ); + + utility::vector1< std::pair< core::Size, core::Size > > const & all_bonds = restype.bonds(); + + for ( core::Size ii(1); ii <= restype.nbonds(); ++ii ) { + std::vector< std::string > vec; + vec.push_back( restype.name() ); + core::Size atm1 = all_bonds[ii].first; + core::Size atm2 = all_bonds[ii].second; + vec.push_back( restype.atom_name( atm1 ) ); + vec.push_back( restype.atom_name( atm2 ) ); + std::string bond_type = "UNK"; + switch ( restype.bond_type( atm1, atm2 ) ) { + case SingleBond: + bond_type = "SING"; break; + case DoubleBond: + bond_type = "DOUB"; break; + case TripleBond: + bond_type = "TRIP"; break; + case AromaticBond: + bond_type = "AROM"; break; + default: + bond_type = "UNK"; break; + } + vec.push_back( bond_type ); + vec.push_back( (restype.bond_type( atm1, atm2 ) == AromaticBond) ? 
"Y" : "N" ); + vec.push_back( std::to_string(ii) ); + + gemmi_add_row( bond_comp, vec ); + } + + return block; +} + + +} +} +} diff --git a/source/src/core/chemical/mmCIF/mmCIFWriter.fwd.hh b/source/src/core/chemical/mmCIF/mmCIFWriter.fwd.hh new file mode 100644 index 0000000000..9cdca30653 --- /dev/null +++ b/source/src/core/chemical/mmCIF/mmCIFWriter.fwd.hh @@ -0,0 +1,28 @@ +// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +// vi: set ts=2 noet: +// +// (c) Copyright Rosetta Commons Member Institutions. +// (c) This file is part of the Rosetta software suite and is made available under license. +// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +// (c) For more information, see http://www.rosettacommons.org. Questions about this can be +// (c) addressed to University of Washington CoMotion, email: license@uw.edu. + +/// @file src/core/chemical/mmCIF/mmCIFWriter.fwd.hh +/// @author Rocco Moretti (rmorettiase@gmail.com) + +#ifndef INCLUDED_core_chemical_mmCIF_mmCIFWriter_fwd_hh +#define INCLUDED_core_chemical_mmCIF_mmCIFWriter_fwd_hh + +#include + + +namespace core { +namespace chemical { +namespace mmCIF { + +} +} +} + +#endif + diff --git a/source/src/core/chemical/mmCIF/mmCIFWriter.hh b/source/src/core/chemical/mmCIF/mmCIFWriter.hh new file mode 100644 index 0000000000..b790a45fe1 --- /dev/null +++ b/source/src/core/chemical/mmCIF/mmCIFWriter.hh @@ -0,0 +1,68 @@ +// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +// vi: set ts=2 noet: +// +// (c) Copyright Rosetta Commons Member Institutions. +// (c) This file is part of the Rosetta software suite and is made available under license. +// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +// (c) For more information, see http://www.rosettacommons.org. 
Questions about this can be +// (c) addressed to University of Washington CoMotion, email: license@uw.edu. +////////////////////////////////////////////////////////////////////// +/// +/// @brief +/// Class to convert a ResidueType to mmCIF. +/// +/// @file src/core/chemical/mmCIF/mmCIFWriter.hh +/// +/// @details Emits the _chem_comp, _chem_comp_atom and _chem_comp_bond categories for the ResidueType. +/// +/// @author Rocco Moretti (rmorettiase@gmail.com) +/// +/// +///////////////////////////////////////////////////////////////////////// + +#ifndef INCLUDED_core_chemical_mmCIF_mmCIFWriter_hh +#define INCLUDED_core_chemical_mmCIF_mmCIFWriter_hh + +#include +#include + +#include +#include +#include +#include + +#include + +namespace core { +namespace chemical { +namespace mmCIF { + +class mmCIFWriter : public utility::VirtualBase +{ + +public: + + mmCIFWriter() = default; + ~mmCIFWriter() override = default; + /// @brief Write restype to the file filename in mmCIF format. + void + write_file( std::string const & filename, core::chemical::ResidueType const & restype ); + /// @brief Write restype to output_stream in mmCIF format. + void + write_stream( std::ostream & output_stream, core::chemical::ResidueType const & restype ); + +protected: + + gemmi::cif::Block + generate_block( core::chemical::ResidueType const & restype ); + +private: + +}; + + + +} +} +} +#endif From 7e1bcb502a70c62d24ef24cf86068dd8d82f97f4 Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Fri, 3 Oct 2025 13:26:57 -0500 Subject: [PATCH 2/7] Encode and obey the 3&1 letter codes.
--- source/src/core/chemical/mmCIF/mmCIFParser.cc | 28 ++++++++++++++++--- source/src/core/chemical/mmCIF/mmCIFWriter.cc | 23 +++++++++++++++ source/src/core/chemical/sdf/MolFileIOData.cc | 12 ++++++-- source/src/core/chemical/sdf/MolFileIOData.hh | 12 ++++++-- 4 files changed, 65 insertions(+), 10 deletions(-) diff --git a/source/src/core/chemical/mmCIF/mmCIFParser.cc b/source/src/core/chemical/mmCIF/mmCIFParser.cc index 6b0fa15e68..4949354d52 100644 --- a/source/src/core/chemical/mmCIF/mmCIFParser.cc +++ b/source/src/core/chemical/mmCIF/mmCIFParser.cc @@ -96,13 +96,26 @@ mmCIFParser::get_molfile_molecule( gemmi::cif::Block & block ) { sdf::MolFileIOMoleculeOP molecule( new sdf::MolFileIOMolecule() ); - molecule->name( block.name ); //only proceed if the tables for bonds and atoms are present if ( !block.has_mmcif_category("_chem_comp_atom") ) { TR.Error << "Cannot parse CIF file. No atom block (chem_comp_atom) found for " << block.name << std::endl; return molecule; } + /////////////////// Standard Residue-level data + molecule->name( block.name ); + + std::string const * name3 = block.find_value("_chem_comp.three_letter_code"); // Non-owning raw pointer, null if not found. + if ( name3 ) { + molecule->name3( as_string(name3) ); + } + std::string const * name1 = block.find_value("_chem_comp.one_letter_code"); // Non-owning raw pointer, null if not found. + if ( name1 ) { + molecule->name1( as_string(name1) ); + } + + //////////////////// Standard Atom-level data + // There's another possible issue. to pre-pick about. We absolutely NEED N, // because we need to be very specific about adding and deleting atoms. // also...
residue types without N are very likely to be a poor representative @@ -126,6 +139,8 @@ mmCIFParser::get_molfile_molecule( gemmi::cif::Block & block ) { int model_Cartn_y = find_gemmi_column(atom_comp,"model_Cartn_y"); int model_Cartn_z = find_gemmi_column(atom_comp,"model_Cartn_z"); int charge = find_gemmi_column(atom_comp,"charge"); + int partial_charge = find_gemmi_column(atom_comp,"partial_charge"); + int atom_name_id = find_gemmi_column(atom_comp,"atom_id"); if ( atom_name_id < 0 ) { @@ -207,9 +222,9 @@ mmCIFParser::get_molfile_molecule( gemmi::cif::Block & block ) { // Get the chem_comp table first, because this will help us // look out for extraneous atoms common in CIF entries -- extra nitrogen H // and OH terminus on C - gemmi::cif::Table chem_comp = block.find( "_chem_comp.", {"type"} ); - if ( chem_comp.size() > 0 ) { - std::string type = as_string(chem_comp[0][0]); + gemmi::cif::Table chem_comp_type = block.find( "_chem_comp.", {"type"} ); + if ( chem_comp_type.size() > 0 ) { + std::string type = as_string(chem_comp_type[0][0]); if ( type == "L-PEPTIDE LINKING" && is_peptide_linking ) { TR.Debug << "Found L-peptide RT" << std::endl;// named " << molecule->name() << std::endl; molecule->add_str_str_data( "Rosetta Properties", "PROTEIN POLYMER L_AA" ); @@ -246,6 +261,8 @@ mmCIFParser::get_molfile_molecule( gemmi::cif::Block & block ) { } } + + // Sometimes OP3/O3P is used for non-term-deletable phosphate oxygens. (THX) //bool interesting_upper_behavior = false; @@ -470,6 +487,9 @@ mmCIFParser::get_molfile_molecule( gemmi::cif::Block & block ) { } else { atom->formal_charge( 0 ); } + if ( partial_charge >= 0 ) { + atom->partial_charge( as_number( atom_comp[ii][partial_charge], 0 ) ); // Default zero if present and null + } molecule->add_atom( atom ); // only increment if we actually get here. 
diff --git a/source/src/core/chemical/mmCIF/mmCIFWriter.cc b/source/src/core/chemical/mmCIF/mmCIFWriter.cc index 9e8db5fe46..e1a6f55c7c 100644 --- a/source/src/core/chemical/mmCIF/mmCIFWriter.cc +++ b/source/src/core/chemical/mmCIF/mmCIFWriter.cc @@ -65,6 +65,8 @@ mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { gemmi::cif::Block block; block.name = restype.name(); + ////////////// Standard Residue-level data + block.set_pair( "_chem_comp.id", restype.name() ); block.set_pair( "_chem_comp.name", restype.name() ); block.set_pair( "_chem_comp.one_letter_code", std::string(1, restype.name1() ) ); @@ -83,6 +85,7 @@ mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { block.set_pair( "_chem_comp.type", "NON-POLYMER" ); } + ////////////// Standard Atom-level Data gemmi::cif::Loop & atom_comp = gemmi_add_table(block, "_chem_comp_atom", { "comp_id", @@ -93,6 +96,7 @@ mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { "model_Cartn_y", "model_Cartn_z", "pdbx_ordinal", + "partial_charge" } ); for ( core::Size ii(1); ii <= restype.natoms(); ++ii ) { @@ -106,10 +110,29 @@ mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { vec.push_back( std::to_string(pos.y()) ); vec.push_back( std::to_string(pos.z()) ); vec.push_back( std::to_string(ii) ); + vec.push_back( std::to_string(restype.atom_charge(ii)) ); gemmi_add_row( atom_comp, vec ); } + //////////////// Rosetta-specific Atom-level Data + // + //// Note, if there's a standard place to put this data, prefer that + //gemmi::cif::Loop & rosetta_atom_comp = gemmi_add_table(block, "_rosetta_chem_comp_atom", { + // "comp_id", + // "atom_id", + // } ); + // + //for ( core::Size ii(1); ii <= restype.natoms(); ++ii ) { + // std::vector< std::string > vec; + // vec.push_back( restype.name() ); + // vec.push_back( restype.atom_name(ii) ); + // + // gemmi_add_row( rosetta_atom_comp, vec ); + //} + // + //////////////// Standard Bond-level 
Data + gemmi::cif::Loop & bond_comp = gemmi_add_table(block, "_chem_comp_bond", { "comp_id", "atom_id_1", diff --git a/source/src/core/chemical/sdf/MolFileIOData.cc b/source/src/core/chemical/sdf/MolFileIOData.cc index a8b1e4cb88..55d78df318 100644 --- a/source/src/core/chemical/sdf/MolFileIOData.cc +++ b/source/src/core/chemical/sdf/MolFileIOData.cc @@ -156,9 +156,17 @@ MutableResidueTypeOP MolFileIOMolecule::convert_to_ResidueType( restype->name( name_ ); restype->base_name( name_ ); - restype->name3( name_.substr(0,3) ); + if ( name3_.empty() ) { + restype->name3( name_.substr(0,3) ); + } else { + restype->name3( name3_ ); + } restype->interchangeability_group( restype->name3() ); - restype->name1( 'Z' ); + if ( name1_.empty() ) { + restype->name1( 'Z' ); + } else { + restype->name1( name1_[0] ); + } bool uncharged = true; // Have partial charges been set? diff --git a/source/src/core/chemical/sdf/MolFileIOData.hh b/source/src/core/chemical/sdf/MolFileIOData.hh index 91b8abf3d4..69902dc768 100644 --- a/source/src/core/chemical/sdf/MolFileIOData.hh +++ b/source/src/core/chemical/sdf/MolFileIOData.hh @@ -139,11 +139,15 @@ public: MolFileIOMolecule(); ~MolFileIOMolecule() override; - std::string name() const { return name_; } + std::string const & name() const { return name_; } + std::string const & name3() const { return name3_; } + std::string const & name1() const { return name1_; } //core::Size nbr() const { return nbr_; } //core::Real nbr_radius() const { return nbr_radius_; } - void name(std::string name) { name_ = name; } + void name(std::string const & name) { name_ = name; } + void name3(std::string const & name3) { name3_ = name3; } + void name1(std::string const & name1) { name1_ = name1; } //void nbr(core::Size nbr) { nbr_ = nbr; } //void nbr_radius(core::Real nbr_radius) { nbr_radius_ = nbr_radius; } @@ -203,8 +207,8 @@ private: private: std::string name_; - //std::string name3_; - //std::string name1_; + std::string name3_; + std::string name1_; 
MolFileIOGraph molgraph_; std::map< AtomIndex, mioAD > index_atom_map_; StrStrMap molecule_string_data_; From 125975b7410cc7a1b41822d28f66a9bdfe513618 Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Fri, 3 Oct 2025 17:39:48 -0500 Subject: [PATCH 3/7] Allow placing multiple residues into a single CIF block --- source/src/core/chemical/mmCIF/mmCIFWriter.cc | 71 ++++++++++-------- source/src/core/chemical/mmCIF/mmCIFWriter.hh | 6 +- source/src/utility/gemmi_util.hh | 72 +++++++++++++++++++ 3 files changed, 115 insertions(+), 34 deletions(-) diff --git a/source/src/core/chemical/mmCIF/mmCIFWriter.cc b/source/src/core/chemical/mmCIF/mmCIFWriter.cc index e1a6f55c7c..df712f6b44 100644 --- a/source/src/core/chemical/mmCIF/mmCIFWriter.cc +++ b/source/src/core/chemical/mmCIF/mmCIFWriter.cc @@ -29,6 +29,7 @@ #include #include // for as_number + namespace core { namespace chemical { namespace mmCIF { @@ -53,41 +54,57 @@ mmCIFWriter::write_stream(std::ostream & output_stream, core::chemical::ResidueT gemmi::cif::WriteOptions options; options.prefer_pairs = true; options.misuse_hash = true; - - gemmi::cif::write_cif_block_to_stream( output_stream, generate_block(restype), options ); -} - -gemmi::cif::Block -mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { - using utility::gemmi_add_table; - using utility::gemmi_add_row; + options.align_pairs = 48; // Matches from-RCSB alignment + options.align_loops = 10; gemmi::cif::Block block; block.name = restype.name(); + add_data_to_block( block, restype ); - ////////////// Standard Residue-level data + gemmi::cif::write_cif_block_to_stream( output_stream, block, options ); +} - block.set_pair( "_chem_comp.id", restype.name() ); - block.set_pair( "_chem_comp.name", restype.name() ); - block.set_pair( "_chem_comp.one_letter_code", std::string(1, restype.name1() ) ); - block.set_pair( "_chem_comp.three_letter_code", restype.name3() ); +void +mmCIFWriter::add_data_to_block( gemmi::cif::Block &block, 
core::chemical::ResidueType const & restype ) { + using utility::gemmi_get_table; + using utility::gemmi_append_row; + + ////////////// Standard Residue-level data + // Unclassified polymers keep the CIF 'unknown' marker (?), which gemmi_append_row writes unquoted. + std::string type = "?"; if ( restype.is_polymer() ) { if ( restype.is_l_aa() ) { - block.set_pair( "_chem_comp.type", "L-PEPTIDE LINKING" ); + type = "L-PEPTIDE LINKING"; } else if ( restype.is_d_aa() ) { - block.set_pair( "_chem_comp.type", "D-PEPTIDE LINKING" ); + type = "D-PEPTIDE LINKING"; } else if ( restype.is_RNA() ) { - block.set_pair( "_chem_comp.type", "RNA LINKING" ); + type = "RNA LINKING"; } else if ( restype.is_DNA() ) { - block.set_pair( "_chem_comp.type", "DNA LINKING" ); + type = "DNA LINKING"; } } else { - block.set_pair( "_chem_comp.type", "NON-POLYMER" ); + type = "NON-POLYMER"; } + gemmi::cif::Table chem_comp = gemmi_get_table(block, "_chem_comp", { + "id", + "name", + "one_letter_code", + "three_letter_code", + "type" + }); + + gemmi_append_row( chem_comp, { + restype.name(), + restype.name(), + std::string(1, restype.name1() ), + restype.name3(), + type + }); + ////////////// Standard Atom-level Data - gemmi::cif::Loop & atom_comp = gemmi_add_table(block, "_chem_comp_atom", { + gemmi::cif::Table atom_comp = gemmi_get_table(block, "_chem_comp_atom", { "comp_id", "atom_id", "type_symbol", @@ -95,7 +112,6 @@ mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { "model_Cartn_x", "model_Cartn_y", "model_Cartn_z", - "pdbx_ordinal", "partial_charge" } ); @@ -109,16 +125,15 @@ mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { vec.push_back( std::to_string(pos.x()) ); vec.push_back( std::to_string(pos.y()) ); vec.push_back( std::to_string(pos.z()) ); - vec.push_back( std::to_string(ii) ); vec.push_back( std::to_string(restype.atom_charge(ii)) ); - gemmi_add_row( atom_comp, vec ); + gemmi_append_row( atom_comp, vec ); } //////////////// Rosetta-specific Atom-level Data // //// Note, if there's a standard place to put this data, prefer that -
//gemmi::cif::Loop & rosetta_atom_comp = gemmi_add_table(block, "_rosetta_chem_comp_atom", { + //gemmi::cif::Table rosetta_atom_comp = gemmi_get_table(block, "_rosetta_chem_comp_atom", { // "comp_id", // "atom_id", // } ); @@ -128,18 +143,17 @@ mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { // vec.push_back( restype.name() ); // vec.push_back( restype.atom_name(ii) ); // - // gemmi_add_row( rosetta_atom_comp, vec ); + // gemmi_append_row( rosetta_atom_comp, vec ); //} // //////////////// Standard Bond-level Data - gemmi::cif::Loop & bond_comp = gemmi_add_table(block, "_chem_comp_bond", { + gemmi::cif::Table bond_comp = gemmi_get_table(block, "_chem_comp_bond", { "comp_id", "atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", - "pdbx_ordinal" } ); utility::vector1< std::pair< core::Size, core::Size > > const & all_bonds = restype.bonds(); @@ -166,12 +180,9 @@ mmCIFWriter::generate_block( core::chemical::ResidueType const & restype ) { } vec.push_back( bond_type ); vec.push_back( (restype.bond_type( atm1, atm2 ) == AromaticBond) ? 
"Y" : "N" ); - vec.push_back( std::to_string(ii) ); - gemmi_add_row( bond_comp, vec ); + gemmi_append_row( bond_comp, vec ); } - - return block; } diff --git a/source/src/core/chemical/mmCIF/mmCIFWriter.hh b/source/src/core/chemical/mmCIF/mmCIFWriter.hh index b790a45fe1..300bd798f9 100644 --- a/source/src/core/chemical/mmCIF/mmCIFWriter.hh +++ b/source/src/core/chemical/mmCIF/mmCIFWriter.hh @@ -51,10 +51,8 @@ public: void write_stream( std::ostream & output_stream, core::chemical::ResidueType const & restype ); -protected: - - gemmi::cif::Block - generate_block( core::chemical::ResidueType const & restype ); + void + add_data_to_block( gemmi::cif::Block & block, core::chemical::ResidueType const & restype ); private: diff --git a/source/src/utility/gemmi_util.hh b/source/src/utility/gemmi_util.hh index 6426ad5a96..e6f223efc2 100644 --- a/source/src/utility/gemmi_util.hh +++ b/source/src/utility/gemmi_util.hh @@ -26,6 +26,8 @@ namespace utility { +/////////////// READING UTILS + /// As the default as_char() is not robust to empty strings inline char @@ -53,6 +55,76 @@ inline int find_gemmi_column(gemmi::cif::Table & table, std::string const & name return -1; } +/////////////// WRITING UTILS + +inline +void +normalize_table_name(std::string & table_name) { + if ( table_name.size() == 0 ) { + table_name = "_TABLE."; + } + if ( table_name[0] != '_' ) { + table_name = '_' + table_name; + } + if ( table_name[ table_name.size()-1 ] != '.' ) { + table_name = table_name + '.'; + } +} + +/// @brief Gets a table with the given name (cif category) and column names for writing +/// If the table does not exist, create it. +/// If it does already exist, make sure that all the provided column names are present. +/// +/// (The returned table is simply a view to the underlying Block.) 
+/// +/// Rows can be added with gemmi_append_row() below +inline +gemmi::cif::Table +gemmi_get_table(gemmi::cif::Block & block, std::string table_name, std::vector const & columns ) { + normalize_table_name(table_name); + + if ( ! block.has_mmcif_category(table_name) ) { + block.init_loop(table_name, columns); + } else { + // Already has the table, make sure we have the columns. + block.find_mmcif_category(table_name).ensure_loop(); + gemmi::cif::Loop * loop = block.find_mmcif_category(table_name).get_loop(); + runtime_assert( loop != nullptr ); + std::vector new_tags; + for ( std::string const & tag: columns ) { + if ( ! loop->has_tag( table_name + tag ) ) { + new_tags.push_back( table_name + tag ); + } + } + if ( ! new_tags.empty() ) { + loop->add_columns( new_tags, "?" ); + } + } + + return block.find( table_name, columns ); +} + +template < class Iterable > +void +gemmi_append_row(gemmi::cif::Table & table, Iterable const & values ) { + std::vector< std::string > quoted; + for ( auto iter(values.begin()); iter != values.end(); ++iter ) { + if ( *iter == "?" || *iter == "." ) { + // Assume we actually want it as a null, rather than a quoted question mark + quoted.push_back( *iter ); + } else { + quoted.push_back( gemmi::cif::quote( *iter ) ); + } + } + table.append_row(quoted); +} + +inline +void +gemmi_append_row(gemmi::cif::Table & table, std::initializer_list const & init_list ) { + gemmi_append_row< std::initializer_list >(table, init_list); +} + /// @brief Adds a new table (actually a 'Loop' object) to the given block, with the given column names /// Returns a reference to the newly added loop object (which can be augmented with the `gemmi_add_row()` function) inline From b57f8a2dbf27d6eebbf7f657970e50dcb098d30e Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Fri, 10 Oct 2025 12:35:12 -0500 Subject: [PATCH 4/7] Add integration test. 
--- .../integration/tests/restype_converter/flags | 2 + .../tests/restype_converter/input/TTN.cif | 106 ++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 tests/integration/tests/restype_converter/input/TTN.cif diff --git a/tests/integration/tests/restype_converter/flags b/tests/integration/tests/restype_converter/flags index cfa25a3f55..72ff9ddbbb 100644 --- a/tests/integration/tests/restype_converter/flags +++ b/tests/integration/tests/restype_converter/flags @@ -2,11 +2,13 @@ -name3 TRP Glc -extra_res_fa input/7cpa.params input/000.params -extra_res_mol input/XST.sdf +-extra_res_mmCIF input/TTN.cif -include_sugars -include_lipids -load_PDB_components # Needed for include lipids (when this test is written) -out:path:all output_fa -out:pdb +-out:mmCIF -params_out -sdf_out diff --git a/tests/integration/tests/restype_converter/input/TTN.cif b/tests/integration/tests/restype_converter/input/TTN.cif new file mode 100644 index 0000000000..63f9b8f086 --- /dev/null +++ b/tests/integration/tests/restype_converter/input/TTN.cif @@ -0,0 +1,106 @@ +data_TTN +# +_chem_comp.id TTN +_chem_comp.name TARTRONATE +_chem_comp.type NON-POLYMER +_chem_comp.pdbx_type HETAIN +_chem_comp.formula "C3 H2 O5" +_chem_comp.mon_nstd_parent_comp_id ? +_chem_comp.pdbx_synonyms ? +_chem_comp.pdbx_formal_charge -2 +_chem_comp.pdbx_initial_date 2000-02-16 +_chem_comp.pdbx_modified_date 2011-06-04 +_chem_comp.pdbx_ambiguous_flag N +_chem_comp.pdbx_release_status REL +_chem_comp.pdbx_replaced_by ? +_chem_comp.pdbx_replaces ? +_chem_comp.formula_weight 118.045 +_chem_comp.one_letter_code ? +_chem_comp.three_letter_code TTN +_chem_comp.pdbx_model_coordinates_details ? +_chem_comp.pdbx_model_coordinates_missing_flag N +_chem_comp.pdbx_ideal_coordinates_details ? +_chem_comp.pdbx_ideal_coordinates_missing_flag N +_chem_comp.pdbx_model_coordinates_db_code 1EFL +_chem_comp.pdbx_subcomponent_list ? 
+_chem_comp.pdbx_processing_site PDBJ +# +loop_ +_chem_comp_atom.comp_id +_chem_comp_atom.atom_id +_chem_comp_atom.alt_atom_id +_chem_comp_atom.type_symbol +_chem_comp_atom.charge +_chem_comp_atom.pdbx_align +_chem_comp_atom.pdbx_aromatic_flag +_chem_comp_atom.pdbx_leaving_atom_flag +_chem_comp_atom.pdbx_stereo_config +_chem_comp_atom.model_Cartn_x +_chem_comp_atom.model_Cartn_y +_chem_comp_atom.model_Cartn_z +_chem_comp_atom.pdbx_model_Cartn_x_ideal +_chem_comp_atom.pdbx_model_Cartn_y_ideal +_chem_comp_atom.pdbx_model_Cartn_z_ideal +_chem_comp_atom.pdbx_component_atom_id +_chem_comp_atom.pdbx_component_comp_id +_chem_comp_atom.pdbx_ordinal +TTN C1 C1 C 0 1 N N N 30.058 -24.947 -0.869 -1.234 -0.198 0.011 C1 TTN 1 +TTN C2 C2 C 0 1 N N N 29.902 -23.424 -0.893 0.007 0.373 0.648 C2 TTN 2 +TTN C3 C3 C 0 1 N N N 31.136 -22.768 -0.290 1.227 -0.239 0.009 C3 TTN 3 +TTN O1 O1 O 0 1 N N N 29.736 -25.598 0.124 -2.012 0.558 -0.659 O1 TTN 4 +TTN O2 O2 O -1 1 N N N 30.519 -25.541 -1.852 -1.506 -1.436 0.143 O2 TTN 5 +TTN O3 O3 O 0 1 N N N 28.737 -23.019 -0.160 0.031 1.790 0.459 O3 TTN 6 +TTN O4 O4 O 0 1 N N N 32.080 -22.429 -1.021 2.078 0.504 -0.581 O4 TTN 7 +TTN O5 O5 O -1 1 N N N 31.197 -22.578 0.934 1.407 -1.500 0.059 O5 TTN 8 +TTN H2 H2 H 0 1 N N N 29.786 -23.100 -1.953 0.004 0.149 1.715 H2 TTN 9 +TTN HO3 HO3 H 0 1 N N N 28.640 -22.074 -0.174 0.032 1.944 -0.495 HO3 TTN 10 +# +loop_ +_chem_comp_bond.comp_id +_chem_comp_bond.atom_id_1 +_chem_comp_bond.atom_id_2 +_chem_comp_bond.value_order +_chem_comp_bond.pdbx_aromatic_flag +_chem_comp_bond.pdbx_stereo_config +_chem_comp_bond.pdbx_ordinal +TTN C1 C2 SING N N 1 +TTN C1 O1 DOUB N N 2 +TTN C1 O2 SING N N 3 +TTN C2 C3 SING N N 4 +TTN C2 O3 SING N N 5 +TTN C2 H2 SING N N 6 +TTN C3 O4 DOUB N N 7 +TTN C3 O5 SING N N 8 +TTN O3 HO3 SING N N 9 +# +loop_ +_pdbx_chem_comp_descriptor.comp_id +_pdbx_chem_comp_descriptor.type +_pdbx_chem_comp_descriptor.program +_pdbx_chem_comp_descriptor.program_version +_pdbx_chem_comp_descriptor.descriptor 
+TTN SMILES ACDLabs 10.04 "[O-]C(=O)C(O)C([O-])=O" +TTN SMILES_CANONICAL CACTVS 3.341 "OC(C([O-])=O)C([O-])=O" +TTN SMILES CACTVS 3.341 "OC(C([O-])=O)C([O-])=O" +TTN SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 "C(C(=O)[O-])(C(=O)[O-])O" +TTN SMILES "OpenEye OEToolkits" 1.5.0 "C(C(=O)[O-])(C(=O)[O-])O" +TTN InChI InChI 1.03 "InChI=1S/C3H4O5/c4-1(2(5)6)3(7)8/h1,4H,(H,5,6)(H,7,8)/p-2" +TTN InChIKey InChI 1.03 ROBFUDYVXSDBQM-UHFFFAOYSA-L +# +loop_ +_pdbx_chem_comp_identifier.comp_id +_pdbx_chem_comp_identifier.type +_pdbx_chem_comp_identifier.program +_pdbx_chem_comp_identifier.program_version +_pdbx_chem_comp_identifier.identifier +TTN "SYSTEMATIC NAME" ACDLabs 10.04 hydroxypropanedioate +TTN "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.5.0 2-hydroxypropanedioate +# +loop_ +_pdbx_chem_comp_audit.comp_id +_pdbx_chem_comp_audit.action_type +_pdbx_chem_comp_audit.date +_pdbx_chem_comp_audit.processing_site +TTN "Create component" 2000-02-16 PDBJ +TTN "Modify descriptor" 2011-06-04 RCSB +# From 9afce8c0de36723901468a3fea2ba690dcfe9797 Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Fri, 10 Oct 2025 13:19:17 -0500 Subject: [PATCH 5/7] Beautify --- source/src/core/chemical/mmCIF/mmCIFWriter.cc | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/source/src/core/chemical/mmCIF/mmCIFWriter.cc b/source/src/core/chemical/mmCIF/mmCIFWriter.cc index df712f6b44..ca674f782e 100644 --- a/source/src/core/chemical/mmCIF/mmCIFWriter.cc +++ b/source/src/core/chemical/mmCIF/mmCIFWriter.cc @@ -134,16 +134,16 @@ mmCIFWriter::add_data_to_block( gemmi::cif::Block &block, core::chemical::Residu // //// Note, if there's a standard place to put this data, prefer that //gemmi::cif::Table rosetta_atom_comp = gemmi_get_table(block, "_rosetta_chem_comp_atom", { - // "comp_id", - // "atom_id", - // } ); + // "comp_id", + // "atom_id", + // } ); // //for ( core::Size ii(1); ii <= restype.natoms(); ++ii ) { - // std::vector< std::string > vec; - // vec.push_back( 
restype.name() ); - // vec.push_back( restype.atom_name(ii) ); + // std::vector< std::string > vec; + // vec.push_back( restype.name() ); + // vec.push_back( restype.atom_name(ii) ); // - // gemmi_append_row( rosetta_atom_comp, vec ); + // gemmi_append_row( rosetta_atom_comp, vec ); //} // //////////////// Standard Bond-level Data @@ -167,16 +167,16 @@ mmCIFWriter::add_data_to_block( gemmi::cif::Block &block, core::chemical::Residu vec.push_back( restype.atom_name( atm2 ) ); std::string bond_type = "UNK"; switch ( restype.bond_type( atm1, atm2 ) ) { - case SingleBond: - bond_type = "SING"; break; - case DoubleBond: - bond_type = "DOUB"; break; - case TripleBond: - bond_type = "TRIP"; break; - case AromaticBond: - bond_type = "AROM"; break; - default: - bond_type = "UNK"; break; + case SingleBond : + bond_type = "SING"; break; + case DoubleBond : + bond_type = "DOUB"; break; + case TripleBond : + bond_type = "TRIP"; break; + case AromaticBond : + bond_type = "AROM"; break; + default : + bond_type = "UNK"; break; } vec.push_back( bond_type ); vec.push_back( (restype.bond_type( atm1, atm2 ) == AromaticBond) ? "Y" : "N" ); From 720d571d3ebc9204f18b8827856608697b941ee5 Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Fri, 10 Oct 2025 14:23:25 -0500 Subject: [PATCH 6/7] Move functions to cc file. 
--- source/src/utility.src.settings | 1 + source/src/utility/gemmi_util.cc | 121 +++++++++++++++++++++++++++++++ source/src/utility/gemmi_util.hh | 76 ++----------------- 3 files changed, 127 insertions(+), 71 deletions(-) create mode 100644 source/src/utility/gemmi_util.cc diff --git a/source/src/utility.src.settings b/source/src/utility.src.settings index c684c6bd1a..9dfc2668c5 100644 --- a/source/src/utility.src.settings +++ b/source/src/utility.src.settings @@ -18,6 +18,7 @@ sources = { "curl", "dating", "exit", + "gemmi_util", "heap", "inline_file_provider", "integer_mapping", diff --git a/source/src/utility/gemmi_util.cc b/source/src/utility/gemmi_util.cc new file mode 100644 index 0000000000..352eac704d --- /dev/null +++ b/source/src/utility/gemmi_util.cc @@ -0,0 +1,121 @@ +// -*- mode:c++;tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*- +// vi: set ts=2 noet: +// +// (c) Copyright Rosetta Commons Member Institutions. +// (c) This file is part of the Rosetta software suite and is made available under license. +// (c) The Rosetta software is developed by the contributing members of the Rosetta Commons. +// (c) For more information, see http://www.rosettacommons.org. Questions about this can be +// (c) addressed to University of Washington CoMotion, email: license@uw.edu. + +/// @file utility/gemmi_util.cc +/// @brief Utilities for working with Gemmi CIF file data +/// +/// @author Rocco Moretti (rmorettiase@gmail.com) + +// Unit headers +#include +#include + +#include + +#include +#include +#include + +namespace utility { + +/////////////// READING UTILS + +/// As the default as_char() is not robust to empty strings +char +as_char(std::string const & value, char null) { + if ( value.size() == 0 ) { + return null; + } + if ( value.size() == 2 && (value == "''" || value == "\"\"" ) ) { + return null; + } + return gemmi::cif::as_char(value, null); +} + +/// @brief find the index for the given column name in the table. 
+/// If it can't be found, return a negative number +int find_gemmi_column(gemmi::cif::Table & table, std::string const & name) { + if ( table.width() == 0 ) { return -1; } // No columns + gemmi::cif::Table::Row const & tags = table.tags(); + for ( int ii = 0; ii < int(tags.size()); ++ii ) { + std::string const & tag = tags[ii]; + if ( name == tag || name == tag.substr( table.prefix_length ) ) { + return ii; + } + } + return -1; +} + +/////////////// WRITING UTILS + +void +normalize_table_name(std::string & table_name) { + if ( table_name.size() == 0 ) { + table_name = "_TABLE."; + } + if ( table_name[0] != '_' ) { + table_name = '_' + table_name; + } + if ( table_name[ table_name.size()-1 ] != '.' ) { + table_name = table_name + '.'; + } +} + +/// @brief Gets a table with the given name (cif category) and column names for writing +/// If the table does not exist, create it. +/// If it does already exist, make sure that all the provided column names are present. +/// +/// (The returned table is simply a view to the underlying Block.) +/// +/// Rows can be added with gemmi_append_row() below +gemmi::cif::Table +gemmi_get_table(gemmi::cif::Block & block, std::string table_name, std::vector const & columns ) { + normalize_table_name(table_name); + + if ( ! block.has_mmcif_category(table_name) ) { + block.init_loop(table_name, columns); + } else { + // Already has the table, make sure we have the columns. + block.find_mmcif_category(table_name).ensure_loop(); + gemmi::cif::Loop * loop = block.find_mmcif_category(table_name).get_loop(); + runtime_assert( loop != nullptr ); + std::vector new_tags; + for ( std::string const & tag: columns ) { + if ( ! loop->has_tag( table_name + tag ) ) { + new_tags.push_back( table_name + tag ); + } + } + if ( ! new_tags.empty() ) { + loop->add_columns( new_tags, "?" 
); + } + } + + return block.find( table_name, columns ); +} + +/// @brief Adds a new table (actually a 'Loop' object) to the given block, with the given column names +/// Returns a reference to the newly added loop object (which can be augmented with the `gemmi_add_row()` function) +gemmi::cif::Loop & +gemmi_add_table(gemmi::cif::Block & block, std::string table_name, std::vector const & columns) { + if ( table_name.size() == 0 ) { + table_name = "_TABLE."; + } + if ( table_name[0] != '_' ) { + table_name = '_' + table_name; + } + if ( table_name[ table_name.size()-1 ] != '.' ) { + table_name = table_name + '.'; + } + + return block.init_loop(table_name, columns); +} + + +} // namespace utility + diff --git a/source/src/utility/gemmi_util.hh b/source/src/utility/gemmi_util.hh index e6f223efc2..6bdf81ba9b 100644 --- a/source/src/utility/gemmi_util.hh +++ b/source/src/utility/gemmi_util.hh @@ -29,47 +29,17 @@ namespace utility { /////////////// READING UTILS /// As the default as_char() is not robust to empty strings -inline char -as_char(std::string const & value, char null) { - if ( value.size() == 0 ) { - return null; - } - if ( value.size() == 2 && (value == "''" || value == "\"\"" ) ) { - return null; - } - return gemmi::cif::as_char(value, null); -} +as_char(std::string const & value, char null); /// @brief find the index for the given column name in the table. 
/// If it can't be found, return a negative number -inline int find_gemmi_column(gemmi::cif::Table & table, std::string const & name) { - if ( table.width() == 0 ) { return -1; } // No columns - gemmi::cif::Table::Row const & tags = table.tags(); - for ( int ii = 0; ii < int(tags.size()); ++ii ) { - std::string const & tag = tags[ii]; - if ( name == tag || name == tag.substr( table.prefix_length ) ) { - return ii; - } - } - return -1; -} +int find_gemmi_column(gemmi::cif::Table & table, std::string const & name); /////////////// WRITING UTILS -inline void -normalize_table_name(std::string & table_name) { - if ( table_name.size() == 0 ) { - table_name = "_TABLE."; - } - if ( table_name[0] != '_' ) { - table_name = '_' + table_name; - } - if ( table_name[ table_name.size()-1 ] != '.' ) { - table_name = table_name + '.'; - } -} +normalize_table_name(std::string & table_name); /// @brief Gets a table with the given name (cif category) and column names for writing /// If the table does not exist, create it. @@ -78,31 +48,8 @@ normalize_table_name(std::string & table_name) { /// (The returned table is simply a view to the underlying Block.) /// /// Rows can be added with gemmi_append_row() below -inline gemmi::cif::Table -gemmi_get_table(gemmi::cif::Block & block, std::string table_name, std::vector const & columns ) { - normalize_table_name(table_name); - - if ( ! block.has_mmcif_category(table_name) ) { - block.init_loop(table_name, columns); - } else { - // Already has the table, make sure we have the columns. - block.find_mmcif_category(table_name).ensure_loop(); - gemmi::cif::Loop * loop = block.find_mmcif_category(table_name).get_loop(); - runtime_assert( loop != nullptr ); - std::vector new_tags; - for ( std::string const & tag: columns ) { - if ( ! loop->has_tag( table_name + tag ) ) { - new_tags.push_back( table_name + tag ); - } - } - if ( ! new_tags.empty() ) { - loop->add_columns( new_tags, "?" 
); - } - } - - return block.find( table_name, columns ); -} +gemmi_get_table(gemmi::cif::Block & block, std::string table_name, std::vector const & columns ); template < class Iterable > void @@ -127,21 +74,8 @@ gemmi_append_row(gemmi::cif::Table & table, std::initializer_list c /// @brief Adds a new table (actually a 'Loop' object) to the given block, with the given column names /// Returns a reference to the newly added loop object (which can be augmented with the `gemmi_add_row()` function) -inline gemmi::cif::Loop & -gemmi_add_table(gemmi::cif::Block & block, std::string table_name, std::vector const & columns) { - if ( table_name.size() == 0 ) { - table_name = "_TABLE."; - } - if ( table_name[0] != '_' ) { - table_name = '_' + table_name; - } - if ( table_name[ table_name.size()-1 ] != '.' ) { - table_name = table_name + '.'; - } - - return block.init_loop(table_name, columns); -} +gemmi_add_table(gemmi::cif::Block & block, std::string table_name, std::vector const & columns); /// @brief Adds a row to the table. Takes care of quoting the entries properly template< class Iterable > From 080939414fed317cca22b590a82b3797ea43a26c Mon Sep 17 00:00:00 2001 From: Rocco Moretti Date: Fri, 10 Oct 2025 15:41:27 -0500 Subject: [PATCH 7/7] Fix cppcheck --- source/src/core/chemical/mmCIF/mmCIFParser.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/src/core/chemical/mmCIF/mmCIFParser.cc b/source/src/core/chemical/mmCIF/mmCIFParser.cc index 4949354d52..07b96850db 100644 --- a/source/src/core/chemical/mmCIF/mmCIFParser.cc +++ b/source/src/core/chemical/mmCIF/mmCIFParser.cc @@ -106,11 +106,11 @@ mmCIFParser::get_molfile_molecule( gemmi::cif::Block & block ) { molecule->name( block.name ); std::string const * name3 = block.find_value("_chem_comp.three_letter_code"); // Non-owning raw pointer, null if not found. 
- if ( name3 ) { + if ( name3 && !name3->empty() ) { molecule->name3( as_string(name3) ); } std::string const * name1 = block.find_value("_chem_comp.one_letter_code"); // Non-owning raw pointer, null if not found. - if ( name1 && name1 ) { + if ( name1 && !name1->empty() ) { molecule->name1( as_string(name1) ); }