RDKit
Open-source cheminformatics and machine learning.
SLNParseOps.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2008, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior
18 // written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Created by Greg Landrum, September 2006
33 //
34 #include <RDGeneral/export.h>
35 #ifndef __RD_SLNPARSEOPS_H__
36 #define __RD_SLNPARSEOPS_H__
37 
38 #include <vector>
41 #include <GraphMol/RDKitBase.h>
42 #include <GraphMol/RDKitQueries.h>
44 #include <boost/lexical_cast.hpp>
46 
47 namespace RDKit {
48 namespace SLNParse {
49 namespace {
50 //! set a bookmark in the molecule if the atom has an associated ID:
51 void bookmarkAtomID(RWMol *mp, Atom *atom) {
52  PRECONDITION(mp, "bad molecule");
53  PRECONDITION(atom, "bad atom");
54  unsigned int label;
55  if (atom->getPropIfPresent(common_properties::_AtomID, label)) {
56  if (mp->hasAtomBookmark(label)) {
57  std::stringstream err;
58  err << "SLN Parser error: Atom ID " << label << " used a second time.";
59  throw SLNParseException(err.str());
60  }
61  if (mp->hasBondBookmark(label)) {
62  std::stringstream err;
63  err << "SLN Parser error: Atom ID " << label
64  << " appears *after* its ring closure.";
65  throw SLNParseException(err.str());
66  }
67  mp->setAtomBookmark(atom, label);
68  }
69 }
70 
71 //! adds a bond, being careful to handle aromaticity properly
72 template <typename BondType>
73 void addBondToMol(RWMol *mp, BondType *bond) {
74  PRECONDITION(mp, "null molecule");
75  PRECONDITION(bond, "null bond");
76  mp->addBond(bond, true);
77  if (bond->getBondType() == Bond::AROMATIC) {
78  // SLN doesn't have aromatic atom types, aromaticity is a property
79  // of the bonds themselves, so we need to set the atom types:
80  bond->setIsAromatic(true);
81  bond->getBeginAtom()->setIsAromatic(true);
82  bond->getEndAtom()->setIsAromatic(true);
83  }
84 }
85 } // end of anonymous namespace
86 
87 // ------------------------------------------------------------------------------------
88 //! initialize a molecule
89 template <typename AtomType>
90 int startMol(std::vector<RWMol *> &molList, AtomType *firstAtom,
91  bool doingQuery) {
92  PRECONDITION(firstAtom, "empty atom");
93  RWMol *mp = new RWMol();
94  mp->addAtom(firstAtom, true, true);
95  bookmarkAtomID(mp, firstAtom);
96 
97  if (!doingQuery) {
98  // add any hydrogens that are set on the atom, otherwise getting the
99  // numbering right
100  // is just too hard:
101  for (unsigned int i = 0; i < firstAtom->getNumExplicitHs(); ++i) {
102  int hIdx = mp->addAtom(new Atom(1), false, true);
103  mp->addBond(0, hIdx, Bond::SINGLE);
104  }
105  firstAtom->setNumExplicitHs(0);
106  }
107 
108  int sz = molList.size();
109  molList.push_back(mp);
110  return sz;
111 };
112 
113 // ------------------------------------------------------------------------------------
114 //! adds an atom to a molecule
115 template <typename AtomType, typename BondType>
116 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
117  AtomType *atom, BondType *bond, bool doingQuery) {
118  PRECONDITION(idx < molList.size(), "bad index");
119  RWMol *mp = molList[idx];
120  PRECONDITION(mp, "null molecule");
121  PRECONDITION(atom, "empty atom");
122  PRECONDITION(bond, "null bond");
123 
124  Atom *a1 = mp->getActiveAtom();
125  int atomIdx1 = a1->getIdx();
126  int atomIdx2 = mp->addAtom(atom, true, true);
127  bookmarkAtomID(mp, atom);
128  bond->setOwningMol(mp);
129  bond->setBeginAtomIdx(atomIdx1);
130  bond->setEndAtomIdx(atomIdx2);
131  addBondToMol(mp, bond);
132 
133  if (!doingQuery) {
134  // add any hydrogens that are set on the atom, otherwise getting the
135  // numbering right
136  // is just too hard:
137  for (unsigned int i = 0; i < atom->getNumExplicitHs(); ++i) {
138  int hIdx = mp->addAtom(new Atom(1), false, true);
139  mp->addBond(atomIdx2, hIdx, Bond::SINGLE);
140  }
141  atom->setNumExplicitHs(0);
142  }
143 }
144 //! \overload
145 template <typename AtomType>
146 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
147  AtomType *atom, bool doingQuery) {
148  addAtomToMol(molList, idx, atom, new Bond(Bond::SINGLE), doingQuery);
149 }
150 
151 // ------------------------------------------------------------------------------------
152 //! closes an indexed ring in a molecule using the bond provided
153 /// The bond is formed from the atom in the molecule with the
154 /// corresponding bookmark to the active atom
155 //
156 template <typename BondType>
157 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
158  unsigned int ringIdx, BondType *bond,
159  bool postponeAllowed = true) {
160  PRECONDITION(molIdx < molList.size(), "bad index");
161  RWMol *mp = molList[molIdx];
162  PRECONDITION(mp, "null molecule");
163  PRECONDITION(bond, "Null bond");
164 
165  if (!mp->hasAtomBookmark(ringIdx)) {
166  if (postponeAllowed) {
167  // save it for later:
168  bond->setOwningMol(mp);
169  bond->setEndAtomIdx(mp->getActiveAtom()->getIdx());
170  mp->setBondBookmark(bond, ringIdx);
171  return;
172  } else {
173  std::stringstream err;
174  err << "SLN Parser error: Ring closure " << ringIdx
175  << " does not have a corresponding opener.";
176  throw SLNParseException(err.str());
177  }
178  }
179  Atom *opener = mp->getAtomWithBookmark(ringIdx);
180  CHECK_INVARIANT(opener, "invalid atom");
181 
182  Atom *closer = mp->getActiveAtom();
183  bond->setOwningMol(mp);
184  bond->setBeginAtom(opener);
185  bond->setEndAtom(closer);
186  addBondToMol(mp, bond);
187 };
188 //! \overload
189 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
190  unsigned int ringIdx) {
191  auto *newBond = new Bond(Bond::SINGLE);
192  try {
193  closeRingBond(molList, molIdx, ringIdx, newBond);
194  } catch (...) {
195  delete newBond;
196  throw;
197  }
198 };
199 
200 // ------------------------------------------------------------------------------------
201 // NOTE: this takes over responsibility for the bond
202 template <typename BondType>
203 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
204  unsigned int branchIdx, BondType *&bond) {
205  PRECONDITION(molIdx < molList.size(), "bad index");
206  RWMol *mp = molList[molIdx];
207  PRECONDITION(mp, "null molecule");
208  PRECONDITION(branchIdx < molList.size(), "bad index");
209  RWMol *branch = molList[branchIdx];
210  PRECONDITION(branch, "null branch");
211  PRECONDITION(bond, "null bond");
212 
213  unsigned int activeAtomIdx = mp->getActiveAtom()->getIdx();
214  unsigned int nOrigAtoms = mp->getNumAtoms();
215 
216  //
217  // Add the fragment's atoms and bonds to the molecule:
218  //
219  mp->insertMol(*branch);
220 
221  // copy in any atom bookmarks from the branch:
222  for (ROMol::ATOM_BOOKMARK_MAP::const_iterator bmIt =
223  branch->getAtomBookmarks()->begin();
224  bmIt != branch->getAtomBookmarks()->end(); ++bmIt) {
225  if (bmIt->first < 0) {
226  continue;
227  }
228  if (mp->hasAtomBookmark(bmIt->first)) {
229  std::stringstream err;
230  err << "SLN Parser error: Atom ID " << bmIt->first
231  << " used a second time.";
232  throw SLNParseException(err.str());
233  } else if (mp->hasBondBookmark(bmIt->first)) {
234  std::stringstream err;
235  err << "SLN Parser error: Atom ID " << bmIt->first
236  << " appears *after* its ring closure.";
237  throw SLNParseException(err.str());
238  } else {
239  CHECK_INVARIANT(bmIt->second.size() == 1,
240  "bad atom bookmark list on branch");
241  Atom *tgtAtom =
242  mp->getAtomWithIdx((*bmIt->second.begin())->getIdx() + nOrigAtoms);
243  mp->setAtomBookmark(tgtAtom, bmIt->first);
244  }
245  }
246 
247  // loop over bond bookmarks in the branch and close the corresponding rings
248  for (ROMol::BOND_BOOKMARK_MAP::const_iterator bmIt =
249  branch->getBondBookmarks()->begin();
250  bmIt != branch->getBondBookmarks()->end(); ++bmIt) {
251  CHECK_INVARIANT(bmIt->second.size() >= 1,
252  "bad bond bookmark list on branch");
253  for (ROMol::BOND_PTR_LIST::const_iterator bondIt = bmIt->second.begin();
254  bondIt != bmIt->second.end(); ++bondIt) {
255  Bond *tgtBond = *bondIt;
256  if (bmIt->first > 0 && mp->hasAtomBookmark(bmIt->first)) {
257  Atom *tmpAtom = mp->getActiveAtom();
258  mp->setActiveAtom(
259  mp->getAtomWithIdx(tgtBond->getEndAtomIdx() + nOrigAtoms));
260  closeRingBond(molList, molIdx, bmIt->first, tgtBond, false);
261  mp->setActiveAtom(tmpAtom);
262  } else {
263  // no partner found yet, copy into this mol:
264  tgtBond->setOwningMol(mp);
265  tgtBond->setEndAtomIdx(tgtBond->getEndAtomIdx() + nOrigAtoms);
266  mp->setBondBookmark(tgtBond, bmIt->first);
267  }
268  }
269  }
270 
271  // set the connecting bond:
272  if (bond->getBondType() != Bond::IONIC) {
273  bond->setOwningMol(mp);
274  bond->setBeginAtomIdx(activeAtomIdx);
275  bond->setEndAtomIdx(nOrigAtoms);
276  addBondToMol(mp, bond);
277  } else {
278  delete bond;
279  }
280  bond = nullptr;
281 
282  delete branch;
283  unsigned int sz = molList.size();
284  if (sz == branchIdx + 1) {
285  molList.resize(sz - 1);
286  }
287  return molIdx;
288 };
289 //! \overload
290 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
291  unsigned int branchIdx) {
292  Bond *newBond = new Bond(Bond::SINGLE);
293  int ret = -1;
294  try {
295  ret = addBranchToMol(molList, molIdx, branchIdx, newBond);
296  } catch (...) {
297  delete newBond;
298  throw;
299  }
300  return ret;
301 };
302 
303 // ------------------------------------------------------------------------------------
304 //! adds the atoms and bonds from a fragment to the molecule, sets no bond
305 /// between them
306 int addFragToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
307  unsigned int fragIdx) {
308  Bond *newBond = new Bond(Bond::IONIC);
309  int ret = -1;
310  try {
311  ret = addBranchToMol(molList, molIdx, fragIdx, newBond);
312  } catch (...) {
313  delete newBond;
314  throw;
315  }
316  return ret;
317 }
318 
319 //! convenience function to convert the argument to a string
320 template <typename T>
321 std::string convertToString(T val) {
322  std::string res = boost::lexical_cast<std::string>(val);
323  return res;
324 }
325 
326 } // end of namespace SLNParse
327 } // end of namespace RDKit
328 #endif
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
pulls in the core RDKit functionality
pulls in the RDKit Query functionality
The class for representing atoms.
Definition: Atom.h:68
unsigned int getIdx() const
returns our index within the ROMol
Definition: Atom.h:143
void setOwningMol(ROMol *other)
sets our owning molecule
class for representing a bond
Definition: Bond.h:47
void setEndAtomIdx(unsigned int what)
sets the index of our end Atom
@ AROMATIC
Definition: Bond.h:69
@ IONIC
Definition: Bond.h:70
@ SINGLE
Definition: Bond.h:58
void setOwningMol(ROMol *other)
sets our owning molecule
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:220
bool hasAtomBookmark(int mark) const
queries whether or not any atoms are associated with a bookmark
Definition: ROMol.h:502
unsigned int getNumAtoms() const
returns our number of atoms
Definition: ROMol.h:413
void setBondBookmark(Bond *bond, int mark)
associates a Bond pointer with a bookmark
Definition: ROMol.h:507
BOND_BOOKMARK_MAP * getBondBookmarks()
returns a pointer to all of our bond bookmarks
Definition: ROMol.h:527
void setAtomBookmark(Atom *at, int mark)
associates an Atom pointer with a bookmark
Definition: ROMol.h:479
Atom * getAtomWithIdx(unsigned int idx)
returns a pointer to a particular Atom
Atom * getAtomWithBookmark(int mark)
returns the first Atom associated with the bookmark provided
ATOM_BOOKMARK_MAP * getAtomBookmarks()
returns a pointer to all of our atom bookmarks
Definition: ROMol.h:504
bool hasBondBookmark(int mark) const
queries whether or not any bonds are associated with a bookmark
Definition: ROMol.h:525
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
Atom * getActiveAtom()
returns a pointer to the "active" Atom
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
void setActiveAtom(Atom *atom)
sets our activeAtom
void insertMol(const ROMol &other)
insert the atoms and bonds from other into this molecule
void addAtomToMol(std::vector< RWMol * > &molList, unsigned int idx, AtomType *atom, BondType *bond, bool doingQuery)
adds an atom to a molecule
Definition: SLNParseOps.h:116
int startMol(std::vector< RWMol * > &molList, AtomType *firstAtom, bool doingQuery)
initialize a molecule
Definition: SLNParseOps.h:90
int addFragToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int fragIdx)
Definition: SLNParseOps.h:306
int addBranchToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int branchIdx, BondType *&bond)
Definition: SLNParseOps.h:203
std::string convertToString(T val)
convenience function to convert the argument to a string
Definition: SLNParseOps.h:321
void closeRingBond(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int ringIdx, BondType *bond, bool postponeAllowed=true)
Definition: SLNParseOps.h:157
RDKIT_RDGENERAL_EXPORT const std::string _AtomID
Std stuff.
Definition: Abbreviations.h:19