RDKit
Open-source cheminformatics and machine learning.
SubstanceGroup.h
Go to the documentation of this file.
1 //
2 //
3 // Copyright (C) 2018-2020 Greg Landrum and T5 Informatics GmbH
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 /*! \file SubstanceGroup.h
12 
13  \brief Defines the SubstanceGroup class
14 
15 */
16 #include <RDGeneral/export.h>
17 #ifndef _RD_SGROUP_H
18 #define _RD_SGROUP_H
19 
20 #include <iostream>
21 #include <utility>
22 #include <unordered_map>
23 
24 #include <Geometry/point.h>
25 #include <RDGeneral/types.h>
26 #include <RDGeneral/RDProps.h>
27 #include <boost/smart_ptr.hpp>
28 
29 namespace RDKit {
30 class ROMol;
31 class RWMol;
32 class Bond;
33 class Atom;
34 
35 //! used to indicate errors from incorrect sgroup access
37  : public std::runtime_error {
38  public:
39  //! construct with an error message
40  SubstanceGroupException(const char *msg) : std::runtime_error(msg) {}
41  //! construct with an error message
42  SubstanceGroupException(const std::string &msg) : std::runtime_error(msg) {}
43 };
44 
45 //! The class for representing SubstanceGroups
46 /*!
47  <b>Notes:</b>
48  - These are inspired by the SGroups in the MDL formats
49  - Implementation is based on 2010 MDL SD specification:
50  http://infochim.u-strasbg.fr/recherche/Download/Fragmentor/MDL_SDF.pdf
51  - See SGroups.md for further, more comprehensive notes.
52 
53 */
54 
56  public:
57  //! Bond type (see V3000 spec)
58  enum class BondType {
59  XBOND, // External/Crossing bond
60  CBOND, // Internal/Contained bond
61  };
62 
63  typedef std::array<RDGeom::Point3D, 3> Bracket;
64 
65  //! Data structure for SAP lines (see V3000 spec)
66  //! lvIdx may not be set; this signaled with value -1
67  struct AttachPoint {
68  unsigned int aIdx;
69  int lvIdx;
70  std::string id;
71  bool operator==(const AttachPoint &other) const {
72  return aIdx == other.aIdx && lvIdx == other.lvIdx && id == other.id;
73  }
74  };
75 
76  //! See specification for V3000 CSTATE
77  //! vector may or may not be considered, depending on TYPE
78  struct CState {
79  unsigned int bondIdx;
81  bool operator==(const CState &other) const {
82  // note that we ignore coordinates for this
83  return bondIdx == other.bondIdx;
84  }
85  };
86 
87 //! No default constructor
88 #ifndef SWIG
89  // Unfortunately, SWIG generated wrapper code uses temporary variables that
90  // require a default ctor not be deleted.
91  SubstanceGroup() = delete;
92 #endif // !SWIG
93 
94  //! Main Constructor. Ownership is only set on this side of the relationship:
95  //! mol->addSubstanceGroup(sgroup) still needs to be called to get ownership
96  //! on the other side.
97  SubstanceGroup(ROMol *owning_mol, const std::string &type);
98 
99  SubstanceGroup(const SubstanceGroup &other) = default;
100  SubstanceGroup &operator=(const SubstanceGroup &other) = default;
101 
102  SubstanceGroup(SubstanceGroup &&other) noexcept : RDProps(std::move(other)) {
103  dp_mol = std::exchange(other.dp_mol, nullptr);
104  d_atoms = std::move(other.d_atoms);
105  d_patoms = std::move(other.d_patoms);
106  d_bonds = std::move(other.d_bonds);
107  d_brackets = std::move(other.d_brackets);
108  d_cstates = std::move(other.d_cstates);
109  d_saps = std::move(other.d_saps);
110  }
111 
113  if (this == &other) {
114  return *this;
115  }
116  RDProps::operator=(std::move(other));
117  dp_mol = std::exchange(other.dp_mol, nullptr);
118  d_atoms = std::move(other.d_atoms);
119  d_patoms = std::move(other.d_patoms);
120  d_bonds = std::move(other.d_bonds);
121  d_brackets = std::move(other.d_brackets);
122  d_cstates = std::move(other.d_cstates);
123  d_saps = std::move(other.d_saps);
124  return *this;
125  }
126 
127  //! Destructor
128  ~SubstanceGroup() = default;
129 
130  //! returns whether or not this belongs to a molecule
131  bool hasOwningMol() const { return dp_mol != nullptr; }
132 
133  //! Get the molecule that owns this instance
134  ROMol &getOwningMol() const {
135  PRECONDITION(dp_mol, "no owner");
136  return *dp_mol;
137  }
138 
139  //! returns whether or not this group is valid; invalid groups must be
140  //! ignored.
141  bool getIsValid() const { return d_isValid; }
142 
143  //! set whether or not this group is valid; invalid groups must be ignored.
144  void setIsValid(bool isValid) { d_isValid = isValid; }
145 
146  //! get the index of this sgroup in dp_mol's sgroups vector
147  //! (do not mistake this for the ID!)
148  unsigned int getIndexInMol() const;
149 
150  /* Atom and Bond methods */
151  void addAtomWithIdx(unsigned int idx);
152  void addParentAtomWithIdx(unsigned int idx);
153  void addBondWithIdx(unsigned int idx);
154  void addAtomWithBookmark(int mark);
156  void addBondWithBookmark(int mark);
157 
158  // These methods should be handled with care, since they can leave
159  // Attachment points and CStates in an invalid state!
160  void removeAtomWithIdx(unsigned int idx);
161  void removeParentAtomWithIdx(unsigned int idx);
162  void removeBondWithIdx(unsigned int idx);
163 
164  void addBracket(const Bracket &bracket);
165  void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector);
166  void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr);
167 
168  BondType getBondType(unsigned int bondIdx) const;
169 
170  const std::vector<unsigned int> &getAtoms() const { return d_atoms; }
171  const std::vector<unsigned int> &getParentAtoms() const { return d_patoms; }
172  const std::vector<unsigned int> &getBonds() const { return d_bonds; }
173 
174  void setAtoms(std::vector<unsigned int> atoms) { d_atoms = std::move(atoms); }
175  void setParentAtoms(std::vector<unsigned int> patoms) {
176  d_patoms = std::move(patoms);
177  }
178  void setBonds(std::vector<unsigned int> bonds) { d_bonds = std::move(bonds); }
179 
180  const std::vector<Bracket> &getBrackets() const { return d_brackets; }
181  const std::vector<CState> &getCStates() const { return d_cstates; }
182  const std::vector<AttachPoint> &getAttachPoints() const { return d_saps; }
183 
184  std::vector<Bracket> &getBrackets() { return d_brackets; }
185  std::vector<CState> &getCStates() { return d_cstates; }
186  std::vector<AttachPoint> &getAttachPoints() { return d_saps; }
187 
188  void clearBrackets() { d_brackets.clear(); }
189  void clearCStates() { d_cstates.clear(); }
190  void clearAttachPoints() { d_saps.clear(); }
191 
192  //! adjusts our atom IDs to reflect that an atom has been removed from the
193  //! parent molecule
194  //! decrements all atom IDs that are higher than \c atomIdx
195  //! raises a \c SubstanceGroupException if \c atomIdx is actually part of
196  //! this substance group
197  //! \returns whether or not anything was changed
198  bool adjustToRemovedAtom(unsigned int atomIdx);
199 
200  //! \returns whether or not the specified atom is part of the
201  //! definition of this substance group
202  bool includesAtom(unsigned int atomIdx) const;
203 
204  //! adjusts our bond IDs to reflect that a bond has been removed from the
205  //! parent molecule
206  //! decrements all bond IDs that are higher than \c bondIdx
207  //! raises a \c SubstanceGroupException if \c bondIdx is actually part of
208  //! this substance group
209  //! \returns whether or not anything was changed
210  bool adjustToRemovedBond(unsigned int bondIdx);
211 
212  //! \returns whether or not the specified bond is part of the
213  //! definition of this substance group
214  bool includesBond(unsigned int bondIdx) const;
215 
216  //! Set owning molecule
217  //! This only updates atoms and bonds; parent sgroup has to be updated
218  //! independently, since parent might not exist at the time this is
219  //! called.
220  void setOwningMol(ROMol *mol);
221 
222  bool operator==(const SubstanceGroup &other) const {
223  // we ignore brackets and cstates, which involve coordinates
224  return dp_mol == other.dp_mol && d_atoms == other.d_atoms &&
225  d_patoms == other.d_patoms && d_bonds == other.d_bonds &&
226  d_saps == other.d_saps;
227  }
228 
229  private:
230  ROMol *dp_mol = nullptr; // owning molecule
231 
232  bool d_isValid = true;
233 
234  std::vector<unsigned int> d_atoms;
235  std::vector<unsigned int> d_patoms;
236  std::vector<unsigned int> d_bonds;
237 
238  std::vector<Bracket> d_brackets;
239  std::vector<CState> d_cstates;
240  std::vector<AttachPoint> d_saps;
241 }; // namespace RDKit
242 
243 namespace SubstanceGroupChecks {
244 
245 const std::vector<std::string> sGroupTypes = {
246  // polymer sgroups:
247  "SRU", "MON", "COP", "CRO", "GRA", "MOD", "MER", "ANY",
248  // formulations/mixtures:
249  "COM", "MIX", "FOR",
250  // other
251  "SUP", "MUL", "DAT", "GEN"};
252 
253 const std::vector<std::string> sGroupSubtypes = {"ALT", "RAN", "BLO"};
254 const std::vector<std::string> sGroupConnectTypes = {"HH", "HT", "EU"};
255 
256 RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type);
257 
258 RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type);
259 
260 RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type);
261 
263  unsigned int id);
264 
265 } // namespace SubstanceGroupChecks
266 
267 //! \name SubstanceGroups and molecules
268 //! @{
269 
270 RDKIT_GRAPHMOL_EXPORT std::vector<SubstanceGroup> &getSubstanceGroups(
271  ROMol &mol);
272 RDKIT_GRAPHMOL_EXPORT const std::vector<SubstanceGroup> &getSubstanceGroups(
273  const ROMol &mol);
274 
275 //! Add a new SubstanceGroup. A copy is added, so we can be sure that no other
276 //! references to the SubstanceGroup exist.
277 /*!
278  \param sgroup - SubstanceGroup to be added to the molecule.
279 */
281  SubstanceGroup sgroup);
282 
283 //! Removes SubstanceGroups which reference a particular atom index
284 /*!
285  \param mol - molecule to be edited.
286  \param idx - atom index
287 */
289  RWMol &mol, unsigned int idx);
290 //! Removes SubstanceGroups which reference a particular bond index
291 /*!
292  \param mol - molecule to be edited.
293  \param idx - bond index
294 */
296  RWMol &mol, unsigned int idx);
297 //! @}
298 
299 } // namespace RDKit
300 
301 //! allows SubstanceGroup objects to be dumped to streams
302 RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target,
303  const RDKit::SubstanceGroup &sg);
304 #endif
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::SubstanceGroup &sg)
allows SubstanceGroup objects to be dumped to streams
RDProps & operator=(const RDProps &rhs)
Definition: RDProps.h:24
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
used to indicate errors from incorrect sgroup access
SubstanceGroupException(const std::string &msg)
construct with an error message
SubstanceGroupException(const char *msg)
construct with an error message
The class for representing SubstanceGroups.
const std::vector< Bracket > & getBrackets() const
void addBondWithIdx(unsigned int idx)
void setOwningMol(ROMol *mol)
void setParentAtoms(std::vector< unsigned int > patoms)
std::vector< AttachPoint > & getAttachPoints()
void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr)
void setBonds(std::vector< unsigned int > bonds)
~SubstanceGroup()=default
Destructor.
void setIsValid(bool isValid)
set whether or not this group is valid; invalid groups must be ignored.
const std::vector< unsigned int > & getAtoms() const
std::vector< Bracket > & getBrackets()
void addParentAtomWithBookmark(int mark)
const std::vector< unsigned int > & getParentAtoms() const
void setAtoms(std::vector< unsigned int > atoms)
const std::vector< unsigned int > & getBonds() const
bool adjustToRemovedBond(unsigned int bondIdx)
void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector)
const std::vector< CState > & getCStates() const
SubstanceGroup()=delete
No default constructor.
bool adjustToRemovedAtom(unsigned int atomIdx)
bool operator==(const SubstanceGroup &other) const
BondType
Bond type (see V3000 spec)
SubstanceGroup(const SubstanceGroup &other)=default
SubstanceGroup(SubstanceGroup &&other) noexcept
const std::vector< AttachPoint > & getAttachPoints() const
ROMol & getOwningMol() const
Get the molecule that owns this instance.
std::vector< CState > & getCStates()
SubstanceGroup & operator=(const SubstanceGroup &other)=default
void addBondWithBookmark(int mark)
void addAtomWithBookmark(int mark)
bool includesAtom(unsigned int atomIdx) const
void removeParentAtomWithIdx(unsigned int idx)
SubstanceGroup(ROMol *owning_mol, const std::string &type)
void addParentAtomWithIdx(unsigned int idx)
void addAtomWithIdx(unsigned int idx)
std::array< RDGeom::Point3D, 3 > Bracket
void addBracket(const Bracket &bracket)
bool hasOwningMol() const
returns whether or not this belongs to a molecule
SubstanceGroup & operator=(SubstanceGroup &&other) noexcept
bool includesBond(unsigned int bondIdx) const
void removeAtomWithIdx(unsigned int idx)
void removeBondWithIdx(unsigned int idx)
BondType getBondType(unsigned int bondIdx) const
unsigned int getIndexInMol() const
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:225
RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type)
RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type)
const std::vector< std::string > sGroupConnectTypes
RDKIT_GRAPHMOL_EXPORT bool isSubstanceGroupIdFree(const ROMol &mol, unsigned int id)
RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type)
const std::vector< std::string > sGroupSubtypes
const std::vector< std::string > sGroupTypes
Std stuff.
Definition: Abbreviations.h:19
RDKIT_GRAPHMOL_EXPORT unsigned int addSubstanceGroup(ROMol &mol, SubstanceGroup sgroup)
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingBond(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular bond index.
RDKIT_GRAPHMOL_EXPORT std::vector< SubstanceGroup > & getSubstanceGroups(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingAtom(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular atom index.
bool operator==(const AttachPoint &other) const
bool operator==(const CState &other) const