RDKit
Open-source cheminformatics and machine learning.
O3AAlignMolecules.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2013-2014 Paolo Tosco
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_O3AALIGNMOLECULES_H_
12 #define _RD_O3AALIGNMOLECULES_H_
13 
14 #include <RDGeneral/Invariant.h>
15 #include <Geometry/Transform3D.h>
16 #include <Geometry/point.h>
17 #include <Numerics/Vector.h>
18 #include <GraphMol/ROMol.h>
19 #include <GraphMol/Conformer.h>
22 #include <vector>
23 #include <cmath>
24 #include <boost/shared_ptr.hpp>
25 #include <boost/multi_array.hpp>
26 #include <boost/dynamic_bitset.hpp>
27 
28 namespace RDKit {
29 namespace MolAlign {
33  void *prbProp;
34  void *refProp;
35  int coeff;
36  int weight;
37  bool useMMFFSim;
38 };
39 inline bool isDoubleZero(const double x) {
40  return ((x < 1.0e-10) && (x > -1.0e-10));
41 };
42 
43 class O3AConstraintVect;
44 
45 //! A class to define alignment constraints. Each constraint
46 //! is defined by a pair of atom indexes (one for the probe,
47 //! one for the reference) and a weight. Constraints can
48 //! can be added via the O3AConstraintVect class.
50  friend class O3AConstraintVect;
51 
52  public:
53  double getPrbIdx() { return d_prbIdx; }
54  double getRefIdx() { return d_refIdx; }
55  double getWeight() { return d_weight; }
56 
57  private:
58  unsigned int d_idx;
59  unsigned int d_prbIdx;
60  unsigned int d_refIdx;
61  double d_weight;
62 };
63 
64 //! A class to store a vector of alignment constraints. Each constraint
65 //! is defined by an O3AConstraint object. Each time the append()
66 //! method is invoked, the vector is sorted to make lookup faster.
67 //! Hence, constraints are not necessarily stored in the same order
68 //! they were appended.
70  public:
72  ~O3AConstraintVect() = default;
73  void append(unsigned int prbIdx, unsigned int refIdx, double weight) {
74  O3AConstraint *o3aConstraint = new O3AConstraint();
75  o3aConstraint->d_idx = d_count;
76  o3aConstraint->d_prbIdx = prbIdx;
77  o3aConstraint->d_refIdx = refIdx;
78  o3aConstraint->d_weight = weight;
79  d_o3aConstraintVect.emplace_back(o3aConstraint);
80  std::sort(d_o3aConstraintVect.begin(), d_o3aConstraintVect.end(),
81  d_compareO3AConstraint);
82  ++d_count;
83  }
84  std::vector<boost::shared_ptr<O3AConstraint>>::size_type size() {
85  return d_o3aConstraintVect.size();
86  }
87  O3AConstraint *operator[](unsigned int i) {
88  return d_o3aConstraintVect[i].get();
89  }
90 
91  private:
92  unsigned int d_count{0};
93  std::vector<boost::shared_ptr<O3AConstraint>> d_o3aConstraintVect;
94  static bool d_compareO3AConstraint(boost::shared_ptr<O3AConstraint> a,
95  boost::shared_ptr<O3AConstraint> b) {
96  return (
97  (a->d_prbIdx != b->d_prbIdx)
98  ? (a->d_prbIdx < b->d_prbIdx)
99  : ((a->d_refIdx != b->d_refIdx)
100  ? (a->d_refIdx < b->d_refIdx)
101  : ((a->d_weight != b->d_weight) ? (a->d_weight > b->d_weight)
102  : (a->d_idx < b->d_idx))));
103  }
104 };
105 
106 const int O3_DUMMY_COST = 100000;
107 const int O3_LARGE_NEGATIVE_WEIGHT = -1e9;
109 const unsigned int O3_MAX_H_BINS = 20;
110 const unsigned int O3_MAX_SDM_ITERATIONS = 100;
111 const unsigned int O3_MAX_SDM_THRESHOLD_ITER = 3;
112 const double O3_RANDOM_TRANS_COEFF = 5.0;
113 const double O3_THRESHOLD_DIFF_DISTANCE = 0.1;
114 const double O3_SDM_THRESHOLD_START = 0.7;
115 const double O3_SDM_THRESHOLD_STEP = 0.3;
116 const double O3_CHARGE_WEIGHT = 10.0;
117 const double O3_CRIPPEN_WEIGHT = 10.0;
118 const double O3_RMSD_THRESHOLD = 1.0e-04;
119 const double O3_SCORE_THRESHOLD = 0.01;
120 const double O3_SCORING_FUNCTION_ALPHA = 5.0;
121 const double O3_SCORING_FUNCTION_BETA = 0.5;
122 const double O3_CHARGE_COEFF = 5.0;
123 const double O3_CRIPPEN_COEFF = 1.0;
124 const int O3_MAX_WEIGHT_COEFF = 5;
125 enum {
127  O3_ACCURACY_MASK = (1 << 0 | 1 << 1),
128  O3_LOCAL_ONLY = (1 << 2)
129 };
130 
132  public:
133  MolHistogram(const ROMol &mol, const double *dmat, bool cleanupDmat = false);
134  ~MolHistogram() = default;
135  inline int get(const unsigned int y, const unsigned int x) const {
136  PRECONDITION(y < d_h.shape()[0], "Invalid index on MolHistogram");
137  PRECONDITION(x < d_h.shape()[1], "Invalid index on MolHistogram");
138  return d_h[y][x];
139  }
140 
141  private:
142  boost::multi_array<int, 2> d_h;
143 };
144 
146  public:
147  LAP(unsigned int dim)
148  : d_rowSol(dim),
149  d_colSol(dim),
150  d_free(dim),
151  d_colList(dim),
152  d_matches(dim),
153  d_d(dim),
154  d_v(dim),
155  d_pred(dim),
156  d_cost(boost::extents[dim][dim]) {}
157  ~LAP() = default;
158  int getCost(const unsigned int i, const unsigned int j) {
159  PRECONDITION(i < d_cost.shape()[0], "Invalid index on LAP.cost");
160  PRECONDITION(j < d_cost.shape()[1], "Invalid index on LAP.cost");
161  return d_cost[i][j];
162  }
163  int getRowSol(const unsigned int i) {
164  PRECONDITION(i < d_rowSol.size(), "Invalid index on LAP.rowSol");
165  return d_rowSol[i];
166  }
167  void computeMinCostPath(const int dim);
168  void computeCostMatrix(const ROMol &prbMol, const MolHistogram &prbHist,
169  const ROMol &refMol, const MolHistogram &refHist,
170  O3AConstraintVect *o3aConstraintVect,
171  int (*costFunc)(const unsigned int, const unsigned int,
172  double, void *),
173  void *data, const unsigned int n_bins = O3_MAX_H_BINS);
174 
175  private:
176  std::vector<int> d_rowSol;
177  std::vector<int> d_colSol;
178  std::vector<int> d_free;
179  std::vector<int> d_colList;
180  std::vector<int> d_matches;
181  std::vector<int> d_d;
182  std::vector<int> d_v;
183  std::vector<int> d_pred;
184  boost::multi_array<int, 2> d_cost;
185 };
186 
188  public:
189  // constructor
190  SDM(const Conformer *prbConf = nullptr, const Conformer *refConf = nullptr,
191  O3AConstraintVect *o3aConstraintVect = nullptr)
192  : d_prbConf(prbConf),
193  d_refConf(refConf),
194  d_o3aConstraintVect(o3aConstraintVect) {}
195  // copy constructor
196  SDM(const SDM &other)
197  : d_prbConf(other.d_prbConf),
198  d_refConf(other.d_refConf),
199  d_o3aConstraintVect(other.d_o3aConstraintVect),
200  d_SDMPtrVect(other.d_SDMPtrVect.size()) {
201  for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
202  d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
203  memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
204  sizeof(SDMElement));
205  }
206  }
207  // assignment operator
208  SDM &operator=(const SDM &other) {
209  if (this == &other) {
210  return *this;
211  }
212  d_prbConf = other.d_prbConf;
213  d_refConf = other.d_refConf;
214  d_o3aConstraintVect = other.d_o3aConstraintVect;
215  d_SDMPtrVect.resize(other.d_SDMPtrVect.size());
216  for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
217  d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
218  memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
219  sizeof(SDMElement));
220  }
221 
222  return *this;
223  }
224  // destructor
225  ~SDM() = default;
226  void fillFromDist(double threshold,
227  const boost::dynamic_bitset<> &refHvyAtoms,
228  const boost::dynamic_bitset<> &prbHvyAtoms);
229  void fillFromLAP(LAP &lap);
230  double scoreAlignment(double (*scoringFunc)(const unsigned int,
231  const unsigned int, void *),
232  void *data);
234  RDNumeric::DoubleVector &weights,
235  double (*weightFunc)(const unsigned int,
236  const unsigned int, void *),
237  void *data);
238  unsigned int size() { return d_SDMPtrVect.size(); }
239 
240  private:
241  typedef struct SDMElement {
242  unsigned int idx[2];
243  int score;
244  int cost;
245  double sqDist;
246  O3AConstraint *o3aConstraint;
247  } SDMElement;
248  const Conformer *d_prbConf;
249  const Conformer *d_refConf;
250  O3AConstraintVect *d_o3aConstraintVect;
251  std::vector<boost::shared_ptr<SDMElement>> d_SDMPtrVect;
252  static bool compareSDMScore(boost::shared_ptr<SDMElement> a,
253  boost::shared_ptr<SDMElement> b) {
254  return ((a->score != b->score)
255  ? (a->score < b->score)
256  : ((a->cost != b->cost)
257  ? (a->cost < b->cost)
258  : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
259  : (a->idx[1] < b->idx[1]))));
260  }
261  static bool compareSDMDist(boost::shared_ptr<SDMElement> a,
262  boost::shared_ptr<SDMElement> b) {
263  double aWeight = (a->o3aConstraint ? a->o3aConstraint->getWeight() : 0.0);
264  double bWeight = (b->o3aConstraint ? b->o3aConstraint->getWeight() : 0.0);
265  return ((aWeight != bWeight)
266  ? (aWeight > bWeight)
267  : ((a->sqDist != b->sqDist)
268  ? (a->sqDist < b->sqDist)
269  : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
270  : (a->idx[1] < b->idx[1]))));
271  }
272 };
273 
275  public:
276  //! pre-defined atom typing schemes
277  typedef enum { MMFF94 = 0, CRIPPEN } AtomTypeScheme;
278  O3A(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
279  AtomTypeScheme atomTypes = MMFF94, const int prbCid = -1,
280  const int refCid = -1, const bool reflect = false,
281  const unsigned int maxIters = 50, unsigned int options = 0,
282  const MatchVectType *constraintMap = nullptr,
283  const RDNumeric::DoubleVector *constraintWeights = nullptr,
284  LAP *extLAP = nullptr, MolHistogram *extPrbHist = nullptr,
285  MolHistogram *extRefHist = nullptr);
286  O3A(int (*costFunc)(const unsigned int, const unsigned int, double, void *),
287  double (*weightFunc)(const unsigned int, const unsigned int, void *),
288  double (*scoringFunc)(const unsigned int, const unsigned int, void *),
289  void *data, ROMol &prbMol, const ROMol &refMol, const int prbCid,
290  const int refCid, const boost::dynamic_bitset<> &prbHvyAtoms,
291  const boost::dynamic_bitset<> &refHvyAtoms, const bool reflect = false,
292  const unsigned int maxIters = 50, unsigned int options = 0,
293  O3AConstraintVect *o3aConstraintVect = nullptr,
294  ROMol *extWorkPrbMol = nullptr, LAP *extLAP = nullptr,
295  MolHistogram *extPrbHist = nullptr, MolHistogram *extRefHist = nullptr);
296  ~O3A() {
297  if (d_o3aMatchVect) {
298  delete d_o3aMatchVect;
299  }
300  if (d_o3aWeights) {
301  delete d_o3aWeights;
302  }
303  }
304  double align();
305  double trans(RDGeom::Transform3D &trans);
306  double score() { return d_o3aScore; }
307  const RDKit::MatchVectType *matches() { return d_o3aMatchVect; }
308  const RDNumeric::DoubleVector *weights() { return d_o3aWeights; }
309 
310  private:
311  ROMol *d_prbMol;
312  const ROMol *d_refMol;
313  int d_prbCid;
314  int d_refCid;
315  bool d_reflect;
316  unsigned int d_maxIters;
317  const RDKit::MatchVectType *d_o3aMatchVect;
318  const RDNumeric::DoubleVector *d_o3aWeights;
319  double d_o3aScore;
320 };
321 
322 RDKIT_MOLALIGN_EXPORT void randomTransform(ROMol &mol, const int cid = -1,
323  const int seed = -1);
325  const Conformer &conf);
326 RDKIT_MOLALIGN_EXPORT int o3aMMFFCostFunc(const unsigned int prbIdx,
327  const unsigned int refIdx,
328  double hSum, void *data);
329 RDKIT_MOLALIGN_EXPORT double o3aMMFFWeightFunc(const unsigned int prbIdx,
330  const unsigned int refIdx,
331  void *data);
332 RDKIT_MOLALIGN_EXPORT double o3aMMFFScoringFunc(const unsigned int prbIdx,
333  const unsigned int refIdx,
334  void *data);
335 RDKIT_MOLALIGN_EXPORT int o3aCrippenCostFunc(const unsigned int prbIdx,
336  const unsigned int refIdx,
337  double hSum, void *data);
338 RDKIT_MOLALIGN_EXPORT double o3aCrippenWeightFunc(const unsigned int prbIdx,
339  const unsigned int refIdx,
340  void *data);
341 RDKIT_MOLALIGN_EXPORT double o3aCrippenScoringFunc(const unsigned int prbIdx,
342  const unsigned int refIdx,
343  void *data);
344 
346  ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
347  std::vector<boost::shared_ptr<O3A>> &res, int numThreads = 1,
348  O3A::AtomTypeScheme atomTypes = O3A::MMFF94, const int refCid = -1,
349  const bool reflect = false, const unsigned int maxIters = 50,
350  unsigned int options = 0, const MatchVectType *constraintMap = nullptr,
351  const RDNumeric::DoubleVector *constraintWeights = nullptr);
352 } // namespace MolAlign
353 } // namespace RDKit
354 #endif
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
The class for representing 2D or 3D conformation of a molecule.
Definition: Conformer.h:45
LAP(unsigned int dim)
void computeMinCostPath(const int dim)
int getRowSol(const unsigned int i)
int getCost(const unsigned int i, const unsigned int j)
void computeCostMatrix(const ROMol &prbMol, const MolHistogram &prbHist, const ROMol &refMol, const MolHistogram &refHist, O3AConstraintVect *o3aConstraintVect, int(*costFunc)(const unsigned int, const unsigned int, double, void *), void *data, const unsigned int n_bins=O3_MAX_H_BINS)
int get(const unsigned int y, const unsigned int x) const
MolHistogram(const ROMol &mol, const double *dmat, bool cleanupDmat=false)
std::vector< boost::shared_ptr< O3AConstraint > >::size_type size()
O3AConstraint * operator[](unsigned int i)
void append(unsigned int prbIdx, unsigned int refIdx, double weight)
double trans(RDGeom::Transform3D &trans)
O3A(int(*costFunc)(const unsigned int, const unsigned int, double, void *), double(*weightFunc)(const unsigned int, const unsigned int, void *), double(*scoringFunc)(const unsigned int, const unsigned int, void *), void *data, ROMol &prbMol, const ROMol &refMol, const int prbCid, const int refCid, const boost::dynamic_bitset<> &prbHvyAtoms, const boost::dynamic_bitset<> &refHvyAtoms, const bool reflect=false, const unsigned int maxIters=50, unsigned int options=0, O3AConstraintVect *o3aConstraintVect=nullptr, ROMol *extWorkPrbMol=nullptr, LAP *extLAP=nullptr, MolHistogram *extPrbHist=nullptr, MolHistogram *extRefHist=nullptr)
O3A(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp, AtomTypeScheme atomTypes=MMFF94, const int prbCid=-1, const int refCid=-1, const bool reflect=false, const unsigned int maxIters=50, unsigned int options=0, const MatchVectType *constraintMap=nullptr, const RDNumeric::DoubleVector *constraintWeights=nullptr, LAP *extLAP=nullptr, MolHistogram *extPrbHist=nullptr, MolHistogram *extRefHist=nullptr)
AtomTypeScheme
pre-defined atom typing schemes
const RDNumeric::DoubleVector * weights()
const RDKit::MatchVectType * matches()
void fillFromLAP(LAP &lap)
void fillFromDist(double threshold, const boost::dynamic_bitset<> &refHvyAtoms, const boost::dynamic_bitset<> &prbHvyAtoms)
SDM & operator=(const SDM &other)
double scoreAlignment(double(*scoringFunc)(const unsigned int, const unsigned int, void *), void *data)
void prepareMatchWeightsVect(RDKit::MatchVectType &matchVect, RDNumeric::DoubleVector &weights, double(*weightFunc)(const unsigned int, const unsigned int, void *), void *data)
SDM(const SDM &other)
SDM(const Conformer *prbConf=nullptr, const Conformer *refConf=nullptr, O3AConstraintVect *o3aConstraintVect=nullptr)
A class to represent vectors of numbers.
Definition: Vector.h:29
#define RDKIT_MOLALIGN_EXPORT
Definition: export.h:257
std::vector< Point3D > POINT3D_VECT
Definition: point.h:546
const uint32_t seed
Definition: MHFP.h:29
RDKIT_MOLALIGN_EXPORT void getO3AForProbeConfs(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp, std::vector< boost::shared_ptr< O3A >> &res, int numThreads=1, O3A::AtomTypeScheme atomTypes=O3A::MMFF94, const int refCid=-1, const bool reflect=false, const unsigned int maxIters=50, unsigned int options=0, const MatchVectType *constraintMap=nullptr, const RDNumeric::DoubleVector *constraintWeights=nullptr)
const int O3_MAX_WEIGHT_COEFF
const int O3_LARGE_NEGATIVE_WEIGHT
const int O3_DEFAULT_CONSTRAINT_WEIGHT
RDKIT_MOLALIGN_EXPORT double o3aCrippenWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
const double O3_SDM_THRESHOLD_START
const double O3_RMSD_THRESHOLD
const double O3_CRIPPEN_WEIGHT
const double O3_CHARGE_COEFF
const double O3_RANDOM_TRANS_COEFF
const unsigned int O3_MAX_SDM_ITERATIONS
const double O3_SDM_THRESHOLD_STEP
const double O3_CRIPPEN_COEFF
RDKIT_MOLALIGN_EXPORT int o3aMMFFCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data)
RDKIT_MOLALIGN_EXPORT const RDGeom::POINT3D_VECT * reflect(const Conformer &conf)
const unsigned int O3_MAX_SDM_THRESHOLD_ITER
const double O3_SCORING_FUNCTION_BETA
RDKIT_MOLALIGN_EXPORT void randomTransform(ROMol &mol, const int cid=-1, const int seed=-1)
const unsigned int O3_MAX_H_BINS
const double O3_SCORING_FUNCTION_ALPHA
bool isDoubleZero(const double x)
const double O3_THRESHOLD_DIFF_DISTANCE
RDKIT_MOLALIGN_EXPORT int o3aCrippenCostFunc(const unsigned int prbIdx, const unsigned int refIdx, double hSum, void *data)
RDKIT_MOLALIGN_EXPORT double o3aCrippenScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
RDKIT_MOLALIGN_EXPORT double o3aMMFFScoringFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
const double O3_CHARGE_WEIGHT
const double O3_SCORE_THRESHOLD
RDKIT_MOLALIGN_EXPORT double o3aMMFFWeightFunc(const unsigned int prbIdx, const unsigned int refIdx, void *data)
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
Definition: RDLog.h:24