RDKit
Open-source cheminformatics and machine learning.
TautomerQuery.h
Go to the documentation of this file.
1 //
2 // Created by Gareth Jones on 5/7/2020.
3 //
4 // Copyright 2020-2022 Schrodinger, Inc and other RDKit contributors
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 
11 #include <RDGeneral/export.h>
12 
13 #ifndef RDKIT_TAUTOMERQUERY_H
14 #define RDKIT_TAUTOMERQUERY_H
15 
16 #include <GraphMol/ROMol.h>
17 #include <GraphMol/MolPickler.h>
18 #include <vector>
21 
22 #ifdef RDK_USE_BOOST_SERIALIZATION
24 #include <boost/serialization/vector.hpp>
25 #include <boost/serialization/shared_ptr.hpp>
26 #include <boost/serialization/split_member.hpp>
28 #endif
29 
30 namespace RDKit {
31 
32 class RWMol;
33 
35 
37  private:
38  // Tautomers of the query
39  std::vector<ROMOL_SPTR> d_tautomers;
40  // Template query for substructure search
41  std::unique_ptr<const ROMol> d_templateMolecule;
42  // Tautomeric bonds and atoms
43  std::vector<size_t> d_modifiedAtoms;
44  std::vector<size_t> d_modifiedBonds;
45 
46  // tests if a match to the template matches a specific tautomer
47  bool matchTautomer(const ROMol &mol, const ROMol &tautomer,
48  const std::vector<unsigned int> &match,
49  const SubstructMatchParameters &params) const;
50 
51  public:
52  TautomerQuery(std::vector<ROMOL_SPTR> tautomers,
53  const ROMol *const templateMolecule,
54  std::vector<size_t> modifiedAtoms,
55  std::vector<size_t> modifiedBonds);
56 
57  //! Copy constructor performs a deep copy
59  : d_templateMolecule(other.d_templateMolecule
60  ? new ROMol(*other.d_templateMolecule)
61  : nullptr),
62  d_modifiedAtoms(other.d_modifiedAtoms),
63  d_modifiedBonds(other.d_modifiedBonds) {
64  PRECONDITION(other.d_templateMolecule != nullptr, "Null template");
65  for (auto taut : other.d_tautomers) {
66  PRECONDITION(taut.get() != nullptr, "Null tautomer");
67  d_tautomers.push_back(boost::make_shared<ROMol>(*taut));
68  }
69  }
70 
//! Constructs a TautomerQuery from a pickle string by passing it to initFromString()
71  TautomerQuery(const std::string &pickle) { initFromString(pickle); }
72 
73  // Factory to build TautomerQuery
74  // Caller owns the memory
76  const ROMol &molecule,
77  const std::string &tautomerTransformFile = std::string());
78 
79  // Substructure search
80  std::vector<MatchVectType> substructOf(
81  const ROMol &mol,
83  std::vector<ROMOL_SPTR> *matchingTautomers = nullptr) const;
84 
85  // SubstructureMatch
86  bool isSubstructOf(const ROMol &mol, const SubstructMatchParameters &params =
88 
89  // Query fingerprint
91  unsigned int fpSize = 2048U) const;
92  // Static method to Fingerprint a target
94  unsigned int fpSize = 2048U);
95 
96  // accessors
97 
98  // returns a reference to the molecule held by d_templateMolecule, which stays owned by this TautomerQuery
    // NOTE(review): d_templateMolecule is dereferenced without a null check
99  const ROMol &getTemplateMolecule() const { return *d_templateMolecule; }
100 
//! Returns d_tautomers by value: the vector of shared pointers is copied, the molecules are not
101  const std::vector<ROMOL_SPTR> getTautomers() const { return d_tautomers; }
102 
//! Returns a copy of d_modifiedAtoms (returned by value)
103  const std::vector<size_t> getModifiedAtoms() const { return d_modifiedAtoms; }
104 
//! Returns a copy of d_modifiedBonds (returned by value)
105  const std::vector<size_t> getModifiedBonds() const { return d_modifiedBonds; }
106 
107  //! serializes (pickles) to a stream
108  void toStream(std::ostream &ss) const;
109  //! returns a string with a serialized (pickled) representation
110  std::string serialize() const;
111  //! initializes from a stream pickle
112  void initFromStream(std::istream &ss);
113  //! initializes from a string pickle
114  void initFromString(const std::string &text);
115 
116  friend class TautomerQueryMatcher;
117 
118 #ifdef RDK_USE_BOOST_SERIALIZATION
//! Boost.Serialization save hook: writes this query to archive \c ar.
//! Written in order: the vector of tautomer pickles, the template-molecule
//! pickle, d_modifiedAtoms, d_modifiedBonds.
119  template <class Archive>
120  void save(Archive &ar, const unsigned int version) const {
121  RDUNUSED_PARAM(version);  // the archive version is not consulted
122  std::vector<std::string> pkls;
     // pickle every tautomer into its own string
123  for (const auto &taut : d_tautomers) {
124  std::string pkl;
125  MolPickler::pickleMol(*taut, pkl);
126  pkls.push_back(pkl);
127  }
128  ar << pkls;
129  std::string molpkl;
     // NOTE(review): d_templateMolecule is dereferenced without a null check
130  MolPickler::pickleMol(*d_templateMolecule, molpkl);
131  ar << molpkl;
132  ar << d_modifiedAtoms;
133  ar << d_modifiedBonds;
134  }
135 
//! Boost.Serialization load hook: restores this query from archive \c ar.
//! Read in order: the vector of tautomer pickles, the template-molecule
//! pickle, d_modifiedAtoms, d_modifiedBonds.
136  template <class Archive>
137  void load(Archive &ar, const unsigned int version) {
138  RDUNUSED_PARAM(version);  // the archive version is not consulted
139 
140  std::vector<std::string> pkls;
141  ar >> pkls;
142  d_tautomers.clear();  // discard any tautomers held before loading
     // rebuild one ROMol per pickle string
143  for (const auto &pkl : pkls) {
144  d_tautomers.push_back(ROMOL_SPTR(new ROMol(pkl)));
145  }
146  std::string molpkl;
147  ar >> molpkl;
148  d_templateMolecule.reset(new ROMol(molpkl));  // replaces the currently held template molecule
149 
150  ar >> d_modifiedAtoms;
151  ar >> d_modifiedBonds;
152  }
153  BOOST_SERIALIZATION_SPLIT_MEMBER()
154 #endif
155 };
156 
157 // so we can use the templates in Code/GraphMol/Substruct/SubstructMatch.h
158 RDKIT_TAUTOMERQUERY_EXPORT std::vector<MatchVectType> SubstructMatch(
159  const ROMol &mol, const TautomerQuery &query,
160  const SubstructMatchParameters &params);
161 
162 } // namespace RDKit
163 
164 #endif // RDKIT_TAUTOMERQUERY_H
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
a class for bit vectors that are densely occupied
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
std::string serialize() const
returns a string with a serialized (pickled) representation
TautomerQuery(const TautomerQuery &other)
Copy constructor performs a deep copy.
Definition: TautomerQuery.h:58
ExplicitBitVect * patternFingerprintTemplate(unsigned int fpSize=2048U) const
static TautomerQuery * fromMol(const ROMol &molecule, const std::string &tautomerTransformFile=std::string())
TautomerQuery(const std::string &pickle)
Definition: TautomerQuery.h:71
bool isSubstructOf(const ROMol &mol, const SubstructMatchParameters &params=SubstructMatchParameters())
void initFromString(const std::string &text)
initializes from a string pickle
std::vector< MatchVectType > substructOf(const ROMol &mol, const SubstructMatchParameters &params=SubstructMatchParameters(), std::vector< ROMOL_SPTR > *matchingTautomers=nullptr) const
const ROMol & getTemplateMolecule() const
Definition: TautomerQuery.h:99
TautomerQuery(std::vector< ROMOL_SPTR > tautomers, const ROMol *const templateMolecule, std::vector< size_t > modifiedAtoms, std::vector< size_t > modifiedBonds)
void initFromStream(std::istream &ss)
initializes from a stream pickle
void toStream(std::ostream &ss) const
serializes (pickles) to a stream
static ExplicitBitVect * patternFingerprintTarget(const ROMol &target, unsigned int fpSize=2048U)
const std::vector< ROMOL_SPTR > getTautomers() const
const std::vector< size_t > getModifiedBonds() const
const std::vector< size_t > getModifiedAtoms() const
#define RDKIT_TAUTOMERQUERY_EXPORT
Definition: export.h:497
RDKIT_CHEMREACTIONS_EXPORT void pickle(const boost::shared_ptr< EnumerationStrategyBase > &enumerator, std::ostream &ss)
pickles an EnumerationStrategy and adds the results to a stream ss
Std stuff.
Definition: Abbreviations.h:19
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_TAUTOMERQUERY_EXPORT bool TautomerQueryCanSerialize()
boost::shared_ptr< ROMol > ROMOL_SPTR