RDKit
Open-source cheminformatics and machine learning.
Fingerprints.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2020 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_FINGERPRINTS_H
12 #define RD_FINGERPRINTS_H
13 
14 #include <vector>
15 #include <cstdint>
17 
18 class ExplicitBitVect;
19 namespace RDKit {
20 class ROMol;
21 class MolBundle;
22 
23 //! \brief Generates a topological (Daylight like) fingerprint for a molecule
24 //! using an alternate (faster) hashing algorithm
25 /*!
26 
27  \param mol: the molecule to be fingerprinted
28  \param minPath: the minimum path length (in bonds) to be included
29  \param maxPath: the maximum path length (in bonds) to be included
30  \param fpSize: the size of the fingerprint
31  \param nBitsPerHash: the number of bits to be set by each path
32  \param useHs: toggles inclusion of Hs in paths (if the molecule has
33  explicit Hs)
34  \param tgtDensity: if the generated fingerprint is below this density, it
35  will
36  be folded until the density is reached.
37  \param minSize: the minimum size to which the fingerprint will be
38  folded
39  \param branchedPaths: toggles generation of branched subgraphs, not just
40  linear paths
41  \param useBondOrder: toggles inclusion of bond orders in the path hashes
42  \param atomInvariants: a vector of atom invariants to use while hashing the
43  paths
44  \param fromAtoms: only paths starting at these atoms will be included
45  \param atomBits: used to return the bits that each atom is involved in
46  (should be at least \c mol.numAtoms long)
47 
48  \return the molecular fingerprint, as an ExplicitBitVect
49 
50  <b>Notes:</b>
51  - the caller is responsible for <tt>delete</tt>ing the result
52 
53 */
55  const ROMol &mol, unsigned int minPath = 1, unsigned int maxPath = 7,
56  unsigned int fpSize = 2048, unsigned int nBitsPerHash = 2,
57  bool useHs = true, double tgtDensity = 0.0, unsigned int minSize = 128,
58  bool branchedPaths = true, bool useBondOrder = true,
59  std::vector<std::uint32_t> *atomInvariants = nullptr,
60  const std::vector<std::uint32_t> *fromAtoms = nullptr,
61  std::vector<std::vector<std::uint32_t>> *atomBits = nullptr,
62  std::map<std::uint32_t, std::vector<std::vector<int>>> *bitInfo = nullptr);
63 const std::string RDKFingerprintMolVersion = "2.0.0";
64 
65 //! \brief Generates a topological (Daylight like) fingerprint for a molecule
66 //! using a layer-based hashing algorithm
67 /*!
68 
69  <b>Experimental:</b> This function is experimental. The API or results may
70  change from
71  release to release.
72 
73  \param mol: the molecule to be fingerprinted
74  \param layerFlags: the layers to be included (see below)
75  \param minPath: the minimum path length (in bonds) to be included
76  \param maxPath: the maximum path length (in bonds) to be included
77  \param fpSize: the size of the fingerprint
78  \param atomCounts: if provided, this will be used to provide the count of
79  the number
80  of paths that set bits each atom is involved in. The
81  vector should
82  have at least as many entries as the molecule has atoms
83  and is not
84  zeroed out here.
85  \param setOnlyBits: if provided, only bits that are set in this bit vector
86  will be set
87  in the result. This is essentially the same as doing:
88  (*res) &= (*setOnlyBits);
89  but also has an impact on the atomCounts (if being used)
90  \param branchedPaths: toggles generation of branched subgraphs, not just
91  linear paths
92 
93  \return the molecular fingerprint, as an ExplicitBitVect
94 
95  <b>Notes:</b>
96  - the caller is responsible for <tt>delete</tt>ing the result
97 
98  <b>Layer definitions:</b>
99  - 0x01: pure topology
100  - 0x02: bond order
101  - 0x04: atom types
102  - 0x08: presence of rings
103  - 0x10: ring sizes
104  - 0x20: aromaticity
105 */
107  const ROMol &mol, unsigned int layerFlags = 0xFFFFFFFF,
108  unsigned int minPath = 1, unsigned int maxPath = 7,
109  unsigned int fpSize = 2048, std::vector<unsigned int> *atomCounts = nullptr,
110  ExplicitBitVect *setOnlyBits = nullptr, bool branchedPaths = true,
111  const std::vector<std::uint32_t> *fromAtoms = nullptr);
112 const unsigned int maxFingerprintLayers = 10;
113 const std::string LayeredFingerprintMolVersion = "0.7.0";
114 const unsigned int substructLayers = 0x07;
115 
116 //! \brief Generates a topological fingerprint for a molecule
117 //! using a series of pre-defined structural patterns
118 /*!
119 
120  <b>Experimental:</b> This function is experimental. The API or results may
121  change from
122  release to release.
123 
124  \param mol: the molecule to be fingerprinted
125  \param fpSize: the size of the fingerprint
126  \param atomCounts: if provided, this will be used to provide the count of
127  the number
128  of paths that set bits each atom is involved in. The
129  vector should
130  have at least as many entries as the molecule has atoms
131  and is not
132  zeroed out here.
133  \param setOnlyBits: if provided, only bits that are set in this bit vector
134  will be set
135  in the result. This is essentially the same as doing:
136  (*res) &= (*setOnlyBits);
137  but also has an impact on the atomCounts (if being used)
138 
139  \return the molecular fingerprint, as an ExplicitBitVect
140 
141  <b>Notes:</b>
142  - the caller is responsible for <tt>delete</tt>ing the result
143 
144 */
146  const ROMol &mol, unsigned int fpSize = 2048,
147  std::vector<unsigned int> *atomCounts = nullptr,
148  ExplicitBitVect *setOnlyBits = nullptr, bool tautomericFingerprint = false);
149 const std::string PatternFingerprintMolVersion = "1.0.0";
150 //! \overload
152  const MolBundle &bundle, unsigned int fpSize = 2048,
153  ExplicitBitVect *setOnlyBits = nullptr, bool tautomericFingerprint = false);
154 
157  const ROMol &mol, unsigned int minPath = 1, unsigned int maxPath = 7,
158  bool useHs = true, bool branchedPaths = true, bool useBondOrder = true,
159  std::vector<std::uint32_t> *atomInvariants = nullptr,
160  const std::vector<std::uint32_t> *fromAtoms = nullptr,
161  std::vector<std::vector<boost::uint64_t>> *atomBits = nullptr,
162  std::map<boost::uint64_t, std::vector<std::vector<int>>> *bitInfo =
163  nullptr);
164 
165 } // namespace RDKit
166 
167 #endif
a class for bit vectors that are densely occupied
MolBundle contains a collection of related ROMols.
Definition: MolBundle.h:39
a class for efficiently storing sparse vectors of ints
Definition: SparseIntVect.h:28
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:177
Std stuff.
Definition: Abbreviations.h:19
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * LayeredFingerprintMol(const ROMol &mol, unsigned int layerFlags=0xFFFFFFFF, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=nullptr, ExplicitBitVect *setOnlyBits=nullptr, bool branchedPaths=true, const std::vector< std::uint32_t > *fromAtoms=nullptr)
Generates a topological (Daylight like) fingerprint for a molecule using a layer-based hashing algori...
const std::string RDKFingerprintMolVersion
Definition: Fingerprints.h:63
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=nullptr, ExplicitBitVect *setOnlyBits=nullptr, bool tautomericFingerprint=false)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns.
const unsigned int maxFingerprintLayers
Definition: Fingerprints.h:112
const std::string LayeredFingerprintMolVersion
Definition: Fingerprints.h:113
const std::string PatternFingerprintMolVersion
Definition: Fingerprints.h:149
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::uint64_t > * getUnfoldedRDKFingerprintMol(const ROMol &mol, unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, std::vector< std::uint32_t > *atomInvariants=nullptr, const std::vector< std::uint32_t > *fromAtoms=nullptr, std::vector< std::vector< boost::uint64_t >> *atomBits=nullptr, std::map< boost::uint64_t, std::vector< std::vector< int >>> *bitInfo=nullptr)
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * RDKFingerprintMol(const ROMol &mol, unsigned int minPath=1, unsigned int maxPath=7, unsigned int fpSize=2048, unsigned int nBitsPerHash=2, bool useHs=true, double tgtDensity=0.0, unsigned int minSize=128, bool branchedPaths=true, bool useBondOrder=true, std::vector< std::uint32_t > *atomInvariants=nullptr, const std::vector< std::uint32_t > *fromAtoms=nullptr, std::vector< std::vector< std::uint32_t >> *atomBits=nullptr, std::map< std::uint32_t, std::vector< std::vector< int >>> *bitInfo=nullptr)
Generates a topological (Daylight like) fingerprint for a molecule using an alternate (faster) hashin...
const unsigned int substructLayers
Definition: Fingerprints.h:114