# Copyright 2021 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from towhee import register
from towhee.operator import Operator
from rdkit import DataStructs, Chem

log = logging.getLogger()


@register(output_schema=['fingerprint'])
class Rdkit(Operator):
    """
    Generate molecular fingerprint with RDKit.

    Args:
        algorithm (`str`):
            Which algorithm to use for fingerprinting, including morgan,
            daylight, ap, maccs, and defaults to 'morgan'.
        size (`int`):
            The bit vector size, defaults to 2048. Only the 'morgan' and
            'daylight' algorithms receive this value; 'ap' and 'maccs' are
            called without it and use whatever length RDKit produces.
    """

    # Algorithm names that `__call__` knows how to dispatch.
    _ALGORITHMS = ('morgan', 'daylight', 'ap', 'maccs')

    def __init__(self, algorithm: str = 'morgan', size: int = 2048):
        super().__init__()
        self.algorithm = algorithm
        self.size = size

    def __call__(self, smiles: str) -> bytes:
        """
        Compute the fingerprint of one molecule.

        Args:
            smiles (`str`):
                The molecule written as a SMILES string.

        Returns:
            (`bytes`)
                The fingerprint bit vector, obtained by decoding the hex text
                produced by `DataStructs.BitVectToFPSText`.

        Raises:
            ValueError: If `self.algorithm` is not one of the supported names.
            KeyError: If the SMILES cannot be parsed or RDKit fails while
                generating the fingerprint (the original error is chained).
        """
        algorithm = self.algorithm
        # Fail early with a clear message; otherwise no branch below would
        # assign `fp` and the call would die with an UnboundLocalError.
        if algorithm not in self._ALGORITHMS:
            raise ValueError(
                f'Unsupported algorithm {algorithm!r}, '
                f'expected one of {list(self._ALGORITHMS)}.'
            )

        mol = Chem.MolFromSmiles(smiles)
        try:
            # MolFromSmiles signals a parse failure by returning None instead
            # of raising; surface that explicitly rather than letting the
            # fingerprint call fail with an opaque argument error.
            if mol is None:
                raise ValueError('invalid SMILES string')

            if algorithm == 'daylight':
                fp = Chem.RDKFingerprint(mol, fpSize=self.size)
            elif algorithm == 'morgan':
                from rdkit.Chem import AllChem
                # Radius is fixed at 2.
                fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=self.size)
            elif algorithm == 'ap':
                from rdkit.Chem.AtomPairs import Pairs
                fp = Pairs.GetAtomPairFingerprintAsBitVect(mol)
            else:  # 'maccs'
                from rdkit.Chem import MACCSkeys
                fp = MACCSkeys.GenMACCSKeys(mol)
        except Exception as e:
            log.error('%s, cannot generate fingerprint of %s.', e, smiles)
            # KeyError is kept for backward compatibility with callers;
            # chaining preserves the original traceback.
            raise KeyError(e) from e

        hex_fp = DataStructs.BitVectToFPSText(fp)
        return bytes.fromhex(hex_fp)