Source code for spyrmsd.optional.rdkit

import gzip
import os
from typing import List, Optional, Tuple

import numpy as np
import rdkit.Chem as Chem

from spyrmsd import molecule, utils


def _load_block_gzipped(loader, fname: str):
    """
    Load gzipped files using MolBlocks.

    Parameters
    ----------
    loader:
        RDKit MolBlock loader (MolFromMol2Block, MolFromPDBBlock, ...)
    fname: str
        File name

    Returns
    -------
    Molecule
    """
    with gzip.open(fname, "r") as fgz:
        content = fgz.read()
        rdmol = loader(content, removeHs=False)

    return rdmol


[docs]def load(fname: str): """ Load molecule from file. Parameters ---------- fname: str File name Returns ------- Molecule """ gzipped = os.path.splitext(fname)[-1] == ".gz" fmt = utils.molformat(fname) if fmt == "mol2": if not gzipped: rdmol = Chem.MolFromMol2File(fname, removeHs=False) else: rdmol = _load_block_gzipped(Chem.MolFromMol2Block, fname) elif fmt == "sdf": if not gzipped: rdmol = next(Chem.SDMolSupplier(fname, removeHs=False)) else: with gzip.open(fname, "r") as fgz: rdmol = next(Chem.ForwardSDMolSupplier(fgz, removeHs=False)) elif fmt == "pdb": if not gzipped: rdmol = Chem.MolFromPDBFile(fname, removeHs=False) else: rdmol = _load_block_gzipped(Chem.MolFromPDBBlock, fname) else: raise NotImplementedError( f"Format '{fmt}' is currently not supported with RDKit." ) return rdmol
[docs]def loadall(fname: str): """ Load molecules from file. Parameters ---------- fname: str File name Returns ------- List of molecules """ gzipped = os.path.splitext(fname)[-1] == ".gz" fmt = utils.molformat(fname) if fmt == "mol2": error = ( "Multiple molecules in MOL2 files are not supported. " "See RDKit#415 (https://github.com/rdkit/rdkit/pull/415)." ) raise NotImplementedError(error) # See RDKit Issue #415 elif fmt == "sdf": if not gzipped: rdmols = Chem.SDMolSupplier(fname, removeHs=False) mols = [rdmol for rdmol in rdmols] else: with gzip.open(fname, "r") as fgz: rdmols = Chem.ForwardSDMolSupplier(fgz, removeHs=False) # Load all molecules before closing file mols = [rdmol for rdmol in rdmols] elif fmt == "pdb": # TODO: Implement raise NotImplementedError("Multiple molecules in PDB files are not supported.") else: raise NotImplementedError( f"Format '{fmt}' is currently not supported with RDKit." ) return mols
[docs]def adjacency_matrix(mol) -> np.ndarray: """ Adjacency matrix from OpenBabel molecule. Parameters ---------- mol: Molecule Returns ------- np.ndarray Adjacency matrix of the molecule """ return Chem.rdmolops.GetAdjacencyMatrix(mol)
[docs]def to_molecule(mol, adjacency: bool = True): """ Transform molecule to `pyrmsd` molecule. Parameters ---------- mol: Molecule adjacency: boolean, optional Flag to decide wether to build the adjacency matrix from molecule Returns ------- spyrmsd.molecule.Molecule `spyrmsd` molecule """ if mol is None: # Propagate RDKit parsing failure return None atoms = mol.GetAtoms() n = len(atoms) atomicnums = np.zeros(n, dtype=int) coordinates = np.zeros((n, 3)) conformer = mol.GetConformer() for i, atom in enumerate(atoms): atomicnums[i] = atom.GetAtomicNum() pos = conformer.GetAtomPosition(i) coordinates[i] = np.array([pos.x, pos.y, pos.z]) A: Optional[np.ndarray] = adjacency_matrix(mol) if adjacency else None return molecule.Molecule(atomicnums, coordinates, A)
[docs]def numatoms(mol) -> int: """ Number of atoms. Parameters ---------- mol: Molecule Returns ------- int Number of atoms """ return mol.GetNumAtoms()
[docs]def numbonds(mol) -> int: """ Number of bonds. Parameters ---------- mol: Molecule Returns ------- int Number of bonds """ return mol.GetNumBonds()
[docs]def bonds(mol) -> List[Tuple[int, int]]: """ List of bonds. Parameters ---------- mol: Molecule Returns ------- List[Tuple[int, int]] List of bonds Notes ----- A bond is defined by a tuple of (0-based) indices of two atoms. """ b = [] for bond in mol.GetBonds(): b.append((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())) return b