Source code for spyrmsd.hungarian

import numpy as np
import scipy

from .due import Doi, due

due.cite(
    Doi("10.1021/ci400534h"),
    path="spyrmsd.hungarian",
    description="Hungarian method",
)


[docs]def cost_mtx(A: np.ndarray, B: np.ndarray): """ Compute the cost matrix for atom-atom assignment. Parameters ---------- A: numpy.ndarray Atomic coordinates of molecule A B: numpy.ndarray Atomic coordinates of molecule B Returns ------- np.ndarray Cost matrix of squared atomic distances between atoms of molecules A and B """ return scipy.spatial.distance.cdist(A, B, metric="sqeuclidean")
[docs]def optimal_assignment(A: np.ndarray, B: np.ndarray): """ Solve the optimal assignment problems between atomic coordinates of molecules A and B. Parameters ---------- A: numpy.ndarray Atomic coordinates of molecule A B: numpy.ndarray Atomic coordinates of molecule B Returns ------- Tuple[float, nd.array, nd.array] Cost of the optimal assignment, together with the row and column indices of said assignment """ C = cost_mtx(A, B) row_idx, col_idx = scipy.optimize.linear_sum_assignment(C) # Compute assignment cost cost = C[row_idx, col_idx].sum() return cost, row_idx, col_idx
[docs]def hungarian_rmsd( A: np.ndarray, B: np.ndarray, apropsA: np.ndarray, apropsB: np.ndarray ) -> float: """ Solve the optimal assignment problems between atomic coordinates of molecules A and B. Parameters ---------- A: numpy.ndarray Atomic coordinates of molecule A B: numpy.ndarray Atomic coordinates of molecule B apropsA: numpy.ndarray Atomic properties of molecule A apropsB: numpy.ndarray Atomic properties of molecule B Returns ------- float RMSD computed with the Hungarian method Notes ----- The Hungarian algorithm is used to solve the linear assignment problem, which is a minimum weight matching of the molecular graphs (bipartite). The linear assignment problem is solved for every element separately. [1]_ .. [1] W. J. Allen and R. C. Rizzo, *Implementation of the Hungarian Algorithm to Account for Ligand Symmetry and Similarity in Structure-Based Design*, J. Chem. Inf. Model. **54**, 518-529 (2014) """ assert A.shape == B.shape assert apropsA.shape == apropsB.shape elements = set(apropsA) total_cost: float = 0.0 for t in elements: apropsA_idx = apropsA == t apropsB_idx = apropsB == t assert apropsA_idx.shape == apropsB_idx.shape cost, row_idx, col_idx = optimal_assignment( A[apropsA_idx, :], B[apropsB_idx, :] ) total_cost += cost N = A.shape[0] rmsd = np.sqrt(total_cost / N) return rmsd