Source code for smact.structure_prediction.probability_models

"""Probability models for species substitution.

Implements base class :class:`SubstitutionModel`,
which can be extended to allow for development of new
lambda tables. An example of such an extension,
:class:`RadiusModel`, is also implemented.

Todo:
    * Allow for parallelism in lambda table calculations
      by implementing a `sub_probs` abstractmethod
      that :meth:`SubstitutionModel.gen_lambda` uses,
      if available.

"""

import abc
import os
from itertools import combinations_with_replacement
from typing import Dict, List, Optional

import pandas as pd

from smact import data_directory

from .utilities import parse_spec


[docs]class SubstitutionModel(abc.ABC):
    """Abstract base class for substitution models."""

[docs]    @abc.abstractmethod
    def sub_prob(self, s1: str, s2: str) -> float:
        """Calculate the probability of substituting species s1 for s2.

        Args:
            s1: The species being substituted.
            s2: The species substituting.

        Returns:
            The probability of substitution.

        """

[docs]    def gen_lambda(self, species: List[str]) -> pd.DataFrame:
        """Generate a lambda table for a list of species.

        Args:
            species: A list of species strings.

        Returns:
            A pivot table-style DataFrame containing lambda values
            for every possible species pair.

        """
        pairs = combinations_with_replacement(species, 2)

        lambda_tab = []
        for s1, s2 in pairs:
            prob = self.sub_prob(s1, s2)
            lambda_tab.append((s1, s2, prob))
            if s1 != s2:
                lambda_tab.append((s2, s1, prob))

        df = pd.DataFrame(lambda_tab)
        return df.pivot(index=0, columns=1, values=2)


[docs]class RadiusModel(SubstitutionModel):
    """Substitution probability model based on Shannon radii."""

    def __init__(self):
        r"""Parse Shannon radii data file.

        Also calculates "spring constant", _k_, based on maximum
        difference in Shannon radii:

        .. math::
            k = \Delta r_\mathrm{max}^{-2}.

        """
        shannon_file = os.path.join(data_directory, "shannon_radii.csv")

        self.shannon_data = pd.read_csv(shannon_file, index_col=0)

        self.k = (
            self.shannon_data["ionic_radius"].max()
            - self.shannon_data["ionic_radius"].min()
        ) ** -2

[docs]    def sub_prob(self, s1, s2):
        r"""Calculate the probability of substituting species s1 for s2.

        Based on the difference in Shannon radii, the probability is
        assumed to be:

        .. math::
            p = 1 - k \Delta r^2.

        Args:
            s1: The species being substituted.
            s2: The species substituting.

        Returns:
            The probability of substitution.

        """
        spec1 = parse_spec(s1)
        spec2 = parse_spec(s2)

        try:
            ele1_rows = self.shannon_data.loc[spec1[0]]
            ele2_rows = self.shannon_data.loc[spec2[0]]
        except KeyError as e:
            raise KeyError(f"Element not in Shannon radius data file: {e}")

        spec1_rows = ele1_rows.loc[ele1_rows["charge"] == spec1[1]]
        spec2_rows = ele2_rows.loc[ele2_rows["charge"] == spec2[1]]

        # Get mean so we don't need coordination information
        mean_spec1_r = spec1_rows["ionic_radius"].mean()
        mean_spec2_r = spec2_rows["ionic_radius"].mean()

        # Hooke's law-style probability
        return 1 - self.k * (mean_spec1_r - mean_spec2_r) ** 2