Source code for smact.oxidation_states

"""
smact.oxidation_states: Module for predicting the likelihood of species coexisting in a compound.

Based on statistical analysis of oxidation states.
It is possible to use the values obtained in the publication Materials
Discovery by Chemical Analogy: Role of Oxidation States in Structure
Prediction - DOI: 10.1039/C8FD00032H.
"""

from __future__ import annotations

import json
import warnings
from pathlib import Path
from typing import cast

import numpy as np
from pymatgen.core import Structure
from pymatgen.core.periodic_table import Species as pmgSpecies
from pymatgen.core.periodic_table import get_el_sp

from smact import Species, data_directory


def _deduplicate_species_pairs(
    species_pairs: list[tuple],
) -> list[tuple]:
    """Remove duplicate species pairs based on (symbol, oxidation) identity."""
    seen: set[tuple[str, int, str, int]] = set()
    unique: list[tuple] = []
    for anion_sp, cation_sp in species_pairs:
        key = (anion_sp.symbol, anion_sp.oxidation, cation_sp.symbol, cation_sp.oxidation)
        if key not in seen:
            seen.add(key)
            unique.append((anion_sp, cation_sp))
    return unique


[docs] class OxidationStateProbabilityFinder: """ Uses the model developed in the Faraday Discussions Paper (DOI:10.1039/C8FD00032H). Computes the likelihood of metal species existing in solids in the presence of certain anions. """ def __init__(self, probability_table: dict[tuple[str, str], float] | None = None) -> None: """ Initialise the oxidation state probability finder. Args: ---- probability_table (dict): Lookup table to get probabilities for anion-cation pairs. Must be of the format {(anion,cation): probability, ...} e.g. {('F-1', 'Li1'): 1.0,...}. If none, the default table is loaded from the data directory. """ if probability_table is None: with (Path(data_directory) / "oxidation_state_probability_table.json").open() as f: probability_data = json.load(f) # Put data into the required format probability_table = {} for i in probability_data: probability_table[(i[0], i[1])] = i[2] self._probability_table = probability_table # Define set of species for which we have data included_anions = {i[0] for i in self._probability_table} included_cations = {i[1] for i in self._probability_table} included_species = included_anions | included_cations self._included_species = included_species self._included_cations = included_cations self._included_anions = included_anions def _generate_lookup_key(self, species1: Species, species2: Species) -> tuple[str, str]: """ Internal function to generate keys to lookup table. Args: ---- species1 (smact.Species): Species species2 (smact.Species): Species Returns: ------- table_key (tuple): For looking up probability in the form (an_key, cat_key). """ # Check that there is one cation and one anion if (species1.oxidation > 0) and (species2.oxidation < 0): cation = species1 anion = species2 elif (species1.oxidation < 0) and (species2.oxidation > 0): anion = species1 cation = species2 else: msg = "One cation and one anion required." raise ValueError(msg) # Generate keys for lookup table cat_key = "".join([cation.symbol, str(int(cation.oxidation))]) an_key = "".join([anion.symbol, str(int(anion.oxidation))]) # Check that both the species are included in the probability table if not all(elem in self._included_species for elem in [an_key, cat_key]): msg = f"One or both of [{cat_key}, {an_key}] are not in the probability table." raise KeyError(msg) return (an_key, cat_key)
[docs] def pair_probability(self, species1: Species, species2: Species) -> float: r""" Get the anion-cation oxidation state probability for a provided pair of smact Species. i.e. :math:`P_{SA}=\\frac{N_{SX}}{N_{MX}}` in the original paper (DOI:10.1039/C8FD00032H). Args: ---- species1 (smact.Species): Cation or anion species species2 (smact.Species): Cation or anion species Returns: ------- prob (float): Species-anion probability """ # Generate lookup table key and use it to look up probability probability_table_key = self._generate_lookup_key(species1, species2) return self._probability_table[probability_table_key]
[docs] def get_included_species(self) -> set[str]: """Returns a set of species for which there exists data in the probability table used.""" return self._included_species
[docs] def compound_probability(self, structure: Structure | list, ignore_stoichiometry: bool = True) -> float: """ Calculate overall probability for structure or composition. Args: ---- structure (pymatgen.Structure): Compound for which the probability score will be generated. Can also be a list of pymatgen or SMACT Species. ignore_stoichiometry (bool): Whether to weight probabilities by stoichiometry. Defaults to True (i.e. stoichiometry is ignored). Returns: ------- compound_prob (float): Compound probability """ # Convert input to list of SMACT Species if isinstance(structure, list): if all(isinstance(i, Species) for i in structure): pass elif all(isinstance(i, pmgSpecies) for i in structure): pmg_species = cast("list[pmgSpecies]", structure) structure = [Species(i.symbol, int(i.oxi_state or 0)) for i in pmg_species] else: msg = "Input requires a list of SMACT or Pymatgen species." raise TypeError(msg) elif isinstance(structure, Structure): species = structure.species if not all(isinstance(i, pmgSpecies) for i in species): msg = "Structure must have oxidation states." raise TypeError(msg) structure_list: list[Species] = [] for site in structure: sp = cast("pmgSpecies", get_el_sp(site.species_string)) oxi = sp.oxi_state structure_list.append(Species(sp.symbol, int(oxi or 0))) structure = structure_list else: msg = "Input requires a list of SMACT or Pymatgen Species or a Structure." raise TypeError(msg) # Put most electronegative element last in list by sorting by electroneg structure.sort(key=lambda x: x.pauling_eneg if x.pauling_eneg is not None else 0.0) # Define necessary species pairs anion = structure[-1] cations = [i for i in structure if i.oxidation > 0] if not cations: msg = "No cations found in structure. Cannot calculate compound probability." raise ValueError(msg) species_pairs = [(anion, cation) for cation in cations] # Reduce down to unique pairs if ignoring stoichiometry if ignore_stoichiometry: species_pairs = _deduplicate_species_pairs(species_pairs) # Do the maths pair_probs = [self.pair_probability(pair[0], pair[1]) for pair in species_pairs] return float(np.mean(pair_probs))
def __getattr__(name: str) -> type: if name == "Oxidation_state_probability_finder": warnings.warn( "Oxidation_state_probability_finder is deprecated; use OxidationStateProbabilityFinder instead.", DeprecationWarning, stacklevel=2, ) return OxidationStateProbabilityFinder msg = f"module {__name__!r} has no attribute {name!r}" raise AttributeError(msg)