"""
smact.oxidation_states: Module for predicting the likelihood of species coexisting in a compound.
Based on statistical analysis of oxidation states.
It is possible to use the values obtained in the publication Materials
Discovery by Chemical Analogy: Role of Oxidation States in Structure
Prediction - DOI: 10.1039/C8FD00032H.
"""
from __future__ import annotations
import json
import warnings
from pathlib import Path
from typing import cast
import numpy as np
from pymatgen.core import Structure
from pymatgen.core.periodic_table import Species as pmgSpecies
from pymatgen.core.periodic_table import get_el_sp
from smact import Species, data_directory
def _deduplicate_species_pairs(
    species_pairs: list[tuple],
) -> list[tuple]:
    """
    Collapse repeated (anion, cation) pairs, keeping the first occurrence of each.

    Two pairs count as the same when the symbol and oxidation state of both
    members match; object identity of the species is irrelevant.

    Args:
    ----
        species_pairs (list): Two-element (anion, cation) tuples whose members
            expose ``symbol`` and ``oxidation`` attributes.

    Returns:
    -------
        list: The distinct pairs, in order of first appearance.

    """
    # Dicts preserve insertion order, and setdefault never overwrites an
    # existing entry, so the first pair seen for each identity is the one kept.
    first_by_identity: dict[tuple[str, int, str, int], tuple] = {}
    for anion, cation in species_pairs:
        identity = (anion.symbol, anion.oxidation, cation.symbol, cation.oxidation)
        first_by_identity.setdefault(identity, (anion, cation))
    return list(first_by_identity.values())
[docs]
class OxidationStateProbabilityFinder:
    """
    Uses the model developed in the Faraday Discussions Paper (DOI:10.1039/C8FD00032H).

    Computes the likelihood of metal species existing in solids in the presence of certain anions.
    """

    def __init__(self, probability_table: dict[tuple[str, str], float] | None = None) -> None:
        """
        Initialise the oxidation state probability finder.

        Args:
        ----
            probability_table (dict): Lookup table to get probabilities for anion-cation pairs.
                Must be of the format {(anion,cation): probability, ...} e.g. {('F-1', 'Li1'): 1.0,...}.
                If none, the default table is loaded from the data directory.

        """
        if probability_table is None:
            table_path = Path(data_directory) / "oxidation_state_probability_table.json"
            # Explicit encoding so the read does not depend on the platform locale.
            with table_path.open(encoding="utf-8") as f:
                probability_data = json.load(f)
            # Each record is read as [anion_key, cation_key, probability].
            probability_table = {(i[0], i[1]): i[2] for i in probability_data}

        self._probability_table = probability_table

        # Define set of species for which we have data
        included_anions = {i[0] for i in self._probability_table}
        included_cations = {i[1] for i in self._probability_table}
        self._included_species = included_anions | included_cations
        self._included_cations = included_cations
        self._included_anions = included_anions

    def _generate_lookup_key(self, species1: Species, species2: Species) -> tuple[str, str]:
        """
        Internal function to generate keys to lookup table.

        The two species may be given in either order; the returned key is
        always ordered (anion, cation).

        Args:
        ----
            species1 (smact.Species): Species
            species2 (smact.Species): Species

        Returns:
        -------
            table_key (tuple): For looking up probability in the form (an_key, cat_key).

        Raises:
        ------
            ValueError: If the pair is not exactly one cation and one anion.
            KeyError: If either species key is absent from the probability table.

        """
        # Check that there is one cation and one anion
        if (species1.oxidation > 0) and (species2.oxidation < 0):
            cation, anion = species1, species2
        elif (species1.oxidation < 0) and (species2.oxidation > 0):
            anion, cation = species1, species2
        else:
            msg = "One cation and one anion required."
            raise ValueError(msg)

        # Generate keys for lookup table, e.g. "Li1" / "F-1"
        cat_key = f"{cation.symbol}{int(cation.oxidation)}"
        an_key = f"{anion.symbol}{int(anion.oxidation)}"

        # Check that both the species are included in the probability table
        if not all(elem in self._included_species for elem in (an_key, cat_key)):
            msg = f"One or both of [{cat_key}, {an_key}] are not in the probability table."
            raise KeyError(msg)

        return (an_key, cat_key)

    def pair_probability(self, species1: Species, species2: Species) -> float:
        r"""
        Get the anion-cation oxidation state probability for a provided pair of smact Species.

        i.e. :math:`P_{SA}=\frac{N_{SX}}{N_{MX}}` in the original paper (DOI:10.1039/C8FD00032H).

        Args:
        ----
            species1 (smact.Species): Cation or anion species
            species2 (smact.Species): Cation or anion species

        Returns:
        -------
            prob (float): Species-anion probability

        Raises:
        ------
            ValueError: If the pair is not exactly one cation and one anion.
            KeyError: If either species, or this particular anion-cation
                combination, is absent from the probability table.

        """
        # Generate lookup table key and use it to look up probability
        probability_table_key = self._generate_lookup_key(species1, species2)
        try:
            return self._probability_table[probability_table_key]
        except KeyError:
            # Both species appear somewhere in the table, but not together.
            msg = f"No probability is tabulated for the anion-cation pair {probability_table_key}."
            raise KeyError(msg) from None

    def get_included_species(self) -> set[str]:
        """Returns a set of species for which there exists data in the probability table used."""
        return self._included_species

    def compound_probability(self, structure: Structure | list, ignore_stoichiometry: bool = True) -> float:
        """
        Calculate overall probability for structure or composition.

        The species with the highest Pauling electronegativity is taken as the
        anion, and the result is the mean of the pair probabilities between
        that anion and every species with a positive oxidation state.

        Args:
        ----
            structure (pymatgen.Structure): Compound for which the probability score will be generated.
                Can also be a list of pymatgen or SMACT Species. A list passed in is not modified.
            ignore_stoichiometry (bool): If True (the default), each distinct anion-cation
                pair is counted once. If False, every cation entry contributes to the mean,
                so repeated species weight the result by stoichiometry.

        Returns:
        -------
            compound_prob (float): Compound probability

        Raises:
        ------
            TypeError: If the input is not a Structure with oxidation states or a
                homogeneous list of SMACT / pymatgen Species.
            ValueError: If the input contains no cations (including empty input), or
                the most electronegative species is not an anion.
            KeyError: If a required species or pair is missing from the probability table.

        """
        # Convert input to list of SMACT Species
        if isinstance(structure, list):
            if all(isinstance(i, Species) for i in structure):
                # Copy so that the sort below does not reorder the caller's list.
                species_list: list[Species] = list(structure)
            elif all(isinstance(i, pmgSpecies) for i in structure):
                pmg_species = cast("list[pmgSpecies]", structure)
                species_list = [Species(i.symbol, int(i.oxi_state or 0)) for i in pmg_species]
            else:
                msg = "Input requires a list of SMACT or Pymatgen species."
                raise TypeError(msg)
        elif isinstance(structure, Structure):
            species = structure.species
            if not all(isinstance(i, pmgSpecies) for i in species):
                msg = "Structure must have oxidation states."
                raise TypeError(msg)
            species_list = []
            for site in structure:
                sp = cast("pmgSpecies", get_el_sp(site.species_string))
                species_list.append(Species(sp.symbol, int(sp.oxi_state or 0)))
        else:
            msg = "Input requires a list of SMACT or Pymatgen Species or a Structure."
            raise TypeError(msg)

        # Put most electronegative element last in list by sorting by electroneg
        # (species without a Pauling value are treated as 0.0).
        species_list.sort(key=lambda x: x.pauling_eneg if x.pauling_eneg is not None else 0.0)

        # Validate before indexing so empty input raises the same ValueError
        # as any other cation-free input rather than an IndexError.
        cations = [i for i in species_list if i.oxidation > 0]
        if not cations:
            msg = "No cations found in structure. Cannot calculate compound probability."
            raise ValueError(msg)

        # Define necessary species pairs
        anion = species_list[-1]
        species_pairs = [(anion, cation) for cation in cations]

        # Reduce down to unique pairs if ignoring stoichiometry
        if ignore_stoichiometry:
            species_pairs = _deduplicate_species_pairs(species_pairs)

        # Do the maths
        pair_probs = [self.pair_probability(an, cat) for an, cat in species_pairs]
        return float(np.mean(pair_probs))
def __getattr__(name: str) -> type:
    """
    Resolve module attributes that are not found by normal lookup.

    Only the legacy name ``Oxidation_state_probability_finder`` is handled:
    it emits a ``DeprecationWarning`` and returns
    ``OxidationStateProbabilityFinder``. Any other name raises
    ``AttributeError``.
    """
    if name != "Oxidation_state_probability_finder":
        msg = f"module {__name__!r} has no attribute {name!r}"
        raise AttributeError(msg)

    # stacklevel=2 attributes the warning to the caller's line.
    warnings.warn(
        "Oxidation_state_probability_finder is deprecated; use OxidationStateProbabilityFinder instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return OxidationStateProbabilityFinder