Filtering a search space using oxidation states#

Let’s attempt to perform the first steps in a high-throughput compound design workflow which involves:

  • Generating the search space using SMACT

  • Filtering the search space using the oxidation states model.

Open in Colab

# Install the required packages
try:
    import google.colab

    IN_COLAB = True
except:
    IN_COLAB = False

if IN_COLAB:
    !uv pip install smact --quiet

# Imports
import re
from itertools import combinations

import matplotlib.pyplot as plt
import multiprocess
import numpy as np
import pandas as pd
from pymatgen.core import Composition

import smact
from smact import Species, screening
from smact.oxidation_states import Oxidation_state_probability_finder

Composition generation#

Hide code cell source

# Instantiate the oxidation state probability finder class with the default table
ox_prob_finder = Oxidation_state_probability_finder()

# Get the cations that are in the probability table
cations = [species for species in ox_prob_finder.get_included_species() if "-" not in species]

# Get the symbols of the d-block elements
d_block_symbols = smact.ordered_elements(21, 30) + smact.ordered_elements(39, 48) + smact.ordered_elements(71, 80)

# This function will parse the species included in the probability table


def parse_species(species):
    match = re.match(r"([A-Za-z]+)([0-9.]+)", species)
    return match.group(1), int(match.group(2))


# Create a list of the d-block smact.Species objects from the d-block species in the table
allowed_dblock_species = [
    Species(parse_species(cation)[0], parse_species(cation)[1])
    for cation in cations
    if parse_species(cation)[0] in d_block_symbols
]

# Get all the combinations of the d_block smact.Species objects
d_block_species_combos = combinations(allowed_dblock_species, 2)

# Use this custom smact_filter function to generate the plausible compositions


def smact_filter(els):
    all_compounds = []
    halide_symbols = ["F", "Cl", "Br", "I"]

    for halide in halide_symbols:
        elements = [e.symbol for e in els] + [halide]

        halide_species = Species(halide, -1)

        # Get the Pauling electronegativities
        paul_a, paul_b, paul_x = (
            els[0].pauling_eneg,
            els[1].pauling_eneg,
            halide_species.pauling_eneg,
        )
        electronegativities = [paul_a, paul_b, paul_x]

        # For each set of species (in oxidation states) apply both SMACT tests
        ox_a, ox_b = els[0].oxidation, els[1].oxidation
        ox_states = [ox_a, ox_b, -1]
        # Test for charge balance
        cn_e, cn_r = smact.neutral_ratios(ox_states, threshold=8)
        if cn_e:
            # Electronegativity test
            electroneg_OK = screening.pauling_test(ox_states, electronegativities)
            if electroneg_OK:
                compound = tuple(
                    [
                        elements,
                        (ox_a, ox_b, -1),
                        cn_r[0],
                        list(els) + [halide_species],
                    ]
                )
                all_compounds.append(compound)
    return all_compounds

Hide code cell source

# Use multiprocess to speed up the generation of the combinations
with multiprocess.Pool(3) as p:
    result = p.map(smact_filter, d_block_species_combos)

flat_list = [item for sublist in result for item in sublist]

# Here we grab the species string for each composition generated by smact
list_of_species = [species[3] for species in flat_list]
A_species = [f"{species[0].symbol}{species[0].oxidation}+" for species in list_of_species]
B_species = [f"{species[1].symbol}{species[1].oxidation}+" for species in list_of_species]
X_species = [f"{species[2].symbol}1-" for species in list_of_species]

# Print out the first few entries from our search space
print(f"Number of compositions: {len(flat_list)}")
print("Each list entry looks like this:\n  elements, oxidation states, stoichiometries")
for i in flat_list[:5]:
    print(i[0], i[1], i[2])

Hide code cell output

Number of compositions: 14832
Each list entry looks like this:
  elements, oxidation states, stoichiometries
['Rh', 'Zn', 'F'] (4, 2, -1) (1, 1, 6)
['Rh', 'Zn', 'Cl'] (4, 2, -1) (1, 1, 6)
['Rh', 'Zn', 'Br'] (4, 2, -1) (1, 1, 6)
['Rh', 'Zn', 'I'] (4, 2, -1) (1, 1, 6)
['Rh', 'Mn', 'F'] (4, 2, -1) (1, 1, 6)

Hide code cell source

# Get the formulas for the compositions


def comp_maker(comp):
    form = []
    for el, ammt in zip(comp[0], comp[2]):
        form.append(el)
        form.append(ammt)
    form = "".join(str(e) for e in form)
    pmg_form = Composition(form).reduced_formula
    return pmg_form


if __name__ == "__main__":
    with multiprocess.Pool(3) as p:
        pretty_formulas = p.map(comp_maker, flat_list)

print("Each list entry now looks like this: ")
for i in pretty_formulas[:5]:
    print(i)

Hide code cell output

Each list entry now looks like this: 
ZnRhF6
ZnRhCl6
ZnRhBr6
ZnRhI6
MnRhF6

Applying the oxidation states model#

The method compound_probability of the Oxidation_state_probability_finder class enables us to compute the likelihood of the metal species existing in the presence of particular anions. We will apply this to all the compounds generated by smact

# Compute the compound probabilities
compound_probabilities = [ox_prob_finder.compound_probability(spec) for spec in list_of_species]
# Create a dataframe
data = {
    "formula_pretty": pretty_formulas,
    "A": A_species,
    "B": B_species,
    "X": X_species,
    "compound_probability": compound_probabilities,
}

df = pd.DataFrame(data)
df.head()
formula_pretty A B X compound_probability
0 ZnRhF6 Rh4+ Zn2+ F1- 0.775000
1 ZnRhCl6 Rh4+ Zn2+ Cl1- 0.625000
2 ZnRhBr6 Rh4+ Zn2+ Br1- 0.500000
3 ZnRhI6 Rh4+ Zn2+ I1- 0.500000
4 MnRhF6 Rh4+ Mn2+ F1- 0.514796
# Compute the number of non-zero compound probabilities

# Convert the probability values from a pandas.Series object to a numpy array
probs = df.compound_probability.to_numpy()

# Create a numpy array of the non-zero probabilities
non_Zero_probs = probs[(probs > 0)]

print(
    f"The original smact search space produced {len(probs)} compositons of which {len(non_Zero_probs)} had a non-zero compound probability"
)
The original smact search space produced 14832 compositons of which 12671 had a non-zero compound probability

Visualising the compound probabilities of the SMACT generated compositions.#

We will now plot the number of compounds as a function of the probability threshold.

# Create a numpy array of 100 threshold values
thresh = np.linspace(0.0, 1.0, 100)

# Use a list comprehension to generate a list of the number of compounds as a function of the probability threshold
num_compounds = [len(probs[(probs >= t)]) for t in thresh]

# Set up the figure and plot the number of compounds against the threshold
fig, ax = plt.subplots()

ax.scatter(x=thresh, y=num_compounds, marker="x", color="orange")
plt.xlabel("Threshold")
plt.ylabel("Number of compounds")

plt.show()
../_images/2a6c806cac6786deaed371867944bb8cf79ec6e638e05bd574ab4c093ff45b4c.png