"""
Semiconducting Materials from Analogy and Chemical Theory.
A collection of fast screening tools from elemental data
"""
from __future__ import annotations
__version__ = "4.0.0"
__all__ = [
"Element",
"Species",
"__version__",
"anions",
"are_eq",
"d_block",
"element_dictionary",
"lattices_are_same",
"metals",
"neutral_ratios",
"neutral_ratios_iter",
"ordered_elements",
]
import functools
import itertools
import warnings
from functools import reduce
from math import gcd
from operator import mul as multiply
from pathlib import Path
from typing import TYPE_CHECKING, cast
import numpy as np
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Sequence
import pandas as pd
module_directory = str(Path(__file__).resolve().parent)
data_directory = str(Path(module_directory) / "data")
# get correct path for datafiles when called from another directory
from smact import data_loader
[docs]
class Element:
"""
Collection of standard elemental properties for given element.
Data is drawn from "data/element.txt", part of the Open Babel
package.
Atoms with a defined oxidation state draw properties from the
"Species" class.
Attributes:
----------
Element.symbol (string) : Elemental symbol used to retrieve data
Element.name (string) : Full name of element
Element.number (int) : Proton number of element
Element.pauling_eneg (float) : Pauling electronegativity (0.0 if unknown)
Element.ionpot (float) : Ionisation potential in eV (0.0 if unknown)
Element.e_affinity (float) : Electron affinity in eV (0.0 if unknown)
Element.dipol (float) : Static dipole polarizability in 1.6488e-41 C m^2 / V (0.0 if unknown)
Element.eig (float) : Electron eigenvalue (units unknown) N.B. For Cu, Au and Ag this defaults to d-orbital
Element.eig_s (float) : Eigenvalue of s-orbital
Element.SSE (float) : Solid State Energy
Element.SSEPauling (float) : SSE based on regression fit with Pauling electronegativity
Element.oxidation_states (list) : Default list of allowed oxidation
states for use in SMACT. In >3.0, these are the ICSD24 set.
In <3.0, these are the SMACT14 set.
Element.oxidation_states_smact14 (list): Original list of oxidation
states that were manually compiled for SMACT in 2014
(default in SMACT < 3.0)
Element.oxidation_states_sp (list) : List of oxidation states recognised by the Pymatgen Structure Predictor
Element.oxidation_states_icsd16 (list) : List of oxidation states that appear in the 2016 version of ICSD
Element.oxidation_states_wiki (list): List of oxidation states that
appear on Wikipedia. See
https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements
Data retrieved: 2022-09-22
Element.oxidation_states_custom (list | None ): List of oxidation
states that appear in the custom data file supplied (if any)
Element.oxidation_states_icsd24 (list): List of oxidation states that appear in the 2024 version of the ICSD
Element.coord_envs (list): The allowed coordination environments for the ion
Element.covalent_radius (float) : Covalent radius of the element
Element.mass (float) : Molar mass of the element
Element.crustal_abundance (float) : Crustal abundance in the earths crust mg/kg taken from CRC
Element.HHI_p (float) : Herfindahl-Hirschman Index for elemental production
Element.HHI_r (float) : Herfindahl-Hirschman Index for elemental reserves
Element.mendeleev (int): Mendeleev number
Element.AtomicWeight (float): Atomic weight
Element.MeltingT (float): Melting temperature in K
Element.num_valence (int): Number of valence electrons
Element.num_valence_modified (int): Number of valence electrons based on a modified definition
Raises:
------
KeyError: Element not found in element.txt
"""
# Instance attribute annotations for static type checking
coord_envs: list[str] | None
covalent_radius: float
crustal_abundance: float
e_affinity: float
eig: float
eig_s: float
HHI_p: float | None
HHI_r: float | None
ionpot: float
mass: float
name: str
number: int
oxidation_states: list[int] | None
oxidation_states_smact14: list[int] | None
oxidation_states_icsd16: list[int] | None
oxidation_states_sp: list[int] | None
oxidation_states_wiki: list[int] | None
oxidation_states_icsd24: list[int] | None
oxidation_states_custom: list[int] | None
dipol: float
pauling_eneg: float | None
SSE: float | None
SSEPauling: float | None
symbol: str
mendeleev: int | None
AtomicWeight: float | None
MeltingT: float | None
num_valence: int | None
num_valence_modified: int | None
def __init__(self, symbol: str, oxi_states_custom_filepath: str | None = None) -> None:
"""
Initialise Element class.
Args:
----
symbol (str): Chemical element symbol (e.g. 'Fe')
oxi_states_custom_filepath (str): Path to custom oxidation states file
"""
# Get the oxidation states from the custom file if it exists
if oxi_states_custom_filepath:
try:
self.oxidation_states_custom = cast(
"list[int] | None",
data_loader.lookup_element_oxidation_states_custom(symbol, oxi_states_custom_filepath),
)
except (TypeError, FileNotFoundError, OSError, ValueError):
warnings.warn("Custom oxidation states file not found. Please check the file path.", stacklevel=2)
self.oxidation_states_custom = None
else:
self.oxidation_states_custom = None
self.symbol = symbol
dataset = data_loader.lookup_element_data(self.symbol, copy=False)
if dataset is None:
msg = f"Elemental data for {symbol} not found."
raise KeyError(msg)
# Set coordination-environment data from the Shannon-radius data.
# As above, it is safe to use copy = False with this Get* function.
shannon_data = data_loader.lookup_element_shannon_radius_data(symbol, copy=False)
coord_envs = [row["coordination"] for row in shannon_data] if shannon_data is not None else None
hhi_scores = data_loader.lookup_element_hhis(symbol)
if hhi_scores is None:
hhi_scores = (None, None)
sse_data = data_loader.lookup_element_sse_data(symbol)
sse = sse_data["SolidStateEnergy"] if sse_data else None
sse_pauling_data = data_loader.lookup_element_sse_pauling_data(symbol)
sse_pauling = sse_pauling_data["SolidStateEnergyPauling"] if sse_pauling_data else None
magpie_data = data_loader.lookup_element_magpie_data(symbol)
if magpie_data:
mendeleev = magpie_data["MendeleevNumber"]
atomic_weight = magpie_data["AtomicWeight"]
melting_t = magpie_data["MeltingT"]
num_valence = magpie_data["NValence"]
else:
mendeleev = None
atomic_weight = None
melting_t = None
num_valence = None
valence_data = data_loader.lookup_element_valence_data(symbol)
num_valence_modified = valence_data["NValence"] if valence_data else None
self.coord_envs = coord_envs
self.covalent_radius = dataset["r_cov"]
self.crustal_abundance = dataset["Abundance"]
self.e_affinity = dataset["e_affinity"]
self.eig = dataset["p_eig"]
self.eig_s = dataset["s_eig"]
self.HHI_p = hhi_scores[0]
self.HHI_r = hhi_scores[1]
self.ionpot = dataset["ion_pot"]
self.mass = dataset["Mass"]
self.name = dataset["Name"]
self.number = dataset["Z"]
self.oxidation_states = data_loader.lookup_element_oxidation_states_icsd24(symbol)
self.oxidation_states_smact14 = data_loader.lookup_element_oxidation_states(symbol)
self.oxidation_states_icsd16 = data_loader.lookup_element_oxidation_states_icsd(symbol)
self.oxidation_states_sp = data_loader.lookup_element_oxidation_states_sp(symbol)
self.oxidation_states_wiki = data_loader.lookup_element_oxidation_states_wiki(symbol)
self.oxidation_states_icsd24 = data_loader.lookup_element_oxidation_states_icsd24(symbol)
self.dipol = dataset["dipol"]
self.pauling_eneg = dataset["el_neg"]
self.SSE = sse
self.SSEPauling = sse_pauling
self.mendeleev = mendeleev
self.AtomicWeight = atomic_weight
self.MeltingT = melting_t
self.num_valence = num_valence
self.num_valence_modified = num_valence_modified
# snake_case aliases for PascalCase attributes, providing a consistent API
@property
def sse(self) -> float | None:
"""Solid State Energy (alias for SSE)."""
return self.SSE
@property
def sse_pauling(self) -> float | None:
"""SSE from Pauling electronegativity regression (alias for SSEPauling)."""
return self.SSEPauling
@property
def atomic_weight(self) -> float | None:
"""Atomic weight (alias for AtomicWeight)."""
return self.AtomicWeight
@property
def melting_t(self) -> float | None:
"""Melting temperature in K (alias for MeltingT)."""
return self.MeltingT
@property
def hhi_p(self) -> float | None:
"""Herfindahl-Hirschman Index for production (alias for HHI_p)."""
return self.HHI_p
@property
def hhi_r(self) -> float | None:
"""Herfindahl-Hirschman Index for reserves (alias for HHI_r)."""
return self.HHI_r
[docs]
class Species(Element):
"""
Class providing data for elements in a given chemical environment.
In addition to the standard properties from the periodic table
(inherited from the Element class), Species objects use the
oxidation state and coordination environment to provide further
properties.
The Species object can be created with either a default set of
shannon radii (radii_source='shannon') or with a set of
machine-learnt shannon radii (radii_source='extended').
The source of the machine-learnt shannon radii set is
Baloch, A.A., Alqahtani, S.M., Mumtaz, F., Muqaibel, A.H.,
Rashkeev, S.N. and Alharbi, F.H., 2021. Extending Shannon's
ionic radii database using machine learning.
Physical Review Materials, 5(4), p.043804.
Attributes:
----------
Species.symbol: Elemental symbol used to retrieve data
Species.name: Full name of element
Species.oxidation: Oxidation state of species (signed integer)
Species.coordination: Coordination number of species (integer)
Species.pauling_eneg: Pauling electronegativity (0.0 if unknown)
Species.ionpot: Ionisation potential in eV (0.0 if unknown)
Species.e_affinity: Electron affinity in eV (0.0 if unknown)
Species.eig: Electron eigenvalue (units unknown)
N.B. For Cu, Au and Ag this defaults to d-orbital.
Species.shannon_radius: Shannon radius of Species.
Species.ionic_radius: Ionic radius of Species.
Species.average_shannon_radius: An average shannon radius for
the species. The average is taken over all coordination
environments.
Species.average_ionic_radius: An average ionic radius for
the species. The average is taken over all coordination
environments.
Raises:
------
KeyError: Element not found in element.txt
"""
def __init__(
self,
symbol: str,
oxidation: int,
coordination: int = 4,
radii_source: str = "shannon",
) -> None:
"""
Initialise Species class.
Args:
----
symbol (str): Chemical element symbol (e.g. 'Fe')
oxidation (int): Oxidation state of species
coordination (int): Coordination number of species
radii_source (str): Source of shannon radii data. Choose 'shannon' for
the default shannon radii set or 'extended' for the machine-learnt shannon radii set
"""
Element.__init__(self, symbol)
self.oxidation = oxidation
self.coordination = coordination
# Get shannon radius for the oxidation state and coordination.
self.shannon_radius = None
self.ionic_radius = None
if radii_source == "shannon":
shannon_data = data_loader.lookup_element_shannon_radius_data(symbol)
elif radii_source == "extended":
shannon_data = data_loader.lookup_element_shannon_radius_data_extendedML(symbol)
else:
msg = f"Data source {radii_source!r} not recognised. Choose 'shannon' or 'extended'."
raise ValueError(msg)
# Get the average shannon and ionic radii
self.average_shannon_radius = None
self.average_ionic_radius = None
if shannon_data:
for dataset in shannon_data:
if dataset["charge"] == oxidation and str(coordination) == dataset["coordination"].split("_")[0]:
self.shannon_radius = dataset["crystal_radius"]
self.ionic_radius = dataset["ionic_radius"]
break
charge_rows = pd.DataFrame(shannon_data).loc[lambda df: df["charge"] == oxidation]
if not charge_rows.empty:
self.average_shannon_radius = charge_rows["crystal_radius"].mean()
self.average_ionic_radius = charge_rows["ionic_radius"].mean()
# Get SSE_2015 (revised) for the oxidation state.
self.SSE_2015 = None
sse_2015_data = data_loader.lookup_element_sse2015_data(symbol)
if sse_2015_data:
for dataset in sse_2015_data:
if dataset["OxidationState"] == oxidation:
self.SSE_2015 = dataset["SolidStateEnergy2015"]
@functools.cache
def _load_ordered_elements() -> list[str]:
"""Load and cache the ordered periodic table symbols."""
with (Path(data_directory) / "ordered_periodic.txt").open() as f:
return [line.split()[0] for line in f]
[docs]
def ordered_elements(x: int, y: int) -> list[str]:
"""Return a list of element symbols, ordered by proton number in the range x -> y.
Args:
x: Start of range (inclusive, 1-indexed proton number).
y: End of range (inclusive, 1-indexed proton number).
Returns:
list: Ordered list of element symbols.
"""
return _load_ordered_elements()[x - 1 : y]
[docs]
def element_dictionary(
elements: Iterable[str] | None = None,
oxi_states_custom_filepath: str | None = None,
) -> dict[str, Element]:
"""
Create a dictionary of initialised smact.Element objects.
Accessing an Element from a dict is significantly faster than
repeatedly initialising them on-demand within nested loops.
Args:
----
elements (iterable of strings) : Elements to include. If None,
use all elements up to 103.
oxi_states_custom_filepath (str): Path to custom oxidation states file
Returns:
-------
dict: Dictionary with element symbols as keys and smact.Element
objects as data
"""
if elements is None:
elements = ordered_elements(1, 103)
if oxi_states_custom_filepath:
return {symbol: Element(symbol, oxi_states_custom_filepath) for symbol in elements}
return {symbol: Element(symbol) for symbol in elements}
[docs]
def are_eq(a: Sequence[float], b: Sequence[float], tolerance: float = 1e-4) -> bool:
"""Check two arrays for approximate element-wise equality.
Args:
a: 1-D sequence of values.
b: 1-D sequence of values.
tolerance: Absolute tolerance for equality.
Returns:
True if arrays are element-wise equal within tolerance, False otherwise.
"""
if len(a) != len(b):
return False
return bool(np.allclose(a, b, atol=tolerance, rtol=0))
[docs]
def lattices_are_same(lattice1: Sequence, lattice2: Sequence, tolerance: float = 1e-4) -> bool:
"""Check whether two ASE lattices contain the same sites.
Args:
lattice1: ASE crystal class.
lattice2: ASE crystal class.
tolerance: Absolute tolerance for position equality.
Returns:
True if every site in *lattice1* has a matching site in *lattice2*.
"""
matched: set[int] = set()
for site1 in lattice1:
for j, site2 in enumerate(lattice2):
if (
j not in matched
and site1.symbol == site2.symbol
and are_eq(site1.position, site2.position, tolerance=tolerance)
):
matched.add(j)
break
return len(matched) == len(lattice1) == len(lattice2)
def _gcd_recursive(*args: int) -> int:
"""Get the greatest common denominator among any number of ints."""
return reduce(gcd, args)
def _isneutral(oxidations: Sequence[int], stoichs: Sequence[int]) -> bool:
"""
Check if set of oxidation states is neutral in given stoichiometry.
Args:
----
oxidations (tuple): Oxidation states of a set of oxidised elements
stoichs (tuple): Stoichiometry values corresponding to `oxidations`
"""
return sum(map(multiply, oxidations, stoichs)) == 0
[docs]
def neutral_ratios_iter(
oxidations: Sequence[int],
stoichs: Sequence[Sequence[int]] | None = None,
threshold: int | None = 5,
) -> Iterator[tuple[int, ...]]:
"""
Iterator for charge-neutral stoichiometries.
Given a list of oxidation states of arbitrary length, yield ratios in which
these form a charge-neutral compound. Stoichiometries may be provided as a
set of legal stoichiometries per site (e.g. a known family of compounds);
otherwise all unique ratios are tried up to a threshold coefficient.
Args:
----
oxidations : list of integers
stoichs : stoichiometric ratios for each site (if provided)
threshold : single threshold to go up to if stoichs are not provided
Returns:
-------
filter: Iterator of tuples; each tuple is a ratio that gives neutrality
"""
if stoichs is None:
if threshold is None:
msg = "threshold must be an int when stoichs is not provided"
raise ValueError(msg)
stoichs = [list(range(1, threshold + 1))] * len(oxidations)
# First filter: remove combinations which have a common denominator
# greater than 1 (i.e. Use simplest form of each set of ratios)
# Second filter: return only charge-neutral combinations
return filter(
lambda x: _isneutral(oxidations, x) and _gcd_recursive(*x) == 1,
# Generator: enumerate all combinations of stoichiometry
itertools.product(*stoichs),
)
[docs]
def neutral_ratios(
oxidations: Sequence[int],
stoichs: Sequence[Sequence[int]] | None = None,
threshold: int | None = 5,
) -> list[tuple[int, ...]]:
"""
Get a list of charge-neutral compounds.
Given a list of oxidation states of arbitrary length, return ratios in which
these form a charge-neutral compound. Stoichiometries may be provided as a
set of legal stoichiometries per site (e.g. a known family of compounds);
otherwise all unique ratios are tried up to a threshold coefficient.
Args:
----
oxidations (list of ints): Oxidation state of each site
stoichs (list of positive ints): A selection of valid stoichiometric
ratios for each site
threshold (int): Maximum stoichiometry coefficient; if no 'stoichs'
argument is provided, all combinations of integer coefficients up
to this value will be tried.
Returns:
-------
list of tuples: Ratios of atoms in given oxidation states which yield
a charge-neutral structure. Empty list if no ratios exist.
"""
return list(neutral_ratios_iter(oxidations, stoichs=stoichs, threshold=threshold))
# List of metals
metals = [
"Li",
"Be",
"Na",
"Mg",
"Al",
"K",
"Ca",
"Sc",
"Ti",
"V",
"Cr",
"Mn",
"Fe",
"Co",
"Ni",
"Cu",
"Zn",
"Ga",
"Ge",
"Rb",
"Sr",
"Y",
"Zr",
"Nb",
"Mo",
"Tc",
"Ru",
"Rh",
"Pd",
"Ag",
"Cd",
"In",
"Sn",
"Sb",
"Cs",
"Ba",
"La",
"Ce",
"Pr",
"Nd",
"Sm",
"Eu",
"Gd",
"Tb",
"Dy",
"Ho",
"Er",
"Tm",
"Yb",
"Lu",
"Hf",
"Ta",
"W",
"Re",
"Os",
"Ir",
"Pt",
"Au",
"Hg",
"Tl",
"Pb",
"Bi",
"Po",
"Fr",
"Ra",
"Ac",
"Th",
"Pa",
"U",
"Np",
"Pu",
"Am",
"Cm",
"Bk",
"Cf",
"Es",
"Fm",
"Md",
"No",
]
# List of elements that can be considered 'anions'.
# Similar to the Pymatgen 'electronegative elements' but excluding H, B, C & Si.
anions = ["N", "P", "As", "Sb", "O", "S", "Se", "Te", "F", "Cl", "Br", "I"]
# List of d-block metals
d_block = [
"Sc",
"Ti",
"V",
"Cr",
"Mn",
"Fe",
"Co",
"Ni",
"Cu",
"Zn",
"Y",
"Zr",
"Nb",
"Mo",
"Tc",
"Ru",
"Rh",
"Pd",
"Ag",
"Cd",
"La",
"Hf",
"Ta",
"W",
"Re",
"Os",
"Ir",
"Pt",
"Au",
"Hg",
]