Source code for smact

"""
Semiconducting Materials from Analogy and Chemical Theory

A collection of fast screening tools from elemental data
"""

import itertools
import warnings
from math import gcd
from operator import mul as multiply
from os import path
from typing import Iterable, List, Optional, Sequence, Tuple, Union

import pandas as pd

module_directory = path.abspath(path.dirname(__file__))
data_directory = path.join(module_directory, "data")
# get correct path for datafiles when called from another directory
from smact import data_loader


[docs]class Element: """Collection of standard elemental properties for given element. Data is drawn from "data/element.txt", part of the Open Babel package. Atoms with a defined oxidation state draw properties from the "Species" class. Attributes: Element.symbol (string) : Elemental symbol used to retrieve data Element.name (string) : Full name of element Element.number (int) : Proton number of element Element.pauling_eneg (float) : Pauling electronegativity (0.0 if unknown) Element.ionpot (float) : Ionisation potential in eV (0.0 if unknown) Element.e_affinity (float) : Electron affinity in eV (0.0 if unknown) Element.dipol (float) : Static dipole polarizability in 1.6488e-41 C m^2 / V (0.0 if unknown) Element.eig (float) : Electron eigenvalue (units unknown) N.B. For Cu, Au and Ag this defaults to d-orbital Element.eig_s (float) : Eigenvalue of s-orbital Element.SSE (float) : Solid State Energy Element.SSEPauling (float) : SSE based on regression fit with Pauling electronegativity Element.oxidation_states (list) : Default list of allowed oxidation states for use in SMACT Element.oxidation_states_sp (list) : List of oxdation states recognised by the Pymatgen Structure Predictor Element.oxidation_states_icsd (list) : List of oxidation states that appear in the ICSD Element.oxidation_states_wiki (list): List of oxidation states that appear wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) Data retrieved: 2022-09-22 Element.oxidation_states_custom (list | None ): List of oxidation states that appear in the custom data file supplied (if any) Element.coord_envs (list): The allowed coordination enviroments for the ion Element.covalent_radius (float) : Covalent radius of the element Element.mass (float) : Molar mass of the element Element.crustal_abundance (float) : Crustal abundance in the earths crust mg/kg taken from CRC Element.HHI_p (float) : Herfindahl-Hirschman Index for elemental production Element.HHI_r (float) : Hirfindahl-Hirschman Index for elemental reserves Raises: NameError: Element not found in element.txt Warning: Element not found in Eigenvalues.csv """ def __init__( self, symbol: str, oxi_states_custom_filepath: Optional[str] = None ): """Initialise Element class Args: symbol (str): Chemical element symbol (e.g. 'Fe') oxi_states_custom_filepath (str): Path to custom oxidation states file """ # Get the oxidation states from the custom file if it exists if oxi_states_custom_filepath: try: self._oxidation_states_custom = ( data_loader.lookup_element_oxidation_states_custom( symbol, oxi_states_custom_filepath ) ) self.oxidation_states_custom = self._oxidation_states_custom except TypeError: warnings.warn( "Custom oxidation states file not found. Please check the file path." ) self.oxidation_states_custom = None else: self.oxidation_states_custom = None self.symbol = symbol dataset = data_loader.lookup_element_data(self.symbol, copy=False) if dataset == None: raise NameError(f"Elemental data for {symbol} not found.") # Set coordination-environment data from the Shannon-radius data. # As above, it is safe to use copy = False with this Get* function. shannon_data = data_loader.lookup_element_shannon_radius_data( symbol, copy=False ) if shannon_data != None: coord_envs = [row["coordination"] for row in shannon_data] else: coord_envs = None HHI_scores = data_loader.lookup_element_hhis(symbol) if HHI_scores == None: HHI_scores = (None, None) sse_data = data_loader.lookup_element_sse_data(symbol) if sse_data: sse = sse_data["SolidStateEnergy"] else: sse = None sse_Pauling_data = data_loader.lookup_element_sse_pauling_data(symbol) if sse_Pauling_data: sse_Pauling = sse_Pauling_data["SolidStateEnergyPauling"] else: sse_Pauling = None for attribute, value in ( ("coord_envs", coord_envs), ("covalent_radius", dataset["r_cov"]), ("crustal_abundance", dataset["Abundance"]), ("e_affinity", dataset["e_affinity"]), ("eig", dataset["p_eig"]), ("eig_s", dataset["s_eig"]), ("HHI_p", HHI_scores[0]), ("HHI_r", HHI_scores[1]), ("ionpot", dataset["ion_pot"]), ("mass", dataset["Mass"]), ("name", dataset["Name"]), ("number", dataset["Z"]), ( "oxidation_states", data_loader.lookup_element_oxidation_states(symbol), ), ( "oxidation_states_icsd", data_loader.lookup_element_oxidation_states_icsd(symbol), ), ( "oxidation_states_sp", data_loader.lookup_element_oxidation_states_sp(symbol), ), ( "oxidation_states_wiki", data_loader.lookup_element_oxidation_states_wiki(symbol), ), ("dipol", dataset["dipol"]), ("pauling_eneg", dataset["el_neg"]), ("SSE", sse), ("SSEPauling", sse_Pauling), ("symbol", symbol), # ('vdw_radius', dataset['RVdW']), ): setattr(self, attribute, value)
[docs]class Species(Element): """ Class providing data for elements in a given chemical environment In addition to the standard properties from the periodic table (inherited from the Element class), Species objects use the oxidation state and coordination environment to provide further properties. The Species object can be created with either a default set of shannon radii (radii_source='shannon') or with a set of machine-learnt shannon radii (radii_source='extended'). The source of the machine-learnt shannon radii set is Baloch, A.A., Alqahtani, S.M., Mumtaz, F., Muqaibel, A.H., Rashkeev, S.N. and Alharbi, F.H., 2021. Extending Shannon's ionic radii database using machine learning. Physical Review Materials, 5(4), p.043804. Attributes: Species.symbol: Elemental symbol used to retrieve data Species.name: Full name of element Species.oxidation: Oxidation state of species (signed integer) Species.coordination: Coordination number of species (integer) Species.pauling_eneg: Pauling electronegativity (0.0 if unknown) Species.ionpot: Ionisation potential in eV (0.0 if unknown) Species.e_affinity: Electron affinity in eV (0.0 if unknown) Species.eig: Electron eigenvalue (units unknown) N.B. For Cu, Au and Ag this defaults to d-orbital. Species.shannon_radius: Shannon radius of Species. Species.ionic_radius: Ionic radius of Species. Species.average_shannon_radius: An average shannon radius for the species. The average is taken over all coordination environments. Species.average_ionic_radius: An average ionic radius for the species. The average is taken over all coordination environments. Raises: NameError: Element not found in element.txt Warning: Element not found in Eigenvalues.csv """ def __init__( self, symbol: str, oxidation: int, coordination: int = 4, radii_source: str = "shannon", ): Element.__init__(self, symbol) self.oxidation = oxidation self.coordination = coordination # Get shannon radius for the oxidation state and coordination. self.shannon_radius = None if radii_source == "shannon": shannon_data = data_loader.lookup_element_shannon_radius_data( symbol ) elif radii_source == "extended": shannon_data = ( data_loader.lookup_element_shannon_radius_data_extendedML( symbol ) ) else: print( "Data source not recognised. Please select 'shannon' or 'extended'. " ) if shannon_data: for dataset in shannon_data: if ( dataset["charge"] == oxidation and str(coordination) == dataset["coordination"].split("_")[0] ): self.shannon_radius = dataset["crystal_radius"] # Get ionic radius self.ionic_radius = None if shannon_data: for dataset in shannon_data: if ( dataset["charge"] == oxidation and str(coordination) == dataset["coordination"].split("_")[0] ): self.ionic_radius = dataset["ionic_radius"] # Get the average shannon and ionic radii self.average_shannon_radius = None self.average_ionic_radius = None if shannon_data: # Get the rows of the shannon radius table for the element shannon_data_df = pd.DataFrame(shannon_data) # Get the rows corresponding to the oxidation state of the species charge_rows = shannon_data_df.loc[ shannon_data_df["charge"] == oxidation ] # Get the mean self.average_shannon_radius = charge_rows["crystal_radius"].mean() self.average_ionic_radius = charge_rows["ionic_radius"].mean() # Get SSE_2015 (revised) for the oxidation state. self.SSE_2015 = None sse_2015_data = data_loader.lookup_element_sse2015_data(symbol) if sse_2015_data: for dataset in sse_2015_data: if dataset["OxidationState"] == oxidation: self.SSE_2015 = dataset["SolidStateEnergy2015"] else: self.SSE_2015 = None
[docs]def ordered_elements(x: int, y: int) -> List[str]: """ Return a list of element symbols, ordered by proton number in the range x -> y Args: x,y : integers Returns: list: Ordered list of element symbols """ with open(path.join(data_directory, "ordered_periodic.txt")) as f: data = f.readlines() elements = [] for line in data: inp = line.split() elements.append(inp[0]) ordered_elements = [] for i in range(x, y + 1): ordered_elements.append(elements[i - 1]) return ordered_elements
[docs]def element_dictionary( elements: Optional[Iterable[str]] = None, oxi_states_custom_filepath: Optional[str] = None, ): """ Create a dictionary of initialised smact.Element objects Accessing an Element from a dict is significantly faster than repeadedly initialising them on-demand within nested loops. Args: elements (iterable of strings) : Elements to include. If None, use all elements up to 103. oxi_states_custom_filepath (str): Path to custom oxidation states file Returns: dict: Dictionary with element symbols as keys and smact.Element objects as data """ if elements == None: elements = ordered_elements(1, 103) if oxi_states_custom_filepath: return { symbol: Element(symbol, oxi_states_custom_filepath) for symbol in elements } else: return {symbol: Element(symbol) for symbol in elements}
[docs]def are_eq(A: list, B: list, tolerance: float = 1e-4): """Check two arrays for tolerance [1,2,3]==[1,2,3]; but [1,3,2]!=[1,2,3] Args: A, B (lists): 1-D list of values for approximate equality comparison tolerance: numerical precision for equality condition Returns: boolean """ are_eq = True if len(A) != len(B): are_eq = False else: i = 0 while i < len(A): if abs(A[i] - B[i]) > tolerance: are_eq = False i = i + 1 return are_eq
[docs]def lattices_are_same(lattice1, lattice2, tolerance=1e-4): """Checks for the equivalence of two lattices Args: lattice1,lattice2 : ASE crystal class Returns: boolean """ lattices_are_same = False i = 0 for site1 in lattice1: for site2 in lattice2: if site1.symbol == site2.symbol: if are_eq(site1.position, site2.position, tolerance=tolerance): i += 1 if i == len(lattice1): lattices_are_same = True return lattices_are_same
def _gcd_recursive(*args: Iterable[int]): """ Get the greatest common denominator among any number of ints """ if len(args) == 2: return gcd(*args) else: return gcd(args[0], _gcd_recursive(*args[1:])) def _isneutral(oxidations: Tuple[int, ...], stoichs: Tuple[int, ...]): """ Check if set of oxidation states is neutral in given stoichiometry Args: oxidations (tuple): Oxidation states of a set of oxidised elements stoichs (tuple): Stoichiometry values corresponding to `oxidations` """ return 0 == sum(map(multiply, oxidations, stoichs))
[docs]def neutral_ratios_iter( oxidations: List[int], stoichs: Union[bool, List[List[int]]] = False, threshold: Optional[int] = 5, ): """ Iterator for charge-neutral stoichiometries Given a list of oxidation states of arbitrary length, yield ratios in which these form a charge-neutral compound. Stoichiometries may be provided as a set of legal stoichiometries per site (e.g. a known family of compounds); otherwise all unique ratios are tried up to a threshold coefficient. Args: oxidations : list of integers stoichs : stoichiometric ratios for each site (if provided) threshold : single threshold to go up to if stoichs are not provided Yields: tuple: ratio that gives neutrality """ if not stoichs: stoichs = [list(range(1, threshold + 1))] * len(oxidations) # First filter: remove combinations which have a common denominator # greater than 1 (i.e. Use simplest form of each set of ratios) # Second filter: return only charge-neutral combinations return filter( lambda x: _isneutral(oxidations, x) and _gcd_recursive(*x) == 1, # Generator: enumerate all combinations of stoichiometry itertools.product(*stoichs), )
[docs]def neutral_ratios( oxidations: List[int], stoichs: Union[bool, List[List[int]]] = False, threshold=5, ): """ Get a list of charge-neutral compounds Given a list of oxidation states of arbitrary length, yield ratios in which these form a charge-neutral compound. Stoichiometries may be provided as a set of legal stoichiometries per site (e.g. a known family of compounds); otherwise all unique ratios are tried up to a threshold coefficient. Given a list of oxidation states of arbitrary length it searches for neutral ratios in a given ratio of sites (stoichs) or up to a given threshold. Args: oxidations (list of ints): Oxidation state of each site stoichs (list of positive ints): A selection of valid stoichiometric ratios for each site threshold (int): Maximum stoichiometry coefficient; if no 'stoichs' argument is provided, all combinations of integer coefficients up to this value will be tried. Returns: (exists, allowed_ratios) (tuple): exists *bool*: True ifc any ratio exists, otherwise False allowed_ratios *list of tuples*: Ratios of atoms in given oxidation states which yield a charge-neutral structure """ allowed_ratios = [ x for x in neutral_ratios_iter( oxidations, stoichs=stoichs, threshold=threshold ) ] return (len(allowed_ratios) > 0, allowed_ratios)
# List of metals metals = [ "Li", "Be", "Na", "Mg", "Al", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", ] # List of elements that can be considered 'anions'. # Similar to the Pymatgen 'electronegative elements' but excluding H, B, C & Si. anions = ["N", "P", "As", "Sb", "O", "S", "Se", "Te", "F", "Cl", "Br", "I"] # List of d-block metals d_block = [ "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "La", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", ]