"""
Provide data from text files while transparently caching for efficiency.
This module handles the loading of external data used to initialise the
core smact.Element and smact.Species classes. It implements a
transparent data-caching system to avoid a large amount of I/O when
naively constructing several of these objects. It also implements a
switchable system to print verbose warning messages about possible
missing data (mainly for debugging purposes). In general these functions
are used in the background and it is not necessary to use them directly.
"""
import csv
import os
from smact import data_directory
# Global toggle read by the lookup functions in this module: while it is
# true, a message is printed whenever requested data is missing.
_print_warnings = False


def set_warnings(enable=True):
    """Switch the verbose missing-data warnings on or off.

    For any warning to be visible this has to be called *before* the
    first smact.Element() object is constructed.

    Args:
        enable (bool): True to print verbose warning messages, False to
            silence them.
    """
    global _print_warnings
    _print_warnings = enable
def _get_data_rows(filename):
    """Yield the whitespace-separated fields of every data row in a file.

    Each line is stripped of surrounding whitespace first. Blank lines
    and lines whose first character is "#" (comments) are skipped.

    Args:
        filename (str): path of the text file to read.

    Yields:
        list: the fields of one data row, as strings.
    """
    with open(filename) as file:
        for line in file:
            line = line.strip()
            # A blank line strips down to "", which has no first character
            # to test and would otherwise yield an empty row, so it is
            # filtered out together with comment lines.
            if not line or line.startswith("#"):
                continue
            yield line.split()
def float_or_None(x):
    """Cast a value to a float, or return None if it is not a number.

    Args:
        x: the value to convert, normally a string field read from a
            data file.

    Returns:
        float or None: float(x) when the conversion succeeds, otherwise
        None.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # ValueError: a string that does not spell a number.
        # TypeError: an object such as None that float() does not accept.
        return None
# Cache of the default oxidation-state table ({symbol: [states]}); it
# stays None until the first lookup triggers loading.
_el_ox_states = None


def lookup_element_oxidation_states(symbol, copy=True):
    """
    Look up the known oxidation states of an element.

    The table consulted is the SMACT default, which is also the most
    exhaustive list. It is read from "oxidation_states.txt" in the data
    directory on the first call and cached at module level afterwards.

    Args:
        symbol (str): atomic symbol of the element of interest.
        copy (Optional(bool)): when True (default) a fresh list is
            returned; when False the cached list itself is returned,
            which avoids the copy but must never be modified by the
            caller.

    Returns:
        list: the oxidation states (ints) recorded for the element, or
        None when the element does not appear in the data file.
    """
    global _el_ox_states

    if _el_ox_states is None:
        # First call: parse the data file and keep the result.
        table = _el_ox_states = {}
        datafile = os.path.join(data_directory, "oxidation_states.txt")
        for fields in _get_data_rows(datafile):
            table[fields[0]] = [int(state) for state in fields[1:]]

    if symbol not in _el_ox_states:
        if _print_warnings:
            print(
                "WARNING: Oxidation states for element {} not found.".format(
                    symbol
                )
            )
        return None

    states = _el_ox_states[symbol]
    # The entries are plain ints, so a shallow list copy is a full copy.
    return list(states) if copy else states
# Cache of the ICSD oxidation-state table ({symbol: [states]}); None
# until the first lookup loads it.
_el_ox_states_icsd = None


def lookup_element_oxidation_states_icsd(symbol, copy=True):
    """
    Retrieve a list of known oxidation states for an element.

    The oxidation states list used contains only those found
    in the ICSD (and judged to be non-spurious). It is read from
    "oxidation_states_icsd.txt" in the data directory on the first call
    and cached at module level.

    Args:
        symbol (str) : the atomic symbol of the element to look up.
        copy (Optional(bool)): if True (default), return a copy of the
            oxidation-state list, rather than a reference to the cached
            data -- only use copy=False in performance-sensitive code
            and where the list will not be modified!

    Returns:
        list: List of known oxidation states for the element.
        Returns None if oxidation states for the element were not
        found in the external data.
    """
    global _el_ox_states_icsd

    if _el_ox_states_icsd is None:
        _el_ox_states_icsd = {}
        for items in _get_data_rows(
            os.path.join(data_directory, "oxidation_states_icsd.txt")
        ):
            _el_ox_states_icsd[items[0]] = [
                int(oxidation_state) for oxidation_state in items[1:]
            ]

    if symbol not in _el_ox_states_icsd:
        if _print_warnings:
            # The space before "not" was missing from the original
            # message, which printed e.g. "element Xxnot found.".
            print(
                "WARNING: Oxidation states for element {} "
                "not found.".format(symbol)
            )
        return None

    if copy:
        # The list holds plain ints, so copying the list is a deep copy.
        return list(_el_ox_states_icsd[symbol])
    return _el_ox_states_icsd[symbol]
# Cache of the structure-prediction oxidation-state table
# ({symbol: [states]}); None until first use.
_el_ox_states_sp = None


def lookup_element_oxidation_states_sp(symbol, copy=True):
    """
    Look up the oxidation states of an element (structure-prediction set).

    Only oxidation states present in the Pymatgen default lambda table
    for structure prediction are listed. The table is read from
    "oxidation_states_SP.txt" in the data directory on the first call
    and cached at module level.

    Args:
        symbol (str): atomic symbol of the element of interest.
        copy (Optional(bool)): when True (default) a new list is
            returned; when False the cached list itself is handed back,
            which must then be treated as read-only.

    Returns:
        list: oxidation states (ints) recorded for the element, or None
        when the element is absent from the data file.
    """
    global _el_ox_states_sp

    if _el_ox_states_sp is None:
        _el_ox_states_sp = {}
        source = os.path.join(data_directory, "oxidation_states_SP.txt")
        for row in _get_data_rows(source):
            _el_ox_states_sp[row[0]] = list(map(int, row[1:]))

    # Stored values are always lists, so None can only mean "no entry".
    cached = _el_ox_states_sp.get(symbol)
    if cached is not None:
        # Slicing copies the list; its int entries need no further copy.
        return cached[:] if copy else cached

    if _print_warnings:
        message = "WARNING: Oxidation states for element {} not found."
        print(message.format(symbol))
    return None
# Cache of the Wikipedia oxidation-state table ({symbol: [states]});
# None until first use.
_el_ox_states_wiki = None


def lookup_element_oxidation_states_wiki(symbol, copy=True):
    """
    Look up the oxidation states of an element (Wikipedia set).

    Only oxidation states listed on Wikipedia
    (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements)
    are included. The table is read from "oxidation_states_wiki.txt" in
    the data directory on the first call and cached at module level.

    Args:
        symbol (str): atomic symbol of the element of interest.
        copy (Optional(bool)): when True (default) a new list is
            returned; when False the cached list itself is handed back,
            which must then be treated as read-only.

    Returns:
        list: oxidation states (ints) recorded for the element, or None
        when the element is absent from the data file.
    """
    global _el_ox_states_wiki

    if _el_ox_states_wiki is None:
        _el_ox_states_wiki = {}
        wiki_file = os.path.join(data_directory, "oxidation_states_wiki.txt")
        for entry in _get_data_rows(wiki_file):
            element = entry[0]
            _el_ox_states_wiki[element] = [int(value) for value in entry[1:]]

    try:
        known_states = _el_ox_states_wiki[symbol]
    except KeyError:
        if _print_warnings:
            print(
                "WARNING: Oxidation states for element {} not found.".format(
                    symbol
                )
            )
        return None

    if copy:
        # Entries are ints, so duplicating the list is a complete copy.
        return list(known_states)
    return known_states
# Cache for user-supplied oxidation-state tables. Once in use it maps the
# absolute path of every file read so far to that file's
# {symbol: [oxidation states]} table. Resetting it to None clears it.
_el_ox_states_custom = None


def lookup_element_oxidation_states_custom(symbol, filepath, copy=True):
    """
    Retrieve a list of known oxidation states for an element.

    The oxidation states list is specified by the user in a text file.
    Each file is parsed once and cached under its absolute path, so
    lookups against different files do not see each other's data.

    Args:
        symbol (str) : the atomic symbol of the element to look up.
        filepath (str) : path of the text file holding the oxidation
            states. It is read with _get_data_rows, and each row is
            taken as an element symbol followed by integer oxidation
            states.
        copy (Optional(bool)): if True (default), return a copy of the
            oxidation-state list, rather than a reference to the cached
            data -- only use copy=False in performance-sensitive code
            and where the list will not be modified!

    Returns:
        list: List of known oxidation states for the element.
        Returns None if oxidation states for the element were not
        found in the given file.
    """
    global _el_ox_states_custom

    if _el_ox_states_custom is None:
        _el_ox_states_custom = {}

    # Key the cache on the file, not just on "a custom file was loaded":
    # otherwise a second file would be answered from the first one's data.
    cache_key = os.path.abspath(filepath)
    if cache_key not in _el_ox_states_custom:
        table = {}
        for items in _get_data_rows(filepath):
            table[items[0]] = [
                int(oxidation_state) for oxidation_state in items[1:]
            ]
        # Stored only after a complete read, so a failed or interrupted
        # load is retried on the next call instead of caching partial data.
        _el_ox_states_custom[cache_key] = table

    table = _el_ox_states_custom[cache_key]
    if symbol not in table:
        if _print_warnings:
            print(
                "WARNING: Oxidation states for element {} "
                "not found.".format(symbol)
            )
        return None

    if copy:
        # The list holds plain ints, so copying the list is a deep copy.
        return list(table[symbol])
    return table[symbol]
# Loader and cache for the element HHI scores.
_element_hhis = None


def lookup_element_hhis(symbol):
    """
    Retrieve the HHI_p and HHI_R scores for an element.

    The scores are read from "HHIs.txt" in the data directory on the
    first call and cached at module level. Blank lines and lines
    starting with "#" in that file are ignored.

    Args:
        symbol : the atomic symbol of the element to look up.

    Returns:
        tuple : (HHI_p, HHI_R), i.e. the second and third columns of the
        element's row as floats.
        Returns None if values for the element were
        not found in the external data.
    """
    global _element_hhis

    if _element_hhis is None:
        _element_hhis = {}
        with open(os.path.join(data_directory, "HHIs.txt")) as file:
            for line in file:
                line = line.strip()
                # A blank line strips down to "" and has no first
                # character to test, so skip it along with comments.
                if not line or line.startswith("#"):
                    continue
                items = line.split()
                _element_hhis[items[0]] = (
                    float(items[1]),
                    float(items[2]),
                )

    if symbol in _element_hhis:
        return _element_hhis[symbol]

    if _print_warnings:
        print("WARNING: HHI data for element {} not found.".format(symbol))
    return None
# Loader and cache for elemental data
_element_data = None


def lookup_element_data(symbol, copy=True):
    """
    Retrieve tabulated data for an element.

    The table "data/element_data.txt" contains a collection of relevant
    atomic data. On the first call the whole table is parsed into the
    module-level cache _element_data; every call then answers from that
    cache.

    Args:
        symbol (str) : Atomic symbol for lookup
        copy (Optional(bool)) : if True (default), return a copy of the
            data dictionary, rather than a reference to the cached
            object -- only use copy=False in performance-sensitive code
            and where you are certain the dictionary will not be
            modified!

    Returns:
        dict : Dictionary of data for the given element, keyed by
        "Symbol", "Name", "Z", "Mass", "r_cov", "e_affinity", "p_eig",
        "s_eig", "Abundance", "el_neg", "ion_pot" and "dipol".
        Returns None if the element was not found in the table.
    """
    global _element_data

    if _element_data is None:
        _element_data = {}
        keys = (
            "Symbol",
            "Name",
            "Z",
            "Mass",
            "r_cov",
            "e_affinity",
            "p_eig",
            "s_eig",
            "Abundance",
            "el_neg",
            "ion_pot",
            "dipol",
        )
        for items in _get_data_rows(
            os.path.join(data_directory, "element_data.txt")
        ):
            # First two columns are strings and should be left intact.
            # Everything else is numerical and should be cast to a float
            # or, if not clearly a number, to None.
            clean_items = items[0:2] + [float_or_None(x) for x in items[2:]]
            # zip() pairs keys with columns positionally and stops at the
            # shorter of the two sequences.
            _element_data[items[0]] = dict(zip(keys, clean_items))

    if symbol not in _element_data:
        if _print_warnings:
            # Only the one-line warning is printed; the original also
            # dumped the entire cached table here (debugging leftover).
            print("WARNING: Elemental data for {} not found.".format(symbol))
        return None

    if copy:
        # _element_data stores dictionaries whose values are strings,
        # floats or None, so dict.copy() already yields an independent
        # copy.
        return _element_data[symbol].copy()
    return _element_data[symbol]
# Cache of the Shannon radii table ({symbol: [dataset, ...]}); None until
# the first lookup loads it.
_element_shannon_radii_data = None


def lookup_element_shannon_radius_data(symbol, copy=True):
    """
    Look up the Shannon radii recorded for an element.

    Returns one dataset per oxidation-state / coordination-environment
    combination listed for the element. The data are read from
    "shannon_radii.csv" in the data directory on the first call (the
    first row is skipped as a header) and cached at module level.

    Args:
        symbol (str): atomic symbol of the element of interest.
        copy (Optional(bool)): when True (default) a new list of copied
            dictionaries is returned; when False the cached list is
            returned directly and must not be modified.

    Returns:
        list:
            Shannon radii datasets, or None if the element does not
            appear in the data.

            Each dataset is a dictionary with the keys:

            charge
                *int* (second CSV column)
            coordination
                *str* (third CSV column, stored unconverted)
            crystal_radius
                *float*
            ionic_radius
                *float*
            comment
                *str*
    """
    global _element_shannon_radii_data

    if _element_shannon_radii_data is None:
        _element_shannon_radii_data = {}
        csv_path = os.path.join(data_directory, "shannon_radii.csv")
        with open(csv_path) as file:
            rows = csv.reader(file)
            next(rows)  # discard the header row
            for row in rows:
                # An element has one row per charge/coordination
                # combination, so rows are grouped per symbol.
                element = row[0]
                record = dict(
                    charge=int(row[1]),
                    coordination=row[2],
                    crystal_radius=float(row[3]),
                    ionic_radius=float(row[4]),
                    comment=row[5],
                )
                _element_shannon_radii_data.setdefault(element, []).append(
                    record
                )

    if symbol not in _element_shannon_radii_data:
        if _print_warnings:
            print(
                "WARNING: Shannon-radius data for element {} not found.".format(
                    symbol
                )
            )
        return None

    records = _element_shannon_radii_data[symbol]
    if not copy:
        return records
    # Dictionary values are ints, floats and strings, so copying each
    # dictionary gives a fully independent result.
    return [record.copy() for record in records]
# Cache of the machine-learned extended Shannon radii table
# ({symbol: [dataset, ...]}); None until the first lookup loads it.
_element_shannon_radii_data_extendedML = None


def lookup_element_shannon_radius_data_extendedML(symbol, copy=True):
    """
    Look up the machine-learned extended Shannon radii of an element.

    Returns one dataset per oxidation-state / coordination-environment
    combination listed for the element. The data are read from
    "shannon_radii_ML_extended.csv" in the data directory on the first
    call (the first row is skipped as a header) and cached at module
    level.

    Source of extended radii is:
    Baloch, A.A., Alqahtani, S.M., Mumtaz, F., Muqaibel, A.H., Rashkeev,
    S.N. and Alharbi, F.H., 2021.
    Extending Shannon's Ionic Radii Database Using Machine Learning.
    arXiv preprint arXiv:2101.00269.

    Args:
        symbol (str): atomic symbol of the element of interest.
        copy (Optional(bool)): when True (default) a new list of copied
            dictionaries is returned; when False the cached list is
            returned directly and must not be modified.

    Returns:
        list:
            Extended Shannon radii datasets, or None if the element does
            not appear in the data.

            Each dataset is a dictionary with the keys:

            charge
                *int* (second CSV column)
            coordination
                *str* (third CSV column, stored unconverted)
            crystal_radius
                *float*
            ionic_radius
                *float*
            comment
                *str*
    """
    global _element_shannon_radii_data_extendedML

    if _element_shannon_radii_data_extendedML is None:
        cache = _element_shannon_radii_data_extendedML = {}
        source = os.path.join(data_directory, "shannon_radii_ML_extended.csv")
        with open(source) as file:
            reader = csv.reader(file)
            next(reader)  # the first row only holds column headings
            for row in reader:
                # Several rows may share a symbol (one per
                # charge/coordination combination); collect them in a list.
                element = row[0]
                entry = {
                    "charge": int(row[1]),
                    "coordination": row[2],
                    "crystal_radius": float(row[3]),
                    "ionic_radius": float(row[4]),
                    "comment": row[5],
                }
                if element not in cache:
                    cache[element] = []
                cache[element].append(entry)

    try:
        entries = _element_shannon_radii_data_extendedML[symbol]
    except KeyError:
        if _print_warnings:
            print(
                "WARNING: Extended Shannon-radius data for element {} not "
                "found.".format(symbol)
            )
        return None

    if copy:
        # Values are ints, floats and strings, so a per-dictionary copy
        # is enough to decouple the result from the cache.
        return [entry.copy() for entry in entries]
    return entries
# Cache of the solid-state energy (SSE) table ({symbol: dataset}); None
# until the first lookup loads it.
_element_ssedata = None


def lookup_element_sse_data(symbol):
    """
    Look up the solid-state energy (SSE) data of an element.

    Taken from J. Am. Chem. Soc., 2011, 133 (42), pp 16852-16960,
    DOI: 10.1021/ja204670s

    The data are read from "SSE.csv" in the data directory on the first
    call and cached at module level. The dictionary returned is the
    cached object itself, not a copy.

    Args:
        symbol : atomic symbol of the element of interest.

    Returns:
        dict : the SSE dataset of the element, or None if the element
        does not appear in the data.

        The dataset has the keys:

        AtomicNumber
            *int*
        SolidStateEnergy
            *float* SSE
        IonisationPotential
            *float*
        ElectronAffinity
            *float*
        MullikenElectronegativity
            *float*
        SolidStateRenormalisationEnergy
            *float*
    """
    global _element_ssedata

    if _element_ssedata is None:
        _element_ssedata = {}
        sse_path = os.path.join(data_directory, "SSE.csv")
        with open(sse_path) as file:
            for row in csv.reader(file):
                record = dict(
                    AtomicNumber=int(row[1]),
                    SolidStateEnergy=float(row[2]),
                    IonisationPotential=float(row[3]),
                    ElectronAffinity=float(row[4]),
                    MullikenElectronegativity=float(row[5]),
                    SolidStateRenormalisationEnergy=float(row[6]),
                )
                # One row per element: a later row replaces an earlier one.
                _element_ssedata[row[0]] = record

    if symbol not in _element_ssedata:
        if _print_warnings:
            print(
                "WARNING: Solid-state energy data for element {} not found.".format(
                    symbol
                )
            )
        return None

    return _element_ssedata[symbol]
# Cache of the revised (2015) solid-state energy table
# ({symbol: [dataset, ...]}); None until the first lookup loads it.
_element_sse2015_data = None


def lookup_element_sse2015_data(symbol, copy=True):
    """
    Look up the SSE (2015) data of an element, per oxidation state.

    Solid-state energy (SSE2015) values taken from J. Solid State
    Chem., 2015, 231, pp138-144, DOI: 10.1016/j.jssc.2015.07.037.
    The data are read from "SSE_2015.csv" in the data directory on the
    first call and cached at module level.

    Args:
        symbol : atomic symbol of the element of interest.
        copy: when True (default) a new list of copied dictionaries is
            returned; when False the cached list is returned directly
            and must not be modified.

    Returns:
        list : SSE datasets of the element (one per row listed for it),
        or None if the element does not appear in the data.

        Each dataset is a dictionary with the keys:

        OxidationState
            *int*
        SolidStateEnergy2015
            *float* SSE2015
    """
    global _element_sse2015_data

    if _element_sse2015_data is None:
        _element_sse2015_data = {}
        with open(os.path.join(data_directory, "SSE_2015.csv")) as file:
            for row in csv.reader(file):
                # An element may appear on several rows, one for each
                # oxidation state, so entries are collected in a list.
                element = row[0]
                entry = {
                    "OxidationState": int(row[1]),
                    "SolidStateEnergy2015": float(row[2]),
                }
                _element_sse2015_data.setdefault(element, []).append(entry)

    entries = _element_sse2015_data.get(symbol)
    # Stored values are always lists, so None can only mean "no entry".
    if entries is None:
        if _print_warnings:
            print(
                "WARNING: Solid-state energy (revised 2015) data for "
                "element {} not found.".format(symbol)
            )
        return None

    if not copy:
        return entries
    return [entry.copy() for entry in entries]
# Loader and cache for the element solid-state energy (SSE) from Pauling
# electronegativity datasets.
_element_ssepauling_data = None


def lookup_element_sse_pauling_data(symbol):
    """Retrieve Pauling SSE data

    Retrieve the solid-state energy (SSEPauling) data for an element
    from the regression fit when SSE2015 is plotted against Pauling
    electronegativity. Taken from J. Solid State Chem., 2015, 231,
    pp138-144, DOI: 10.1016/j.jssc.2015.07.037

    The data are read from "SSE_Pauling.csv" in the data directory on
    the first call and cached at module level. The dictionary returned
    is the cached object itself, not a copy.

    Args:
        symbol (str) : the atomic symbol of the element to look up.

    Returns:
        dict : A dictionary with the single key
        "SolidStateEnergyPauling" (*float*) for the element, or None if
        the element was not found among the external data.
    """
    global _element_ssepauling_data

    if _element_ssepauling_data is None:
        _element_ssepauling_data = {}
        with open(os.path.join(data_directory, "SSE_Pauling.csv")) as file:
            reader = csv.reader(file)
            for row in reader:
                dataset = {"SolidStateEnergyPauling": float(row[1])}
                _element_ssepauling_data[row[0]] = dataset

    if symbol in _element_ssepauling_data:
        return _element_ssepauling_data[symbol]

    if _print_warnings:
        # Each fragment carries exactly one separating space; the
        # original had both a trailing and a leading space, which printed
        # doubled spaces in the message.
        print(
            "WARNING: Solid-state energy data from Pauling "
            "electronegativity regression fit for "
            "element {} not found.".format(symbol)
        )
    return None