"""Minimalist structure representation for comprehensible manipulation."""
import logging
import re
from collections import defaultdict
from functools import reduce
from math import gcd
from operator import itemgetter
from typing import Dict, List, Optional, Tuple, Union
import numpy as np
import pymatgen
from pymatgen.analysis.bond_valence import BVAnalyzer
from pymatgen.ext.matproj import MPRester
from pymatgen.transformations.standard_transformations import (
OxidationStateDecorationTransformation,
)
import smact
from . import logger
from .utilities import convert_next_gen_mprest_data, get_sign
class SmactStructure:
    """SMACT implementation inspired by pymatgen Structure class.

    Handles basic structural and compositional information for a compound.
    Includes a lossless POSCAR-style specification for storing structures,
    allowing structures to be stored in files or databases, or to be pulled
    from the `Materials Project <https://www.materialsproject.org>`_.

    Attributes:
        species: A list of tuples describing the composition of the structure,
            stored as (element, oxidation, stoichiometry). The list is sorted
            alphabetically based on element symbol, and identical elements
            are sorted with highest charge first.
        lattice_mat: A numpy 3x3 array containing the lattice vectors.
        sites: A dictionary of {species: coords}, where species is a string
            representation of the species and coords is a list of position
            vectors, given as lists of length 3. For example:

            >>> s = SmactStructure.from_file('tests/files/NaCl.txt')
            >>> s.sites
            {'Cl1-': [[2.323624165, 1.643050405, 4.02463512]], 'Na1+': [[0.0, 0.0, 0.0]]}

        lattice_param: The lattice parameter.

    """

    # Oxidation-state assignment strategies that actively decorate a structure.
    _OXI_METHODS = ("BV", "comp_ICSD", "both")

    def __init__(
        self,
        species: List[Union[Tuple[str, int, int], Tuple["smact.Species", int]]],
        lattice_mat: np.ndarray,
        sites: Dict[str, List[List[float]]],
        lattice_param: Optional[float] = 1.0,
        sanitise_species: Optional[bool] = True,
    ):
        """Initialize structure with constituent species.

        Args:
            species: See :class:`~.SmactStructure`. May be supplied as either a list of
                (element, oxidation, stoichiometry) or (:class:`~smact.Species`, stoichiometry).
            lattice_mat: See :class:`~.SmactStructure`.
            sites: See :class:`~.SmactStructure`.
            lattice_param: See :class:`~.SmactStructure`.
            sanitise_species: Whether to sanitise check species. Should be `True` unless
                species have already been sanitised by a different constructor like
                :meth:`~.from_mp`.

        Raises:
            KeyError: `sites` has no entry for one of the species strings
                produced by :meth:`~.get_spec_strs`.

        """
        self.species = (
            self._sanitise_species(species) if sanitise_species else species
        )
        self.lattice_mat = lattice_mat
        # Rebuild the mapping so that its key order follows the species order.
        self.sites = {spec: sites[spec] for spec in self.get_spec_strs()}
        self.lattice_param = lattice_param

    def __repr__(self):
        """Represent the structure as a POSCAR.

        Alias for :meth:`~.as_poscar`.
        """
        return self.as_poscar()

    def __eq__(self, other):
        """Determine equality of SmactStructures based on their attributes.

        :attr:`~.species`, :attr:`~.lattice_mat`, :attr:`~.lattice_param` and
        :attr:`~.sites` must all be equal for the comparison to be True.

        Note:
            For the SmactStructures to be equal their attributes must be
            *identical*. For example, it is insufficient that the two
            structures have the same space group or the same species;
            the site coordinates must be equal also. The order of the
            keys of :attr:`~.sites` is compared as well.

        """
        if not isinstance(other, SmactStructure):
            return False

        # `bool` keeps the return type a plain bool whatever the operands yield.
        return bool(
            self.species == other.species
            and np.array_equal(self.lattice_mat, other.lattice_mat)
            and self.lattice_param == other.lattice_param
            and self.sites == other.sites
            and list(self.sites.keys()) == list(other.sites.keys())
        )

    @staticmethod
    def _sanitise_species(
        species: List[Union[Tuple[str, int, int], Tuple["smact.Species", int]]],
    ) -> List[Tuple[str, int, int]]:
        """Sanitise and format a list of species.

        The input list is left untouched; a new, sorted list is returned.

        Args:
            species: See :meth:`~.__init__`.

        Returns:
            sanit_species: Sanity-checked species in the format of
            a list of (element, oxidation, stoichiometry), sorted by element
            symbol and then by descending oxidation state.

        Raises:
            TypeError: species contains the wrong types.
            ValueError: species is either empty or contains tuples of
                incorrect length.

        """
        if not isinstance(species, list):
            raise TypeError(f"`species` must be a list, got {type(species)}.")
        if len(species) == 0:
            raise ValueError("`species` cannot be empty.")
        if not isinstance(species[0], tuple):
            raise TypeError(
                f"`species` must be a list of tuples, got list of {type(species[0])}."
            )

        species_error = (
            "`species` list of tuples must contain either "
            "2-tuples of Species objects and stoichiometries, "
            "or 3-tuples of elements, oxidations and stoichiometries."
        )
        if len(species[0]) not in {2, 3}:
            raise ValueError(species_error)

        # The first entry decides which of the two accepted layouts is in use.
        leading = species[0][0]
        if isinstance(leading, str):
            key_type, expected_len = str, 3
        elif isinstance(leading, smact.Species):
            key_type, expected_len = smact.Species, 2
        else:
            raise TypeError(species_error)

        # Every entry must follow that layout, not just the first one.
        for entry in species:
            if not isinstance(entry, tuple):
                raise TypeError(species_error)
            if len(entry) != expected_len:
                raise ValueError(species_error)
            if not isinstance(entry[0], key_type):
                raise TypeError(species_error)

        if key_type is str:
            return sorted(species, key=lambda spec: (spec[0], -spec[1]))

        ordered = sorted(
            species, key=lambda spec: (spec[0].symbol, -spec[0].oxidation)
        )
        return [
            (spec[0].symbol, spec[0].oxidation, spec[1]) for spec in ordered
        ]

    @staticmethod
    def _parse_spec_str(spec_str: str) -> Tuple[str, int]:
        """Split a species string such as ``'Fe3+'`` into (symbol, charge).

        A string without any digit is treated as a neutral species.
        """
        charge_match = re.search(r"\d+", spec_str)
        if charge_match is None:
            return spec_str, 0
        charge_loc = charge_match.start()
        # The sign is the trailing character; the magnitude precedes it.
        return spec_str[:charge_loc], int(spec_str[-1] + spec_str[charge_loc:-1])

    @staticmethod
    def _reduce_stoics(counts: List[int]) -> List[int]:
        """Divide site counts by their highest common factor."""
        hcf = reduce(gcd, counts)
        return [count // hcf for count in counts]

    @staticmethod
    def _decorate_by_guess(structure: "pymatgen.core.Structure", message: str):
        """Decorate `structure` with the first composition-based oxidation guess.

        `message` is printed once the decoration has been applied.
        """
        guesses = structure.composition.oxi_state_guesses()
        oxi_transform = OxidationStateDecorationTransformation(guesses[0])
        decorated = oxi_transform.apply_transformation(structure)
        print(message)
        return decorated

    @staticmethod
    def _decorate_oxi_states(
        structure: "pymatgen.core.Structure", determine_oxi: str
    ):
        """Return `structure` decorated with oxidation states.

        Args:
            structure: A pymatgen Structure.
            determine_oxi: 'BV' for bond valence, 'comp_ICSD' for the first
                composition-based guess, or 'both' to try bond valence and
                fall back to the composition-based guess if it raises
                a ValueError.

        Raises:
            ValueError: `determine_oxi` is not one of the options above.

        """
        if determine_oxi == "BV":
            return BVAnalyzer().get_oxi_state_decorated_structure(structure)

        if determine_oxi == "comp_ICSD":
            return SmactStructure._decorate_by_guess(
                structure, "Charge assigned based on ICSD statistics"
            )

        if determine_oxi == "both":
            try:
                decorated = BVAnalyzer().get_oxi_state_decorated_structure(
                    structure
                )
            except ValueError:
                return SmactStructure._decorate_by_guess(
                    structure,
                    "Oxidation states assigned based on ICSD statistics",
                )
            print("Oxidation states assigned using bond valence")
            return decorated

        raise ValueError(
            f"Argument for 'determine_oxi', <{determine_oxi}> is not valid. Choose either 'BV','comp_ICSD' or 'both'."
        )

    @staticmethod
    def __parse_py_sites(
        structure: "pymatgen.core.Structure",
    ) -> Tuple[Dict[str, List[List[float]]], List[Tuple[str, int, int]]]:
        """Parse the sites of a pymatgen Structure.

        Args:
            structure: A :class:`pymatgen.core.Structure` instance.

        Returns:
            sites (dict): In which a key is a species string
                and its corresponding value is a list of the coordinates
                that species occupies in the supercell. The coordinates
                are represented by lists containing three elements: one
                for each spatial dimension.
            species (list): A list of each species in the structure,
                represented by a tuple of (element, charge, stoichiometry).

        Raises:
            TypeError: `structure` is not a pymatgen Structure.

        """
        if not isinstance(structure, pymatgen.core.Structure):
            raise TypeError(
                "structure must be a pymatgen.core.Structure instance."
            )

        sites = defaultdict(list)
        for site in structure.sites:
            site_type = site.species_string

            # Add charge magnitude, for cases of unit charge. The length guard
            # keeps one-letter undecorated symbols (e.g. "O") from being
            # indexed out of range.
            if (
                len(site_type) > 1
                and site_type[-1] in ("+", "-")
                and site_type[-2] not in "0123456789"
            ):
                site_type = site_type[:-1] + "1" + site_type[-1]

            sites[site_type].append(site.coords.tolist())

        sites = dict(sites)

        # Find stoichiometry
        stoics = SmactStructure._reduce_stoics(
            [len(coords) for coords in sites.values()]
        )
        species = [
            (*SmactStructure._parse_spec_str(spec), stoic)
            for spec, stoic in zip(sites.keys(), stoics)
        ]

        return sites, species

    @staticmethod
    def from_py_struct(
        structure: "pymatgen.core.Structure", determine_oxi: str = "BV"
    ) -> "SmactStructure":
        """Create a SmactStructure from a pymatgen Structure object.

        Args:
            structure: A pymatgen Structure.
            determine_oxi (str): The method to determine the assignments oxidation states in the structure.
                Options are 'BV', 'comp_ICSD','both' for determining the oxidation states by bond valence,
                ICSD statistics or trial both sequentially, respectively.
                'predecorated' uses `structure` as supplied, without
                assigning oxidation states.

        Returns:
            :class:`~.SmactStructure`

        Raises:
            TypeError: `structure` is not a pymatgen Structure.
            ValueError: `determine_oxi` is not a recognised option.

        """
        if not isinstance(structure, pymatgen.core.Structure):
            raise TypeError(
                "Structure must be a pymatgen.core.Structure instance."
            )

        if determine_oxi == "predecorated":
            struct = structure
        elif determine_oxi in SmactStructure._OXI_METHODS:
            struct = SmactStructure._decorate_oxi_states(
                structure, determine_oxi
            )
        else:
            raise ValueError(
                f"Argument for 'determine_oxi', <{determine_oxi}> is not valid. Choose either 'BV','comp_ICSD','both' or 'predecorated'."
            )

        sites, species = SmactStructure.__parse_py_sites(struct)

        lattice_mat = struct.lattice.matrix
        lattice_param = 1.0

        return SmactStructure(
            species,
            lattice_mat,
            sites,
            lattice_param,
            sanitise_species=True,
        )

    @staticmethod
    def from_mp(
        species: List[Union[Tuple[str, int, int], Tuple["smact.Species", int]]],
        api_key: str,
        determine_oxi: str = "BV",
    ) -> "SmactStructure":
        """Create a SmactStructure using the first Materials Project entry for a composition.

        Args:
            species: See :meth:`~.__init__`.
            determine_oxi (str): The method to determine the assignments oxidation states in the structure.
                Options are 'BV', 'comp_ICSD','both' for determining the oxidation states by bond valence,
                ICSD statistics or trial both sequentially, respectively.
                Only consulted when none of the requested species is neutral.
            api_key: A www.materialsproject.org API key.

        Returns:
            :class:`~.SmactStructure`

        Raises:
            ValueError: No entry was found for the composition, or
                `determine_oxi` is not a recognised option.

        """
        sanit_species = SmactStructure._sanitise_species(species)

        with MPRester(api_key) as m:
            eles = SmactStructure._get_ele_stoics(sanit_species)
            formula = "".join(f"{ele}{stoic}" for ele, stoic in eles.items())

            try:
                # Legacy API routine
                structs = m.query(
                    criteria={"reduced_cell_formula": formula},
                    properties=["structure"],
                )
            except NotImplementedError:
                # New API routine
                docs = m.summary.search(formula=formula, fields=["structure"])
                structs = [convert_next_gen_mprest_data(doc) for doc in docs]

        if len(structs) == 0:
            raise ValueError(
                "Could not find composition in Materials Project Database, "
                "please supply a structure."
            )

        # Default to first found structure
        struct = structs[0]["structure"]

        # Only decorate with oxidation states if everything's charged.
        if all(spec[1] != 0 for spec in sanit_species):
            if determine_oxi not in SmactStructure._OXI_METHODS:
                raise ValueError(
                    f"Argument for 'determine_oxi', <{determine_oxi}> is not valid. Choose either 'BV','comp_ICSD' or 'both'."
                )
            struct = SmactStructure._decorate_oxi_states(struct, determine_oxi)

        lattice_mat = struct.lattice.matrix
        lattice_param = 1.0  # TODO Use actual lattice parameter

        sites, _ = SmactStructure.__parse_py_sites(struct)

        return SmactStructure(
            sanit_species,
            lattice_mat,
            sites,
            lattice_param,
            sanitise_species=False,
        )

    @staticmethod
    def from_file(fname: str) -> "SmactStructure":
        """Create SmactStructure from a POSCAR file.

        Args:
            fname: The name of the POSCAR file.
                See :meth:`~.as_poscar` for format specification.

        Returns:
            :class:`~.SmactStructure`

        """
        with open(fname, encoding="utf-8") as f:
            return SmactStructure.from_poscar(f.read())

    @staticmethod
    def from_poscar(poscar: str) -> "SmactStructure":
        """Create SmactStructure from a POSCAR string.

        Fields may be separated by any run of whitespace, and both LF and
        CRLF line endings are accepted.

        Args:
            poscar: A SMACT-formatted POSCAR string.
                See :meth:`~.as_poscar` for format specification.

        Returns:
            :class:`~.SmactStructure`

        """
        lines = poscar.splitlines()

        # Find stoichiometry
        stoics = SmactStructure._reduce_stoics(
            [int(count) for count in lines[6].split()]
        )
        species = [
            (*SmactStructure._parse_spec_str(spec_str), stoic)
            for spec_str, stoic in zip(lines[0].split(), stoics)
        ]

        lattice_param = float(lines[1])
        lattice = np.array(
            [[float(point) for point in line.split()] for line in lines[2:5]]
        )

        sites = defaultdict(list)
        for line in lines[8:]:
            fields = line.split()
            if not fields:  # A blank line marks the end of the site list
                break
            # Three coordinates first, species string last.
            sites[fields[-1]].append([float(x) for x in fields[:3]])

        return SmactStructure(species, lattice, dict(sites), lattice_param)

    def _format_style(
        self,
        template: str,
        delim: Optional[str] = " ",
        include_ground: Optional[bool] = False,
    ) -> str:
        """Format a given template string with the composition.

        Formats a python template string with species information,
        with each species separated by a given delimiter.

        Args:
            template: Template string to format, using python's
                curly brackets notation. Supported keywords are
                `ele` for the elemental symbol, `stoic` for the
                stoichiometry, `charge` for the absolute value
                of oxidation state and `sign` for the
                oxidation state's sign.
            delim: The delimiter between species' templates.
            include_ground: Whether to include the charge and sign
                of neutral species.

        Returns:
            String of templates formatted for each species, separated
                by `delim`.

        Examples:
            >>> s = SmactStructure.from_file('tests/files/CaTiO3.txt')
            >>> template = '{stoic}x{ele}{charge}{sign}'
            >>> print(s._format_style(template))
            1xCa2+ 3xO2- 1xTi4+

        """
        formatted = []
        for specie in self.species:
            oxidation = specie[1]
            if include_ground:
                # Neutral species are rendered explicitly, as "0+".
                charge = abs(oxidation)
                sign = "+" if oxidation >= 0 else "-"
            else:
                charge = abs(oxidation) if oxidation != 0 else ""
                sign = get_sign(oxidation)
            formatted.append(
                template.format(
                    ele=specie[0],
                    stoic=specie[2],
                    charge=charge,
                    sign=sign,
                )
            )
        return delim.join(formatted)

    @staticmethod
    def _get_ele_stoics(species: List[Tuple[str, int, int]]) -> Dict[str, int]:
        """Get the number of each element type in the compound, irrespective of oxidation state.

        Args:
            species: See :meth:`~.__init__`.

        Returns:
            eles: Dictionary of {element: stoichiometry}.

        Examples:
            >>> species = [('Fe', 2, 1), ('Fe', 3, 2), ('O', -2, 4)]
            >>> print(SmactStructure._get_ele_stoics(species))
            {'Fe': 3, 'O': 4}

        """
        eles = defaultdict(int)
        for specie in species:
            eles[specie[0]] += specie[2]
        return dict(eles)

    def has_species(self, species: Tuple[str, int]) -> bool:
        """Determine whether a given (element, oxidation) pair is in the structure."""
        return species in map(itemgetter(0, 1), self.species)

    def get_spec_strs(self) -> List[str]:
        """Get string representations of the constituent species.

        Returns:
            A list of strings, formatted as '{element}{charge}{sign}'.

        Examples:
            >>> s = SmactStructure.from_file('tests/files/CaTiO3.txt')
            >>> s.get_spec_strs()
            ['Ca2+', 'O2-', 'Ti4+']

        """
        return self._format_style("{ele}{charge}{sign}").split(" ")

    def composition(self) -> str:
        """Generate a key that describes the composition.

        Key format is '{element}_{stoichiometry}_{charge}{sign}'
        with no delimiter, *sans brackets*. Species are ordered as stored within
        the structure, see :class:`~.SmactStructure`.

        Returns:
            Key describing constituent species.

        Examples:
            >>> s = SmactStructure.from_file('tests/files/CaTiO3.txt')
            >>> print(s.composition())
            Ca_1_2+O_3_2-Ti_1_4+

        """
        comp_style = "{ele}_{stoic}_{charge}{sign}"
        return self._format_style(comp_style, delim="", include_ground=True)

    def as_poscar(self) -> str:
        """Represent the structure as a POSCAR file compatible with VASP5.

        The POSCAR format adopted is as follows:

        The first line contains the species' names separated by a whitespace.

        The second line contains the lattice parameter.

        The third through fifth line, inclusive, contain the lattice
        matrix: each line contains a lattice vector, with elements
        separated by a whitespace.

        The sixth line contains the elements' names separated by a whitespace.
        If more than one oxidation state exists for an element, the element
        appears multiple times; once for each oxidation state.

        The seventh line contains the number of sites of each species,
        separated by a whitespace and in the same order as the first line.

        The eighth line is the string 'Cartesian'.

        The ninth line onwards are the Cartesian coordinates of each site,
        separated by a whitespace. In addition, at the end of each line is the
        species' name, separated by a whitespace.

        For examples of this format, see the text files under tests/files.

        Returns:
            str: POSCAR-style representation of the structure.

        """
        spec_strs = self.get_spec_strs()

        rows = [
            " ".join(spec_strs),
            f"{self.lattice_param}",
            "\n".join(
                " ".join(map(str, vec)) for vec in self.lattice_mat.tolist()
            ),
            self._format_style("{ele}"),
            " ".join(str(len(self.sites[spec])) for spec in spec_strs),
            "Cartesian",
        ]
        for spec, coords in self.sites.items():
            rows.extend(
                " ".join(map(str, coord)) + f" {spec}" for coord in coords
            )

        # Every row, including the last, is newline-terminated.
        return "\n".join(rows) + "\n"