Source code for smact.utils.species

"""Pure-string utilities for parsing and formatting SMACT species strings."""

from __future__ import annotations

import re

__all__ = ["get_sign", "parse_spec", "unparse_spec"]

_SPEC_RE = re.compile(r"([A-Za-z]+)([0-9]*[\+\-])")
_ELE_RE = re.compile(r"[A-Za-z]+")
_CHARGE_RE = re.compile(r"\d+")


[docs] def parse_spec(species: str) -> tuple[str, int]: """ Parse a species string into its atomic symbol and oxidation state. :param species: the species string :return: a tuple of the atomic symbol and oxidation state """ m = _SPEC_RE.match(species) if m is None: return _parse_spec_old(species) ele, oxi_state = m.groups() # The regex guarantees oxi_state ends in '+' or '-', so this branch # is always taken; the else clause is unreachable by construction. charge = (int(oxi_state[:-1] or 1)) * (-1 if "-" in oxi_state else 1) return ele, charge
def _parse_spec_old(species: str) -> tuple[str, int]: """ Parse a species string into its atomic symbol and oxidation state. :param species: the species string :return: a tuple of the atomic symbol and oxidation state """ match = _ELE_RE.match(species) if match is None: msg = f"Invalid species string (no element symbol found): {species!r}" raise ValueError(msg) ele = match.group(0) charge_match = _CHARGE_RE.search(species) ox_state = int(charge_match.group(0)) if charge_match else 0 if "-" in species: ox_state *= -1 # Handle cases of X+ or X- (instead of X1+ or X1-) as well as X0+ / X0-. # Short-circuit order matters: "0" wins over bare "+" or "-" so that "X0+" # correctly returns 0 rather than +1. if ox_state == 0 and "0" in species: ox_state = 0 elif "+" in species and ox_state == 0: ox_state = 1 elif ox_state == 0 and "-" in species: ox_state = -1 return ele, ox_state
[docs] def unparse_spec(species: tuple[str, int], include_one: bool = True) -> str: """Unparse a species into a string representation. The analogue of :func:`parse_spec`. Args: species (tuple[str,int]): A tuple of (element, signed_charge). include_one (bool): If True, include charge of 1 in the output if charge is 1 or -1. Returns: ------- String of {element}{absolute_charge}{sign}. Examples: -------- >>> unparse_spec(("Fe", 2)) 'Fe2+' >>> unparse_spec(("O", -2)) 'O2-' """ if include_one or abs(species[1]) != 1: return f"{species[0]}{abs(species[1])}{get_sign(species[1])}" return f"{species[0]}{get_sign(species[1])}"
[docs] def get_sign(charge: int) -> str: """ Get string representation of a number's sign. Args: ---- charge (int): The number whose sign to derive. Returns: ------- sign (str): either '+', '-', or '' for neutral. """ if charge > 0: return "+" if charge < 0: return "-" return ""