# Source code for CoREMOF.mosaec

try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal

import math

import mendeleev

from ccdc import io
from ccdc import crystal
from ccdc import molecule

from ccdc.crystal import Crystal
from ccdc.molecule import Atom, Bond, Molecule

from ccdc import descriptors


[docs]
def readentry(input_cif: str) -> Crystal:
    """
    Reads a CIF file containing structure data and converts it to a
    standard atom labeling convention using the ccdc.crystal module.

    Every row of the atom-site loop is relabelled as
    ``<type symbol><running count of that type>``. Once a new label matches
    a token of the empirical formula (e.g. ``C12``), any further atoms of
    that element are treated as duplicates and dropped.

    Parameters:
        input_cif (str): filename (.CIF) containing crystal structure data.

    Returns:
        newcif (ccdc.crystal.Crystal): Crystal object containing structural data
                                       in the standard atom labeling convention.
    """
    # read the first structure only to get its empirical formula; the
    # context manager closes the reader, so no explicit close() is needed
    with io.CrystalReader(input_cif, format="cif") as readcif:
        cif = readcif[0]

    # to remove duplicate atoms, need the empirical formula
    elamnt = cif.formula.split(" ")

    # work on the raw text, with leading whitespace stripped from every line
    with open(input_cif, "r") as file:
        lines = [line.lstrip() for line in file.readlines()]

    # locate the header of the atom loop: `start` is the line after the most
    # recent "loop_", `end` is the line after the last matching "_atom" tag
    loop_pos = 0
    start = 0
    end = 0
    for i, line in enumerate(lines):
        if "loop_" in line:
            loop_pos = i
        if ("_atom" in line) and ("_geom" not in line) and ("_aniso" not in line):
            start = loop_pos + 1
            end = i + 1

    # locate atom type and site label columns within that header
    type_pos = 0
    label_pos = 0
    for columncount, header in enumerate(lines[start:end]):
        if "atom_site_type_symbol" in header:
            type_pos = columncount
        if "atom_site_label" in header:
            label_pos = columncount

    counting = {}
    cutoff = {}
    drop = set()
    for i in range(end, len(lines)):
        if "loop_" in lines[i]:
            break
        # lines with atom information will contain a ., so only look at these
        if "." not in lines[i]:
            continue
        # split lines by whitespace
        col = lines[i].split()
        # a row too short to hold both columns cannot be an atom row;
        # skip it instead of failing with an IndexError
        if len(col) <= max(type_pos, label_pos):
            continue
        symbol = col[type_pos]
        # keep count of how many of each element type
        counting[symbol] = counting.get(symbol, 0) + 1
        # new atom labels
        newlabel = f"{symbol}{counting[symbol]}"
        # NOTE(review): str.replace rewrites every occurrence of the old label
        # text in the row, not only the label column - confirm that this
        # cannot clobber another column for the CIFs being processed.
        lines[i] = lines[i].replace(col[label_pos], newlabel)
        # cutoff repeated atoms
        if newlabel in elamnt:
            cutoff[symbol] = counting[symbol]
        if symbol in cutoff and counting[symbol] > cutoff[symbol]:
            drop.add(i)

    # remove unnecessary atoms and combine to new string
    newstring = "".join(line for i, line in enumerate(lines) if i not in drop)

    # read into new crystal object and assign bonds
    newcif = crystal.Crystal.from_string(newstring, format="cif")
    newcif.assign_bonds()
    return newcif




[docs]
def readSBU(input_mol2: str) -> Molecule:
    """
    Reads a MOL2 file containing SBU/metal complex structural data
    and converts it to a standard atom labeling convention using
    the ccdc.crystal module.

    Parameters:
        input_mol2 (str): filename (.mol2) containing SBU/metal complex
                          structural data.

    Returns:
        mol (ccdc.molecule.Molecule): the ``molecule`` of the parsed crystal,
                                      with a running index appended to every
                                      atom label so that labels are unique.
    """
    with open(input_mol2, "r") as file:
        lines = file.readlines()

    # First have to convert the connection points (unknown atom types)
    # to H. Has to be done as a string so CSD reader can
    # interpret bonding. Only lines containing "*" are rewritten.
    newstring = "".join(
        line.replace("Du", "H") if "*" in line else line for line in lines
    )
    cif = crystal.Crystal.from_string(newstring, format="mol2")
    cif.assign_bonds()

    mol = cif.molecule
    # MOSAEC needs atoms to have unique labels, so make them unique
    for count, atom in enumerate(mol.atoms, start=1):
        atom.label = f"{atom.label}{count}"

    return mol




[docs]
def read_CSD_entry(input_refcode: str) -> Crystal:
    """
    Read entries directly from the CSD CrystalReader according to CSD refcode.

    Parameters:
        input_refcode (str): string used to identify materials in the CSD.

    Returns:
        cif (ccdc.crystal.Crystal): Crystal object for the requested refcode,
                                    with bonds assigned.
    """
    # the context manager closes the reader even if the lookup or the bond
    # assignment raises, so the reader is never leaked
    with io.CrystalReader("CSD") as csd_crystal_reader:
        cif = csd_crystal_reader.crystal(input_refcode)
        cif.assign_bonds()
    return cif




[docs]
def get_no_metal_molecule(inputmolecule: Molecule) -> Molecule:
    """
    Build a metal-free copy of a molecule.

    The input is left untouched: the work is done on a copy, from which
    every atom flagged ``is_metal`` is removed before all bond types are
    reassigned.

    Parameters:
        inputmolecule (ccdc.molecule.Molecule): original Molecule object.

    Returns:
        stripped (ccdc.molecule.Molecule): copy of the input with all metal
                                           atoms removed.
    """
    stripped = inputmolecule.copy()
    # pick out the metals first, then delete them one by one
    metals = [candidate for candidate in stripped.atoms if candidate.is_metal]
    for metal in metals:
        stripped.remove_atom(metal)
    stripped.assign_bond_types(which="All")
    return stripped




[docs]
def get_unique_sites(mole: Molecule, asymmole: Molecule) -> list[Atom]:
    """
    Get the unique atoms in a structure belonging to the asymmetric unit.

    An atom of ``mole`` is considered only if its coordinates equal those of
    some atom of ``asymmole``. The first atom seen for a given coordinate and
    label is kept; later atoms sharing either are set aside as duplicates and
    reconciled in a second pass.

    Parameters:
        mole (ccdc.molecule.Molecule): original structure Molecule object.
        asymmole (ccdc.molecule.Molecule): asymmetric unit of the structure.

    Returns:
        uniquesites (list[ccdc.molecule.Atom]): list of unique atoms in the structure
                                                that belong to the asymmetric unit.
    """
    # accumulators: accepted atoms, their labels and coordinates, the
    # coordinates of the asymmetric unit, and the atoms rejected in pass one
    uniquesites = []
    labels = []
    asymmcoords = []
    molecoords = []
    duplicates = []
    # coordinates of every atom in the asymmetric unit
    for atom in asymmole.atoms:
        asymmcoords.append(atom.coordinates)
    # pass one: accept an atom only if both its coordinates and its label
    # are new; anything else on an asymmetric-unit position is a duplicate
    for atom in mole.atoms:
        if atom.coordinates in asymmcoords:
            if not atom.coordinates in molecoords:
                if not atom.label in labels:
                    uniquesites.append(atom)
                    molecoords.append(atom.coordinates)
                    labels.append(atom.label)
                else:
                    duplicates.append(atom)
            else:
                duplicates.append(atom)
    # pass two: compare each duplicate with the accepted atoms that share
    # its coordinates or its label
    # NOTE: uniquesites is modified (remove/append) inside the loop that
    # iterates over it, so the outcome depends on the exact statement order
    if len(duplicates) >= 1:
        for datom in duplicates:
            for atom in uniquesites:
                if any(
                    [
                        (datom.coordinates == atom.coordinates),
                        (datom.label == atom.label),
                    ]
                ):
                    # same element: prefer whichever copy has more neighbours
                    if datom.atomic_symbol == atom.atomic_symbol:
                        if len(datom.neighbours) > len(atom.neighbours):
                            uniquesites.remove(atom)
                            uniquesites.append(datom)
                    # a duplicate whose label has not been recorded yet is
                    # added as a site in its own right
                    if not datom.label in labels:
                        uniquesites.append(datom)
                        labels.append(datom.label)
    return uniquesites




[docs]
def get_metal_sites(sites: list[Atom]) -> list[Atom]:
    """
    Get the metal sites in a structure belonging to the asymmetric unit.

    Parameters:
        sites (list[ccdc.molecule.Atom]): list of unique atoms in the structure
                                          that belong to the asymmetric unit.

    Returns:
        metalsites (list[ccdc.molecule.Atom]): the atoms of ``sites`` flagged
                                               ``is_metal``, in their original
                                               order.
    """
    return [site for site in sites if site.is_metal]




[docs]
def get_ligand_sites(
    metalsites: list[Atom], sites: list[Atom]
) -> dict[Atom, list[Atom]]:
    """
    Get the ligand sites binding each metal atom in a structure.

    For every metal, each non-metal neighbour is matched by label against
    ``sites``, and every site carrying that label is collected.

    Parameters:
        metalsites (list[ccdc.molecule.Atom]): list of metal sites in the structure
                                               that belong to the asymmetric unit.
        sites (list[ccdc.molecule.Atom]):  list of unique atoms in the structure
                                           that belong to the asymmetric unit.

    Returns:
        metal_sphere (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
                        dictionary with metal Atom object as keys and the list
                        of ligand atoms which bind them as values.
    """
    # index the sites by label once, so each ligand is a single lookup
    # instead of a scan over every site
    sites_by_label = {}
    for site in sites:
        sites_by_label.setdefault(site.label, []).append(site)

    metal_sphere = {}
    for metal in metalsites:
        sphere = []
        for ligand in metal.neighbours:
            # metal-metal contacts are not ligands
            if ligand.is_metal:
                continue
            sphere.extend(sites_by_label.get(ligand.label, ()))
        metal_sphere[metal] = sphere
    return metal_sphere




[docs]
def get_binding_sites(metalsites: list[Atom], uniquesites: list[Atom]) -> set[Atom]:
    """
    Get the binding sites in a structure, given the list of unique metal atoms
    and all unique atoms.

    A unique atom is a binding site when its label equals the label of any
    neighbour of any metal site. Neighbours are not filtered by element, so
    a metal bonded to another metal is matched as well.

    Parameters:
        metalsites (list[ccdc.molecule.Atom]): list of unique metal atoms.
        uniquesites (list[ccdc.molecule.Atom]): list of unique atoms.

    Returns:
        binding_sites (set[ccdc.molecule.Atom]): set of binding sites connecting
                                                 metal atoms and ligands.
    """
    # labels of everything directly bonded to a metal
    bound_labels = set()
    for metal in metalsites:
        for ligand in metal.neighbours:
            bound_labels.add(ligand.label)
    return {site for site in uniquesites if site.label in bound_labels}




[docs]
def ringVBOs(mole: Molecule) -> dict[Atom, float]:
    """
    Calculates the VBO (valence bond order) for each cyclic atom in the
    structure.

    The work is done on a metal-free copy of the molecule that is pruned
    down to the ring atoms and the atoms directly bonded to them. Bonds
    between separate rings are cut and capped, off-ring neighbours are
    replaced by a capping element, and each remaining component has its
    bond types reassigned and kekulized before the bond orders are summed.

    Parameters:
        mole (ccdc.molecule.Molecule): Molecule object representing the structure.

    Returns:
        ringVBO (dict[ccdc.molecule.Atom, float]): dictionary with the atoms of
                                  ``mole.atoms`` whose label belongs to a cyclic
                                  atom as keys and their VBO as values.
    """
    ringVBO = {}
    # atoms of the parent (unmodified) molecule; results are keyed on these
    unassigned = mole.atoms
    # working copy, which is edited freely below
    ringcopy = mole.copy()
    oncycle_atoms = []
    offcycle_atoms = []
    oncycle_labels = []
    offcycle_labels = []
    cyclic_periodic = []
    cyclic_periodic_labels = []
    offcycle_periodic = []

    # remove all the metals, this
    # prevents metal-containing rings (i.e. pores)
    # from interfering
    for atom in ringcopy.atoms:
        if atom.is_metal:
            ringcopy.remove_atom(atom)

    # collect all the cyclic atoms
    for atom in ringcopy.atoms:
        if atom.is_cyclic:
            if not atom in oncycle_atoms:
                oncycle_atoms.append(atom)
                oncycle_labels.append(atom.label)

    # we also need everything that the cyclic atoms are bound to
    for atom in oncycle_atoms:
        for neighbour in atom.neighbours:
            if not neighbour in oncycle_atoms:
                if not neighbour in offcycle_atoms:
                    offcycle_atoms.append(neighbour)
                    offcycle_labels.append(neighbour.label)

    # combine cyclic atoms and 1st coordination sphere
    cyclicsystem = oncycle_atoms + offcycle_atoms

    # initialize ringVBO dictionary: one zero entry per parent atom whose
    # label matches a cyclic atom
    for atom in unassigned:
        if atom.label in oncycle_labels:
            ringVBO[atom] = 0

    # CSD doesn't do periodic boundary conditions, need a workaround
    # check for any periodic copies of cyclic atoms, i.e. atoms that are not
    # in oncycle_atoms themselves but share a label with one that is
    for atom in ringcopy.atoms:
        if all([(not atom in oncycle_atoms), (atom.label in oncycle_labels)]):
            if not atom in cyclic_periodic:
                cyclic_periodic.append(atom)
                cyclic_periodic_labels.append(atom.label)
    # neighbours of those copies whose label is not a cyclic label
    for atom in cyclic_periodic:
        # print (atom.neighbours)
        for neighbour in atom.neighbours:
            if not neighbour in (offcycle_periodic + cyclic_periodic):
                if not neighbour.label in (oncycle_labels):
                    offcycle_periodic.append(neighbour)

    # remove every atom that isn't part of or directly bound to a cycle
    for atom in ringcopy.atoms:
        if not atom in (cyclicsystem + cyclic_periodic):
            ringcopy.remove_atom(atom)

    # find all non-cyclic bonds
    # single bonds between cycles, break and cap with H
    for bond in ringcopy.bonds:
        if not bond.is_cyclic:
            # bonds between cycles: both ends carry a cyclic label and
            # neither end carries the label of a periodic copy
            if all(
                [
                    (all((member.label in oncycle_labels for member in bond.atoms))),
                    (
                        all(
                            (
                                not member.label in cyclic_periodic_labels
                                for member in bond.atoms
                            )
                        )
                    ),
                ]
            ):
                member1 = bond.atoms[0]
                member2 = bond.atoms[1]
                # each cap is created at the coordinates of one end and
                # bonded to the opposite end, with the original bond type
                Hcap1 = molecule.Atom("H", coordinates=member1.coordinates)
                Hcap2 = molecule.Atom("H", coordinates=member2.coordinates)
                Hcap1_id = ringcopy.add_atom(Hcap1)
                Hcap2_id = ringcopy.add_atom(Hcap2)
                ringcopy.add_bond(bond.bond_type, Hcap1_id, member2)
                ringcopy.add_bond(bond.bond_type, Hcap2_id, member1)
                ringcopy.remove_bond(bond)

    # cap off-cycle atoms
    for offatom in offcycle_atoms + offcycle_periodic:
        # get the VBO for each off-cycle atom
        # (VBO with respect to cyclic atoms)
        offVBO = 0

        # quick check for delocalized systems in the ring
        # if there are any, get the delocalised bond orders
        # (offdVBO is only assigned here, and only read below for a
        # delocalised bond of this same atom)
        if any(bond.bond_type == "Delocalised" for bond in offatom.bonds):
            offdVBO = delocalisedLBO(ringcopy)
        # add the non-delocalized bond orders
        for bond in offatom.bonds:
            # only interested in bonds to cyclic atoms
            if any(batom.label in oncycle_labels for batom in bond.atoms):
                # Each bond contributes to Ligand Bond Order according to its type
                if bond.bond_type == "Single":
                    offVBO += 1
                elif bond.bond_type == "Double":
                    offVBO += 2
                elif bond.bond_type == "Triple":
                    offVBO += 3
                elif bond.bond_type == "Quadruple":
                    offVBO += 4
                elif bond.bond_type == "Delocalised":
                    offVBO += offdVBO[offatom]
                elif bond.bond_type == "Aromatic":
                    offVBO += 0
                    print("impossible Aromatic bond")
        # cap with appropriate element for VBO; the symbol is left unchanged
        # when offVBO matches none of the values tested below
        if offVBO == 1:
            offatom.atomic_symbol = "H"
        elif offVBO == 2:
            offatom.atomic_symbol = "O"
        elif offVBO == 3:
            offatom.atomic_symbol = "N"
        elif offVBO == 4:
            offatom.atomic_symbol = "C"
        elif offVBO == 5:
            offatom.atomic_symbol = "P"
        elif offVBO == 6:
            offatom.atomic_symbol = "S"
        elif offVBO > 6:
            print("no, that's too many")

    # for each cyclic system, reassign bonds, kekulize, and get VBO
    # the bond and atom pruning we did above ensures that fused cycles
    # will be treated as a single system
    # while non-fused cycles that are connected via bonding are treated
    # as separate systems
    for cyclesys in ringcopy.components:
        # reassign bonds and kekulize
        cyclesys.assign_bond_types()
        cyclesys.kekulize()

        # porphyrins and similar molecules are misassigned, we will code a hard fix
        # first identify and isolate the inner porphyrin(/like) atoms
        # (these atoms determine overall charge)
        # store these in a dictionary for later
        joining_atoms = dict()
        joining_rings = dict()
        subring_labels = dict()
        porphyrinatoms = dict()
        porphyrin_to_correct = set()
        # ring by ring
        for subring in cyclesys.rings:
            subring_labels[subring] = []
            # get a list of all atom labels in each subring
            for sratom in subring.atoms:
                subring_labels[subring].append(sratom.label)
            # check atom by atom
            for sratom in subring.atoms:
                # check each atom neighbour
                for srneighbour in sratom.neighbours:
                    srn_label = srneighbour.label
                    # if the neighbour is not part of the current ring
                    if not srn_label in subring_labels[subring]:
                        # if the neighbour IS a cyclic atom
                        # consider this a "joining atom"
                        # joining_atoms maps label -> rings it joins,
                        # joining_rings maps ring -> labels joined to it
                        if srn_label in oncycle_labels:
                            try:
                                joining_atoms[srn_label].append(subring)
                            except KeyError:
                                joining_atoms[srn_label] = [subring]
                            try:
                                joining_rings[subring].append(srn_label)
                            except KeyError:
                                joining_rings[subring] = [srn_label]
        # candidate macrocycles: fully conjugated rings of length 16; keep
        # their atoms that are not joining atoms and have exactly two
        # cyclic bonds
        for jring in joining_rings:
            if all([(len(jring) == 16), (jring.is_fully_conjugated)]):
                for patom in jring.atoms:
                    plabel = patom.label
                    if not plabel in joining_atoms:
                        ncyclicbonds = 0
                        for pbond in patom.bonds:
                            if pbond.is_cyclic:
                                ncyclicbonds += 1
                        if ncyclicbonds == 2:
                            try:
                                porphyrinatoms[jring].append(patom)
                            except KeyError:
                                porphyrinatoms[jring] = [patom]
        # only rings whose collected atoms are all N, none of them bonded
        # to an H, are flagged for the correction applied below
        for porph in porphyrinatoms:
            if all(i.atomic_symbol == "N" for i in porphyrinatoms[porph]):
                protonated = 0
                for patom in porphyrinatoms[porph]:
                    if any(i.atomic_symbol == "H" for i in patom.neighbours):
                        protonated += 1
                if protonated == 0:
                    for patom in porphyrinatoms[porph]:
                        porphyrin_to_correct.add(patom.label)

        # quick check for delocalized systems in the ring
        # if there are any, get the delocalised bond orders
        # NOTE(review): rdVBO is only (re)assigned when this component has a
        # delocalised bond; it is read below only for delocalised bonds
        if any(bond.bond_type == "Delocalised" for bond in cyclesys.bonds):
            rdVBO = delocalisedLBO(cyclesys)

        # assign VBO for each on-cycle atom
        for ratom in cyclesys.atoms:
            rVBO = 0
            if ratom.label in oncycle_labels:
                # flagged porphyrin-like N atoms start from -0.5
                if ratom.label in porphyrin_to_correct:
                    rVBO -= 0.5
                for rbond in ratom.bonds:
                    # Each bond contributes to Ligand Bond Order
                    # according to its type except periodic copies:
                    # a bond is counted when it is cyclic, or when not all
                    # of its atoms carry the label of a periodic copy
                    if any(
                        [
                            (rbond.is_cyclic),
                            (
                                not all(
                                    (
                                        mem.label in cyclic_periodic_labels
                                        for mem in rbond.atoms
                                    )
                                )
                            ),
                        ]
                    ):
                        if rbond.bond_type == "Single":
                            rVBO += 1
                        elif rbond.bond_type == "Double":
                            rVBO += 2
                        elif rbond.bond_type == "Triple":
                            rVBO += 3
                        elif rbond.bond_type == "Quadruple":
                            rVBO += 4
                        elif rbond.bond_type == "Delocalised":
                            rVBO += rdVBO[ratom]
                        elif rbond.bond_type == "Aromatic":
                            rVBO += 0
                            print("impossible Aromatic bond")

                # the VBOs are currently associated to atom objects
                # in molecule objects that we have modified
                # we need these to be associated to atom objects in
                # the parent (unmodified) molecule object
                # (matched by label; every parent atom sharing the label
                # receives the contribution)
                for matom in unassigned:
                    if matom.label == ratom.label:
                        ringVBO[matom] += rVBO
                        # unassigned.remove(matom)
    return ringVBO




[docs]
def assign_VBS(atom: Atom, rVBO: dict[int, int], dVBO: dict[int, float]) -> int:
    """
    Assigns a Valence-Bond-Sum (VBS) to an atom.

    Metals always get 0. An atom present in ``rVBO`` takes the value stored
    there. Otherwise the orders of all bonds that do not involve a metal are
    added up; delocalised and aromatic bonds each contribute ``dVBO[atom]``.

    Parameters:
        atom (ccdc.molecule.Atom): Atom object.
        rVBO (dict): ring valence bond orders; looked up with ``atom`` itself
                     as the key.
        dVBO (dict): delocalized-only bond sums; looked up with ``atom``
                     itself as the key.

    Returns:
        VBO (int): valence bond sum value.
    """
    if atom.is_metal:
        return 0
    if atom in rVBO:
        return rVBO[atom]

    # bond types that carry a fixed order
    fixed_orders = (("Single", 1), ("Double", 2), ("Triple", 3), ("Quadruple", 4))
    total = 0
    for link in atom.bonds:
        # bonds to metals do not count towards the sum
        if any(end.is_metal for end in link.atoms):
            continue
        kind = link.bond_type
        for type_name, order in fixed_orders:
            if kind == type_name:
                total += order
                break
        else:
            # aromatic bonds are handled with the delocalised value as well
            if kind == "Delocalised" or kind == "Aromatic":
                total += dVBO[atom]
    return total




[docs]
def delocalisedLBO(molecule: Molecule) -> dict[Atom, float]:
    """
    Writes a dictionary of all atoms in the molecule with delocalized bonds
    and their (delocalized-only) valence bond sum (VBS).

    Atoms are grouped into delocalised systems (atoms connected through
    bonds of type "Delocalised"). Within a system with ``T`` termini, an
    atom with exactly one neighbour in the system gets ``(T + 1) / T`` and an
    atom with ``k > 1`` neighbours in the system gets ``(k + 1) / k``.

    Parameters:
        molecule (ccdc.molecule.Molecule): Molecule object.

    Returns:
        delocal_dict (dict[ccdc.molecule.Atom, float]): dictionary with the
                                        delocalized bond-possessing Atom objects
                                        as keys and their corresponding
                                        (delocalized-only) VBS as values.
    """

    def delocalised_bond_count(member: Atom) -> int:
        """Number of bonds of type "Delocalised" on ``member``."""
        return sum(1 for bond in member.bonds if bond.bond_type == "Delocalised")

    def delocal_crawl(seed: Atom) -> tuple[list[Atom], set[Atom]]:
        """
        Collect every atom reachable from ``seed`` through delocalised bonds.

        Iterative traversal, so the depth of the search does not depend on
        the size of the system. Atoms are returned in discovery order
        together with the same atoms as a set for fast membership tests.
        """
        system = [seed]
        seen = {seed}
        cursor = 0
        while cursor < len(system):
            for bond in system[cursor].bonds:
                if bond.bond_type != "Delocalised":
                    continue
                for member in bond.atoms:
                    if member not in seen:
                        seen.add(member)
                        system.append(member)
            cursor += 1
        return system, seen

    delocal_dict = {}
    for atom in molecule.atoms:
        # skip atoms already covered by an earlier system and atoms that
        # take no part in delocalised bonding
        if atom in delocal_dict or delocalised_bond_count(atom) == 0:
            continue
        delocal_system, members = delocal_crawl(atom)
        # a terminus is an atom with exactly one delocalised bond
        NTerminus = sum(
            1 for member in delocal_system if delocalised_bond_count(member) == 1
        )
        for datom in delocal_system:
            connectivity = sum(
                1 for neighbour in datom.neighbours if neighbour in members
            )
            delocLBO = 0
            if connectivity == 1:
                # terminus
                delocLBO = (NTerminus + 1) / NTerminus
            elif connectivity > 1:
                # node
                delocLBO = (connectivity + 1) / connectivity
            delocal_dict[datom] = delocLBO
    return delocal_dict




[docs]
def iVBS_FormalCharge(atom: Atom) -> int:
    """
    Determines the formal charge of an atom NOT involved in any aromatic or
    delocalized bonding system.

    Only Single/Double/Triple/Quadruple bonds that do not involve a metal
    are counted; any other bond type contributes nothing.

    Parameters:
        atom (ccdc.molecule.Atom): Atom object

    Returns:
        charge (int): formal charge of the input atom (0 for metals).
    """
    if atom.is_metal:
        return 0
    valence = valence_e(atom)

    # sum of the bond orders to non-metal partners
    VBO = 0
    for bond in atom.bonds:
        if any(batom.is_metal for batom in bond.atoms):
            continue
        # Each bond contributes to Ligand Bond Order according to its type
        kind = bond.bond_type
        for type_name, order in (
            ("Single", 1),
            ("Double", 2),
            ("Triple", 3),
            ("Quadruple", 4),
        ):
            if kind == type_name:
                VBO += order
                break

    # need the unpaired electrons
    unpaired_e = 4 - abs(4 - valence)
    # Normal valences:
    if VBO <= unpaired_e:
        return VBO - unpaired_e
    # all valence electrons (or more) are used in bonding
    if VBO >= valence:
        return valence - VBO
    # Expanded (2e) valences: unpaired_e < VBO < valence. Electrons are
    # promoted in pairs, so the excess is rounded up to the next even number
    diff = VBO - unpaired_e
    expansion = diff + diff % 2
    UPE = valence - unpaired_e - expansion
    return valence - (VBO + UPE)




[docs]
def get_CN(atom: Atom) -> int:
    """
    Count the non-metal neighbours of an atom.

    Parameters:
        atom (ccdc.molecule.Atom): Atom object.

    Returns:
        coord_number (int): number of neighbours of the atom that are not
                            flagged as metals.
    """
    return sum(1 for partner in atom.neighbours if not partner.is_metal)




[docs]
def valence_e(elmnt: Atom) -> int:
    """
    Look up the valence electron count used for an atom's element.

    The element is resolved through ``mendeleev`` from the atom's symbol.
    The count is derived from the periodic-table block and group, with
    fixed values for group 18 (8) and for helium (2).

    Parameters:
        elmnt (ccdc.molecule.Atom): Atom object.

    Returns:
        valence (int): Atom's valence electron count.
    """
    element = mendeleev.element(elmnt.atomic_symbol)
    block = element.block
    if block == "s" or block == "d":
        # s- and d-block: the group number is used directly
        valence = element.group_id
    elif block == "p":
        valence = element.group_id - 10
    elif block == "f":
        number = element.atomic_number
        if 56 <= number < 72:
            valence = number - 57 + 3
        elif 88 <= number < 104:
            valence = number - 89 + 3
    # overrides, applied after the block rule
    if element.group_id == 18:
        valence = 8
    if element.symbol == "He":
        valence = 2
    return valence




[docs]
def carbocation_check(atom: Atom) -> Literal["tetrahedral", "trigonal"]:
    """
    Classify the geometry at a three-coordinate centre from its bond angles.

    The three angles spanned by the first three non-metal neighbours are
    averaged and the result is compared with the ideal tetrahedral (109.5)
    and trigonal planar (120) angles; the closer one wins. Nothing is
    returned when the average is exactly halfway between the two.

    Parameters:
        atom (ccdc.molecule.Atom): Atom object.

    Returns:
        Literal["tetrahedral", "trigonal"]: geometry at input atom.
    """
    # non-metal neighbours only
    partners = [partner for partner in atom.neighbours if not partner.is_metal]
    first, second, third = partners[0], partners[1], partners[2]

    measure = descriptors.MolecularDescriptors.atom_angle
    angle_12 = measure(first, atom, second)
    angle_13 = measure(first, atom, third)
    angle_23 = measure(second, atom, third)
    mean_angle = abs(angle_12 + angle_13 + angle_23) / 3

    # distance of the mean angle from each ideal geometry
    from_tetrahedral = abs(mean_angle - 109.5)
    from_trigonal = abs(mean_angle - 120)
    if from_tetrahedral < from_trigonal:
        return "tetrahedral"
    if from_trigonal < from_tetrahedral:
        return "trigonal"
    return None




[docs]
def carbene_type(atom: Atom) -> Literal["singlet", "triplet"]:
    """
    Distinguishes between singlet and triplet carbenes.

    Parameters:
        atom (ccdc.molecule.Atom): Atom object(s) suspected of belonging to a
                                   carbene (2-coordinate carbon II).

    Returns:
        Literal["singlet", "triplet"]: carbene type at input atom.
    """
    # if any non-metal alpha atom is a heteroatom, return "singlet"
    # these are Fischer carbenes
    for alpha in atom.neighbours:
        if alpha.is_metal:
            continue
        if alpha.atomic_symbol not in ("C", "H"):
            return "singlet"
    # if the carbene C is in a heterocycle, return "singlet"
    # these are Arduengo carbenes (NHCs, CAACs)
    if atom.is_cyclic:
        for ring in atom.rings:
            if any(species.atomic_symbol != "C" for species in ring.atoms):
                return "singlet"
    # for all other carbenes, return "triplet"
    # these are Schrock carbenes
    return "triplet"




[docs]
def hapticity(atom: Atom, metalsite: Atom) -> bool:
    """
    Determines if a ligand binding site possesses hapticity (any n-hapto).

    The site is treated as hapto- when at least one of its non-metal
    neighbours has a neighbour whose label equals the label of ``metalsite``.

    Parameters:
        atom (ccdc.molecule.Atom): Atom object.
        metalsite (ccdc.molecule.Atom): the single metal site to test against;
                                        only its ``label`` is read.

    Returns:
        bool: whether the input ligand is hapto-.
    """
    for atom2 in atom.neighbours:
        # metal neighbours are not part of the ligand
        if atom2.is_metal:
            continue
        if any(n2.label == metalsite.label for n2 in atom2.neighbours):
            return True
    return False




[docs]
def bridging(atom: Atom) -> int:
    """
    Counts the metal atoms directly bonded to the input atom, so that
    bridging sites (bound to more than one metal) can be recognised.

    Parameters:
        atom (ccdc.molecule.Atom): binding site Atom object.

    Returns:
       bridge (int): number of metal atoms bound to the atom.
    """
    # one tally per metal neighbour
    return sum(1 for neighbour in atom.neighbours if neighbour.is_metal)




[docs]
def iVBS_Oxidation_Contrib(
    unique_atoms: list[Atom], rVBO: dict[int, int], dVBO: dict[int, float]
) -> dict[Atom, float]:
    """
    Computes the oxidation state contribution of every unique atom from its
    valence bond sum (VBS), coordination number and valence electron count.

    Parameters:
        unique_atoms (list[ccdc.molecule.Atom]): unique atoms belonging to the
                                                 asymmetric unit.
        rVBO (dict[int, int]): dictionary with each atom's index in mole.atoms
                               as keys and VBO (valence bond order) as values.
        dVBO (dict[int, float]): dictionary with delocalized bond-possessing atom's
                                index in mole.atoms as keys and their corresponding
                                (delocalized-only) VBS.

    Returns:
        oxi_contrib (dict[ccdc.molecule.Atom, float]): dictionary with Atom object
                         as keys and their oxidation state contribution as values.
    """
    # elements that get the carbocation / carbanion / carbene post-check
    tetrel_symbols = ("C", "Si", "Ge", "Pb")
    oxi_contrib = {}

    for atom in unique_atoms:
        # valence bond sum, coordination number, valence electron count
        VBS = assign_VBS(atom, rVBO, dVBO)
        CN = get_CN(atom)
        valence = valence_e(atom)
        # number of unpaired electrons in the free element
        unpaired_e = 4 - abs(4 - valence)

        if atom.is_metal:
            # metals do not contribute
            oxi_contrib[atom] = 0
        elif VBS <= unpaired_e:
            # normal valence
            oxi_contrib[atom] = unpaired_e - VBS
        elif VBS < valence:
            # expanded valence: electrons are recruited two at a time, so
            # take the smallest even step (2, 4, 6, 8) that covers the excess
            diff = VBS - unpaired_e
            for recruited in (2, 4, 6, 8):
                if diff <= recruited:
                    UPE = valence - unpaired_e - recruited
                    break
            oxi_contrib[atom] = VBS + UPE - valence
        elif VBS >= valence:
            oxi_contrib[atom] = VBS - valence

        symbol = atom.atomic_symbol

        # 3-coordinate carbocations / carbanions, carbenes and their heavier
        # homologues are not detectable from the VBS alone
        if symbol in tetrel_symbols:
            # VBS 3 with 3 neighbours: the geometry decides which value the
            # site receives
            if atom not in rVBO and VBS == 3 and CN == 3:
                geom = carbocation_check(atom)
                if geom == "trigonal":
                    oxi_contrib[atom] = -1
                elif geom == "tetrahedral":
                    oxi_contrib[atom] = 1
            # VBS 2 with 2 neighbours is a carbene; singlet and triplet
            # carbenes contribute differently
            if VBS == 2 and CN == 2:
                carbene = carbene_type(atom)
                if carbene == "singlet":
                    oxi_contrib[atom] = 0
                elif carbene == "triplet":
                    oxi_contrib[atom] = 2

        # Nitro groups frequently have both N-O bonds assigned as double
        # bonds, which gives a wrong VBS of 5 and a contribution of -2;
        # a trigonal N carrying exactly two O neighbours is reset to 0
        if symbol == "N" and VBS == 5 and CN == 3:
            O_count = sum(
                1 for neighbour in atom.neighbours if neighbour.atomic_symbol == "O"
            )
            geom = carbocation_check(atom)
            if O_count == 2 and geom == "trigonal":
                oxi_contrib[atom] = 0

    return oxi_contrib




[docs]
def redundantAON(AON: dict[Atom, float], molecule: Molecule) -> dict[Atom, float]:
    """
    Maps the oxidation contributions of unique atom sites to the redundant atom
    sites according to their shared atom labels.

    Every unique site in AON is always carried over to the result, even when
    the molecule holds no atoms. Each atom of the molecule whose label matches
    a unique site receives that site's contribution; atoms with no matching
    label are left out. The input dictionary is not modified.

    Parameters:
        AON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
                        and their oxidation state contribution as values for unique
                        Atom objects.
        molecule (ccdc.molecule.Molecule): Molecule object.

    Returns:
        redAON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
                        and their oxidation state contribution as values for all
                        (including redundant) Atom objects.
    """
    # Build the label -> contribution table once instead of rescanning every
    # unique site for every atom of the molecule. If two unique sites share
    # a label, the one that comes later in AON wins.
    contrib_by_label = {usite.label: contrib for usite, contrib in AON.items()}

    # unique sites keep their own contribution
    redAON = dict(AON)
    for rsite in molecule.atoms:
        if rsite.label in contrib_by_label:
            redAON[rsite] = contrib_by_label[rsite.label]
    return redAON




[docs]
def binding_domain(
    binding_sites: list[Atom],
    AON: dict[Atom, float],
    molecule: Molecule,
    usites: list[Atom],
) -> dict[Atom, list[Atom]]:
    """
    Builds bonding domains within the crystal structure to determine which
    metal binding sites (Atom objects directly bonded to a metal) are connected
    via conjugation. Function accounts for the inconsistent assignment of
    delocalized bonds, by using the bonding domains (see methodology section
    for details on the implementation and validation).

    Each non-metal binding site gets a domain grown either through aromatic
    bonds (when the site itself has an aromatic bond) or through the
    delocalised/conjugated rules of ``deloc_domains``. Domains that share at
    least one member are then merged.

    Parameters:
        binding_sites (list[ccdc.molecule.Atom]): list of binding sites connecting
                                                  metal atoms and ligands.
        AON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
                        and their oxidation state contribution as values. It is
                        looked up for every atom reached while crawling, so it
                        must contain those atoms.
        molecule (ccdc.molecule.Molecule): Molecule object.
        usites: collection of unique atoms.
                        NOTE(review): annotated as a list, but it is subscripted
                        with atom labels (``usites[member.label]``), so callers
                        presumably pass a label-keyed mapping - confirm.

    Returns:
        sitedomain (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
                        dictionary with Atom object as keys and a list of Atoms
                        connected through bonding that form a binding domain
                        as values.
    """

    def arom_domains(
        site: Atom, usites: list[Atom], aromlist: list[Atom], bondset: set[Bond]
    ) -> list[Atom]:
        """
        Recursively generate aromatic binding domains.

        ``aromlist`` and ``bondset`` are mutated in place and carried through
        the recursion. Each call adds at most one new atom and then restarts
        the scan, so there is one recursive call per atom added.
        """
        # make sure the bonds of the starting site are part of the frontier
        for bond in site.bonds:
            bondset.add(bond)
        for bond in bondset:
            for member in bond.atoms:
                # accept a new, non-metal atom that has at least one
                # aromatic bond
                if all(
                    [
                        (not member in aromlist),
                        (not member.is_metal),
                        (any(mbond.bond_type == "Aromatic" for mbond in member.bonds)),
                    ]
                ):
                    aromlist.append(member)
                    for mbond in member.bonds:
                        bondset.add(mbond)
                    # bondset was modified while being iterated: leave the
                    # loop immediately and rescan from scratch
                    return arom_domains(site, usites, aromlist, bondset)
        # aromlist currently contains non-unique instances of atoms
        # this will cause problems further down the line, so correct
        # (each entry is replaced by the usites entry for its label)
        for index, member in enumerate(aromlist):
            aromlist[index] = usites[member.label]
        return aromlist

    def deloc_domains(
        site: Atom,
        usites: list[Atom],
        AON: dict[Atom, float],
        molecule: Molecule,
        deloclist: list[Atom],
        bondset: set[Bond],
        checked_bonds: set[Bond],
    ) -> list[Atom]:
        """
        Recursively generate delocalised binding domains.

        ``deloclist``, ``bondset`` and ``checked_bonds`` are mutated in place.
        Each call processes one not-yet-checked bond, marks it as checked and
        recurses; once every bond in ``bondset`` is checked, the collected
        atoms are mapped to their ``usites`` entries and returned.
        """
        # make sure the bonds of the starting site are part of the frontier
        for bond in site.bonds:
            if not bond in bondset:
                bondset.add(bond)
        for bond in bondset:
            if not bond in checked_bonds:
                for member in bond.atoms:
                    # A member joins the domain when it is new, not a metal,
                    # has no aromatic bond, and at least one of these holds:
                    #   - it is at most 2 bonds away from the starting site
                    #   - the current bond is "Delocalised"
                    #   - the current bond is conjugated
                    #   - the current bond is "Single" and the member has a
                    #     non-zero AON
                    #   - the member has no Single, Aromatic or Delocalised
                    #     bond at all
                    # The conditions live in list literals, so all of them
                    # (including AON[member] and the shortest-path query) are
                    # evaluated for every member, even when an earlier one
                    # already decides the outcome.
                    if all(
                        [
                            (not member in deloclist),
                            (not member.is_metal),
                            (
                                not any(
                                    mbond.bond_type == "Aromatic"
                                    for mbond in member.bonds
                                )
                            ),
                            (
                                any(
                                    [
                                        (
                                            len(
                                                molecule.shortest_path_bonds(
                                                    site, member
                                                )
                                            )
                                            <= 2
                                        ),
                                        (bond.bond_type == "Delocalised"),
                                        (bond.is_conjugated),
                                        (
                                            all(
                                                [
                                                    (bond.bond_type == "Single"),
                                                    (not AON[member] == 0),
                                                ]
                                            )
                                        ),
                                        (
                                            all(
                                                [
                                                    (
                                                        not any(
                                                            mbond.bond_type == "Single"
                                                            for mbond in member.bonds
                                                        )
                                                    ),
                                                    (
                                                        not any(
                                                            mbond.bond_type
                                                            == "Aromatic"
                                                            for mbond in member.bonds
                                                        )
                                                    ),
                                                    (
                                                        not any(
                                                            mbond.bond_type
                                                            == "Delocalised"
                                                            for mbond in member.bonds
                                                        )
                                                    ),
                                                ]
                                            )
                                        ),
                                    ]
                                )
                            ),
                        ]
                    ):
                        deloclist.append(member)
                        for mbond in member.bonds:
                            bondset.add(mbond)
                # bondset may have grown while being iterated: mark this bond
                # as done and restart the scan through recursion
                checked_bonds.add(bond)
                return deloc_domains(
                    site, usites, AON, molecule, deloclist, bondset, checked_bonds
                )
        # deloclist currently contains non-unique instances of atoms
        # this will cause problems further down the line, so correct
        # (each entry is replaced by the usites entry for its label)
        for index, member in enumerate(deloclist):
            deloclist[index] = usites[member.label]
        return deloclist

    # grow one domain per non-metal binding site; a site with an aromatic
    # bond uses the aromatic crawl, every other site the delocalised crawl
    sitedomain = {}
    for site in binding_sites:
        if not site.is_metal == True:
            if any(sbond.bond_type == "Aromatic" for sbond in site.bonds):
                sitedomain[site] = arom_domains(
                    site, usites, aromlist=[site], bondset=set()
                )
            if not any(sbond.bond_type == "Aromatic" for sbond in site.bonds):
                sitedomain[site] = deloc_domains(
                    site,
                    usites,
                    AON,
                    molecule,
                    deloclist=[site],
                    bondset=set(),
                    checked_bonds=set(),
                )

    # merge domains that overlap: every site whose domain shares a member
    # with this site's domain ends up holding the same (unioned) list
    for site in sitedomain:
        olapset = set()
        for site2 in sitedomain:
            for member in sitedomain[site]:
                if member in sitedomain[site2]:
                    olapset.add(site2)
        for olap in olapset:
            sitedomain[site] = list(set(sitedomain[site]) | set(sitedomain[olap]))
            sitedomain[olap] = sitedomain[site]
    return sitedomain




[docs]
def binding_contrib(
    binding_sphere: dict[Atom, list[Atom]],
    binding_sites: list[Atom],
    AON: dict[Atom, float],
) -> dict[Atom, float]:
    """
    Shares the oxidation state contribution of each binding domain equally
    between the binding sites that belong to it.

    For every domain the contributions of all members are summed and divided
    by the number of members that are binding sites. A domain that contains
    no binding site raises ZeroDivisionError.

    Parameters:
        binding_sphere (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
                        dictionary with Atom object as keys and a list of Atoms
                        connected through bonding that form a binding domain as values.
        binding_sites (list[ccdc.molecule.Atom]): list of binding sites connecting
                                                  metal atoms and ligands.
        AON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
                        and their oxidation state contribution as values for unique Atoms.

    Returns:
        site_contrib (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
                        and their updated oxidation state contribution as values accounting
                        for distribution within the binding domain.
    """
    site_contrib = {}
    for site, domain in binding_sphere.items():
        # total charge carried by the whole domain
        domain_total = sum(AON[member] for member in domain)
        # number of domain members that actually bind a metal
        nbinding = sum(1 for member in domain if member in binding_sites)
        site_contrib[site] = domain_total / nbinding
    return site_contrib




[docs]
def outer_sphere_domain(
    uniquesites: list[Atom], binding_domains: dict[Atom, list[Atom]]
) -> list[Atom]:
    """
    Collects the non-metal unique sites that are not part of any binding
    domain; these must be checked for outer sphere charge contributions.

    Parameters:
        uniquesites (list[ccdc.molecule.Atom]): list of unique atoms in the structure
                                                belonging to the asymmetric unit.
        binding_domains (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
                        dictionary with Atom object as keys and a list of Atoms
                        connected through bonding that form a binding domain as values.

    Returns:
        outer_sphere (list[ccdc.molecule.Atom]): list of unique, non-metal atoms
                                                 outside of binding domains.
    """
    outer_sphere = []
    for site in uniquesites:
        # is the site a member of at least one binding domain?
        in_some_domain = any(
            site in members for members in binding_domains.values()
        )
        is_metal = site.is_metal
        if in_some_domain or is_metal:
            continue
        outer_sphere.append(site)
    return outer_sphere




[docs]
def outer_sphere_contrib(outer_sphere: list[Atom], AON: dict[Atom, float]) -> int:
    """
    Adds up the oxidation state contributions (formal charges) of all outer
    sphere atoms.

    Parameters:
        outer_sphere (list[ccdc.molecule.Atom]): list of unique, non-metal atoms
                                                 outside of binding domains.
        AON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
                        and their oxidation state contribution as values for unique Atoms.

    Returns:
        contrib (int): sum of outer sphere charge contributions
                       (0 for an empty outer sphere).
    """
    return sum(AON[site] for site in outer_sphere)




[docs]
def get_metal_networks(
    ligand_sites: dict[Atom, list[Atom]],
    binding_sphere: dict[Atom, list[Atom]],
    bindingAON: dict[Atom, float],
) -> dict[Atom, list[Atom]]:
    """
    Determines the metal atoms that are connected through binding domains and
    charged ligands. Any connections through neutral ligands are ignored as they
    do not contribute to the charge accounting.

    Parameters:
        ligand_sites (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
                            dictionary with metal Atom object as key and the
                            list of ligand atoms which bind them as values.
        binding_sphere (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
                            dictionary with Atom object as keys and a list of Atoms
                            connected through bonding that form a binding domain
                            as values.
        bindingAON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as
                            keys and their updated oxidation state contribution as
                            values accounting for distribution within the binding domain.

    Returns:
        network_dict (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
                            dictionary with metal Atom objects as keys and a list
                            of the metal Atom objects in the same network
                            (connected through metal-metal bonds or binding
                            domains/charged ligands) as values. The list is the
                            whole network, so it includes the key metal itself.
                            Ignores neutral ligand connections.
    """

    def network_crawl(
        ligand_sites: dict[Atom, list[Atom]],
        binding_sphere: dict[Atom, list[Atom]],
        bindingAON: dict[Atom, float],
        metal_networks: list[list[Atom]],
        checked_sites: list[Atom],
        group: list[Atom],
    ) -> list[Atom]:
        """
        Recursively crawl through bonds to identify metals connected through direct bonding
        or delocalised/conjugated systems.

        ``group`` (the network being built) and ``checked_sites`` are mutated
        in place. Whenever a new connection is followed the function recurses
        and rescans ``group`` from its first metal; ``group`` is returned once
        a full pass finds nothing new.
        """
        for metal in group:
            # This block will find all metals connected to an input metal by metal-metal bonds
            checked_sites.append(metal)
            for neighbour in metal.neighbours:
                if neighbour.is_metal:
                    if not neighbour in checked_sites:
                        checked_sites.append(neighbour)
                        # the neighbour may be a symmetry copy: add the
                        # ligand_sites key that carries the same label
                        for site in ligand_sites:
                            if neighbour.label == site.label:
                                if not site in group:
                                    group.append(site)
                        # group may have changed while being iterated:
                        # restart the crawl
                        return network_crawl(
                            ligand_sites,
                            binding_sphere,
                            bindingAON,
                            metal_networks,
                            checked_sites,
                            group,
                        )
            # this block will find all metals connected to an input metal by
            # conjugation and delocalized charge ligands
            # metals connected through NEUTRAL ligands will be ignored
            for site in ligand_sites[metal]:
                # only follow ligand sites with a non-zero contribution that
                # have not been visited yet
                if all([(not bindingAON[site] == 0), (not site in checked_sites)]):
                    for dsite in binding_sphere[site]:
                        # only domain members that are themselves keys of
                        # binding_sphere are followed
                        if all(
                            [(not dsite in checked_sites), (dsite in binding_sphere)]
                        ):
                            checked_sites.append(dsite)
                            for environ in dsite.neighbours:
                                if environ.is_metal:
                                    if environ in ligand_sites:
                                        # the metal is itself a ligand_sites
                                        # key: add it unless it already sits
                                        # in a finished network or this group
                                        if all(
                                            [
                                                (
                                                    all(
                                                        not environ in network
                                                        for network in metal_networks
                                                    )
                                                ),
                                                (not environ in group),
                                            ]
                                        ):
                                            group.append(environ)
                                    else:
                                        # otherwise match it by label to a
                                        # ligand_sites key and add that key
                                        # under the same conditions
                                        for umetal in ligand_sites:
                                            if all(
                                                [
                                                    (umetal.label == environ.label),
                                                    (
                                                        all(
                                                            not umetal in network
                                                            for network in metal_networks
                                                        )
                                                    ),
                                                    (not umetal in group),
                                                ]
                                            ):
                                                group.append(umetal)
                    # restart the crawl after processing this ligand site
                    # NOTE(review): this relies on the loop above having added
                    # `site` to checked_sites (i.e. on `site` being a member of
                    # its own domain); otherwise the same state would recurse
                    # again - confirm against the domain construction.
                    return network_crawl(
                        ligand_sites,
                        binding_sphere,
                        bindingAON,
                        metal_networks,
                        checked_sites,
                        group,
                    )
        return group

    # start a new crawl from every metal that is not yet part of a network
    metal_networks = []
    for metal in ligand_sites:
        if all(not metal in network for network in metal_networks):
            metal_networks.append(
                network_crawl(
                    ligand_sites,
                    binding_sphere,
                    bindingAON,
                    metal_networks,
                    checked_sites=[],
                    group=[metal],
                )
            )

    # map every metal to the (shared) list describing its network
    network_dict = {}
    for network in metal_networks:
        for metal in network:
            network_dict[metal] = network
    return network_dict




[docs]
def distribute_ONEC(
    sONEC: dict[Atom, list[float, float]],
    metal_networks: dict[Atom, list[Atom]],
    IEs: dict[str, list[float]],
    ONP: dict[str, list[float]],
    highest_known_ON: dict[str, int],
    metal_CN: dict[Molecule, int],
    most_probable_ON: dict[str, int],
) -> dict[Atom, list[float, float]]:
    """
    Redistributes the oxidation state contributions across all metal atoms in
    the structure according to their metal networks (fully local distribution)
    & calculates their associated electron counts. Features utilizing electron
    counts are minimally implemented at this time.

    Parameters:
        sONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary with
                        metal Atom object as keys and lists containing the initial
                        oxidation state and electron count implied by only the
                        equal splitting of binding domain charges as values.
        metal_networks (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
                        dictionary with as metal Atom objects as keys and a list of
                        other metal Atom objects connected through binding domains/
                        charged ligands as values. Ignores neutral ligand connections.
        IEs (dict[str, list[float]]): dictionary with metal element symbols as keys
                        and a list of their reported ionization energies as values.
        ONP (dict[str, list[float]]): dictionary with metal element symbols as keys
                        and a list of the probability at the relevant oxidation states
                        as values.
        highest_known_ON (dict[str, int]) : dictionary with metal element symbols as
                        keys and a their highest known oxidation state as values.
        metal_CN (dict[ccdc.molecule.Molecule, int]): dictionary with as metal Atom
                        objects as keys and their effective coordination number as values.
        most_probable_ON (dict[str, int]) : dictionary with metal element symbols as
                        keys and a their oxidation state with the highest probability
                        as values.

    Returns:
        distributed_ONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary
                        with metal Atom object as keys and lists containing their
                        redistributed oxidation state and electron count as values.
    """

    def recursive_distributor_single_network(
        iONEC: dict[Atom, list[float, float]],
        available_charge: int,
        sorted_metals: dict[str, Atom],
        IEs: dict[str, list[float]],
        ONP: dict[str, list[float]],
        highest_known_ON: dict[str, int],
    ) -> dict[Atom, list[float, float]]:
        """
        Distribute network charge according to ionization energy and probability
        until all charge is distributed. Performed after tallying available
        network charge and sorting network metals by element type.
        """
        # initialize working dictionary
        dONEC = {}
        dONEC = dict(iONEC)

        # positive contribution?
        if available_charge > 0:

            # get list of improbable and improbable next oxidations
            prob_metal_type = []
            improb_metal_type = []
            for metal_type in sorted_metals:
                try:
                    prob = float(
                        100
                        * ONP[metal_type][
                            math.floor(dONEC[sorted_metals[metal_type][0]][0]) + 1
                        ]
                    )
                except IndexError:
                    prob = 0
                if prob >= 1:
                    prob_metal_type.append(metal_type)
                else:
                    improb_metal_type.append(metal_type)

            # if only one metal type has a probable next oxidation state, do that
            if len(prob_metal_type) == 1:
                lowestMetal = prob_metal_type[0]

            # if more than one metal type has a probable next oxidation state,
            # determine next lowest ionization energy among probable next
            # oxidation states
            elif len(prob_metal_type) > 1:
                # find lowest next ionization energy
                for metal_type in prob_metal_type:
                    # metal in a negative oxidation state? Use IE = 0.
                    if dONEC[sorted_metals[metal_type][0]][0] < 0:
                        currentIE = 0
                    # metal oxidation state at or higher than highest known? Set IE arbitrarily high.
                    elif (
                        dONEC[sorted_metals[metal_type][0]][0]
                        >= highest_known_ON[metal_type]
                    ):
                        currentIE = 9999
                    # otherwise, use the appropriate IE.
                    else:
                        currentIE = float(
                            IEs[metal_type][
                                math.floor(dONEC[sorted_metals[metal_type][0]][0])
                            ]
                        )
                    if not "lowestIE" in locals():
                        lowestIE = currentIE
                        lowestMetal = metal_type
                    else:
                        if currentIE < lowestIE:
                            lowestIE = currentIE
                            lowestMetal = metal_type

            # if there is no probable next oxidation state available,
            # determine lowest ionization energy among improbable next oxidation states
            elif len(prob_metal_type) == 0:
                # find lowest next ionization energy
                for metal_type in improb_metal_type:
                    # metal in a negative oxidation state? Use IE = 0.
                    if dONEC[sorted_metals[metal_type][0]][0] < 0:
                        currentIE = 0
                    # metal oxidation state at or higher than highest known? Set IE arbitrarily high.
                    elif (
                        dONEC[sorted_metals[metal_type][0]][0]
                        >= highest_known_ON[metal_type]
                    ):
                        currentIE = 9999
                    # otherwise, use the appropriate IE.
                    else:
                        currentIE = float(
                            IEs[metal_type][
                                math.floor(dONEC[sorted_metals[metal_type][0]][0])
                            ]
                        )
                    if not "lowestIE" in locals():
                        lowestIE = currentIE
                        lowestMetal = metal_type
                    else:
                        if currentIE < lowestIE:
                            lowestIE = currentIE
                            lowestMetal = metal_type

            # distribute one ionization energy level worth of charge
            if available_charge >= len(sorted_metals[lowestMetal]):
                for metal in sorted_metals[lowestMetal]:
                    dONEC[metal][0] += 1
                    available_charge -= 1
            elif available_charge < len(sorted_metals[lowestMetal]):
                for metal in sorted_metals[lowestMetal]:
                    dONEC[metal][0] += available_charge / (
                        len(sorted_metals[lowestMetal])
                    )
                available_charge = 0

        # negative contribution?
        if available_charge < 0:
            # get list of improbable and improbable next oxidations
            prob_metal_type = []
            improb_metal_type = []
            for metal_type in sorted_metals:
                try:
                    prob = float(
                        100
                        * ONP[metal_type][
                            math.floor(dONEC[sorted_metals[metal_type][0]][0]) - 1
                        ]
                    )
                except IndexError:
                    prob = 0
                if prob >= 1:
                    prob_metal_type.append(metal_type)
                else:
                    improb_metal_type.append(metal_type)

            # if only one metal type has a probable next oxidation state, do that
            if len(prob_metal_type) == 1:
                highestMetal = prob_metal_type[0]

            # if more than one metal type has a probable next oxidation state,
            # determine next highest ionization energy among probable next
            # oxidation states
            elif len(prob_metal_type) > 1:
                for metal_type in prob_metal_type:
                    # metal in a negative oxidation state? Use IE = 0.
                    if dONEC[sorted_metals[metal_type][0]][0] <= 0:
                        currentIE = 0
                    # metal oxidation state higher than highest known? Set IE arbitrarily high.
                    elif (
                        dONEC[sorted_metals[metal_type][0]][0]
                        > highest_known_ON[metal_type]
                    ):
                        currentIE = 9999
                    # otherwise, use the appropriate IE.
                    else:
                        if not any(
                            [
                                (
                                    math.isclose(
                                        (dONEC[sorted_metals[metal_type][0]][0] % 1),
                                        0,
                                        abs_tol=0.0001,
                                    )
                                ),
                                (
                                    math.isclose(
                                        (dONEC[sorted_metals[metal_type][0]][0] % 1),
                                        1,
                                        abs_tol=0.0001,
                                    )
                                ),
                            ]
                        ):
                            currentIE = float(
                                IEs[metal_type][
                                    math.floor(dONEC[sorted_metals[metal_type][0]][0])
                                ]
                            )
                        else:
                            currentIE = float(
                                IEs[metal_type][
                                    math.floor(dONEC[sorted_metals[metal_type][0]][0])
                                    - 1
                                ]
                            )
                    if not "highestIE" in locals():
                        highestIE = currentIE
                        highestMetal = metal_type
                    else:
                        if currentIE > highestIE:
                            highestIE = currentIE
                            highestMetal = metal_type

            # if no probable next oxidation states are available,
            # determine next highest ionization energy among probable next
            # oxidation states
            elif len(improb_metal_type) > 0:
                for metal_type in improb_metal_type:
                    # metal in a negative oxidation state? Use IE = 0.
                    if dONEC[sorted_metals[metal_type][0]][0] <= 0:
                        currentIE = 0
                    # metal oxidation state higher than highest known? Set IE arbitrarily high.
                    elif (
                        dONEC[sorted_metals[metal_type][0]][0]
                        > highest_known_ON[metal_type]
                    ):
                        currentIE = 9999
                    # otherwise, use the appropriate IE.
                    else:
                        if not any(
                            [
                                (
                                    math.isclose(
                                        (dONEC[sorted_metals[metal_type][0]][0] % 1),
                                        0,
                                        abs_tol=0.0001,
                                    )
                                ),
                                (
                                    math.isclose(
                                        (dONEC[sorted_metals[metal_type][0]][0] % 1),
                                        1,
                                        abs_tol=0.0001,
                                    )
                                ),
                            ]
                        ):
                            currentIE = float(
                                IEs[metal_type][
                                    math.floor(dONEC[sorted_metals[metal_type][0]][0])
                                ]
                            )
                        else:
                            currentIE = float(
                                IEs[metal_type][
                                    math.floor(dONEC[sorted_metals[metal_type][0]][0])
                                    - 1
                                ]
                            )
                    if not "highestIE" in locals():
                        highestIE = currentIE
                        highestMetal = metal_type
                    else:
                        if currentIE > highestIE:
                            highestIE = currentIE
                            highestMetal = metal_type

            # distribute one ionization energy level worth of charge
            if (-1 * available_charge) >= len(sorted_metals[highestMetal]):
                for metal in sorted_metals[highestMetal]:
                    dONEC[metal][0] -= 1
                    available_charge += 1
            elif (-1 * available_charge) < len(sorted_metals[highestMetal]):
                for metal in sorted_metals[highestMetal]:
                    dONEC[metal][0] += available_charge / (
                        len(sorted_metals[highestMetal])
                    )
                available_charge = 0

        # if all charge has been distributed, we're done, otherwise, roll again
        if available_charge == 0:
            return dONEC
        else:
            return recursive_distributor_single_network(
                dONEC, available_charge, sorted_metals, IEs, ONP, highest_known_ON
            )

    # operate on each network individually
    distributed_ONEC = {}
    for network in metal_networks:

        # sort metals by element type
        sorted_metals = {}
        for metal in metal_networks[network]:
            sorted_metals[metal.atomic_symbol] = []
        for metal in metal_networks[network]:
            sorted_metals[metal.atomic_symbol].append(metal)

        # tally up network charge to be distributed
        # and initialize metals to most probable ON
        # (adjust network charge accordingly)
        network_charge = 0
        for metal in metal_networks[network]:
            network_charge += sONEC[metal][0]
            distributed_ONEC[metal] = [most_probable_ON[metal.atomic_symbol]]
            network_charge -= most_probable_ON[metal.atomic_symbol]
            distributed_ONEC[metal].append(int(sONEC[metal][1]))

        # if the most probable oxidation distribution has already balanced the charge, we're done
        # if not, recursively distribute network charge according to ionization energy
        if not (math.isclose(network_charge, 0, abs_tol=0.0001)):
            distributed_ONEC = recursive_distributor_single_network(
                distributed_ONEC,
                network_charge,
                sorted_metals,
                IEs,
                ONP,
                highest_known_ON,
            )

        # finally, adjust electron count to new oxidation state (OiL RiG)
        for metal in metal_networks[network]:
            distributed_ONEC[metal][1] = (
                valence_e(metal) + (2 * metal_CN[metal]) - distributed_ONEC[metal][0]
            )

    return distributed_ONEC




[docs]
def distribute_OuterSphere(
    sONEC: dict[Atom, list[float, float]],
    outer_sphere_charge: int,
    IEs: dict[str, list[float]],
    ONP: dict[str, list[float]],
    highest_known_ON: dict[str, int],
    metal_CN: dict[Atom, int],
) -> dict[Atom, list[float, float]]:
    """
    Redistributes the oxidation state contributions across all metal atoms in
    the structure according to the outer sphere charge contribution (partially
    local distribution) & calculates their associated electron counts.
    Features utilizing electron counts are minimally implemented at this time.

    The charge is handed out one step at a time. At each step the metals whose
    next oxidation state has a probability of at least 1% are preferred; among
    several candidates the metal(s) with the lowest (oxidation) or highest
    (reduction) ionization energy are chosen, ties being adjusted together.
    If fewer whole units of charge remain than there are chosen metals, the
    remainder is split equally between them.

    Parameters:
        sONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary with
                        metal Atom object as keys and lists containing the initial
                        oxidation state and electron count implied by only the equal
                        splitting of binding domain charges as values.
        outer_sphere_charge (int): sum of outer sphere charge contributions.
        IEs (dict[str, list(float)]): dictionary with metal element symbols as keys
                        and a list of their reported ionization energies as values.
        ONP (dict[str, list(float)]): dictionary with metal element symbols as keys
                        and a list of the probability at the relevant oxidation
                        states as values.
        highest_known_ON (dict[str, int]) : dictionary with metal element symbols
                        as keys and their highest known oxidation state as values.
        metal_CN (dict[ccdc.molecule.Atom, int]): dictionary with metal Atom
                        objects as keys and their effective coordination number
                        as values.

    Returns:
        distributed_ONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary
                        with metal Atom object as keys and lists containing their
                        redistributed oxidation state and electron count as values.
                        When outer_sphere_charge is 0 the input sONEC object itself
                        is returned unchanged; when sONEC is empty an empty
                        dictionary is returned.

    Raises:
        ValueError: if outer_sphere_charge is NaN or infinite.
    """

    def is_whole_number(value: float) -> bool:
        """True when the fractional part of value is within 1e-4 of 0 or 1."""
        fraction = value % 1
        return math.isclose(fraction, 0, abs_tol=0.0001) or math.isclose(
            fraction, 1, abs_tol=0.0001
        )

    def next_state_percent(metal: Atom, current_ON: float, step: int) -> float:
        """Probability (in %) of the oxidation state reached by moving `step`."""
        # NOTE(review): Python accepts negative list indices, so when
        # floor(current_ON) + step is negative this reads from the END of the
        # ONP list instead of raising IndexError. Kept exactly as before;
        # confirm against the layout of ONP whether that is intended.
        try:
            return float(
                100 * ONP[metal.atomic_symbol][math.floor(current_ON) + step]
            )
        except IndexError:
            return 0

    def oxidation_IE(metal: Atom, current_ON: float) -> float:
        """Ionization energy used to rank a metal for the next oxidation."""
        # metal in a negative oxidation state? Use IE = 0.
        if current_ON < 0:
            return 0
        # at or above the highest known state? Set IE arbitrarily high.
        if current_ON >= highest_known_ON[metal.atomic_symbol]:
            return 9999
        return float(IEs[metal.atomic_symbol][math.floor(current_ON)])

    def reduction_IE(metal: Atom, current_ON: float) -> float:
        """Ionization energy used to rank a metal for the next reduction."""
        # metal at or below oxidation state zero? Use IE = 0.
        if current_ON <= 0:
            return 0
        # above the highest known state? Set IE arbitrarily high.
        if current_ON > highest_known_ON[metal.atomic_symbol]:
            return 9999
        # a whole oxidation state gives back the IE of the level below it,
        # a fractional one is still part-way through the current level.
        index = math.floor(current_ON)
        if is_whole_number(current_ON):
            index -= 1
        return float(IEs[metal.atomic_symbol][index])

    def metals_to_adjust(ONEC: dict, step: int) -> list:
        """Pick the metal(s) that receive the next unit of charge."""
        probable = [
            metal
            for metal in ONEC
            if next_state_percent(metal, ONEC[metal][0], step) >= 1
        ]
        # exactly one metal has a probable next oxidation state: use it
        if len(probable) == 1:
            return probable
        # otherwise rank the probable metals, or every metal if none qualifies
        candidates = probable if probable else list(ONEC)
        IE_of = oxidation_IE if step > 0 else reduction_IE

        best_IE = None
        selected = []
        for metal in candidates:
            current_IE = IE_of(metal, ONEC[metal][0])
            if best_IE is None:
                best_IE, selected = current_IE, [metal]
                continue
            # oxidation wants the lowest IE, reduction the highest
            is_better = current_IE < best_IE if step > 0 else current_IE > best_IE
            if current_IE == best_IE:
                selected.append(metal)
            elif is_better:
                best_IE, selected = current_IE, [metal]
        return selected

    def distribute(ONEC: dict, available_charge: float) -> dict:
        """Update the oxidation states in ONEC in place until no charge is left."""
        if not math.isfinite(available_charge):
            raise ValueError(
                "outer sphere charge must be a finite number, got "
                f"{available_charge!r}"
            )
        while available_charge != 0:
            step = 1 if available_charge > 0 else -1
            targets = metals_to_adjust(ONEC, step)
            if abs(available_charge) >= len(targets):
                # enough charge for one whole unit on every chosen metal
                for metal in targets:
                    ONEC[metal][0] += step
                    available_charge -= step
            else:
                # split what is left equally and finish
                share = available_charge / len(targets)
                for metal in targets:
                    ONEC[metal][0] += share
                available_charge = 0
        return ONEC

    if outer_sphere_charge == 0:
        return sONEC
    # no metals to carry the charge: nothing can be distributed
    if not sONEC:
        return {}

    # initialize dictionary for charge distribution (fresh lists, so the
    # caller's sONEC entries are not modified)
    distributed_ONEC = {
        metal: [state[0], int(state[1])] for metal, state in sONEC.items()
    }

    # distribute the outer sphere charge according to ionization energy
    distribute(distributed_ONEC, outer_sphere_charge)

    # finally, adjust electron count to new oxidation state (OiL RiG)
    for metal, state in distributed_ONEC.items():
        state[1] = valence_e(metal) + (2 * metal_CN[metal]) - state[0]

    return distributed_ONEC




[docs]
def global_charge_distribution(
    metalONdict: dict[Atom, list[float, float]],
    IEs: dict[str, list[float]],
    ONP: dict[str, list[float]],
    highest_known_ON: dict[str, int],
    metal_CN: dict[Atom, int],
    most_probable_ON: dict[str, int],
) -> dict[Atom, list[float, float]]:
    """
    Redistributes the oxidation state contributions across all metal atoms in
    the structure according to full/global sharing (fully delocalized distribution)
    & calculates their associated electron counts. Features utilizing electron
    counts are minimally implemented at this time.

    Every metal is first set to the most probable oxidation state of its
    element. The difference between the summed input oxidation states and
    that baseline is then handed out one step at a time, one element type per
    step: element types whose next oxidation state has a probability of at
    least 1% are preferred, and among several candidates the type with the
    lowest (oxidation) or highest (reduction) ionization energy is chosen
    (the first one encountered wins ties). All metals of the chosen type are
    adjusted together; if fewer whole units of charge remain than there are
    metals of that type, the remainder is split equally between them.

    Parameters:
        metalONdict (dict[ccdc.molecule.Atom, list[float, float]]): dictionary with
                        metal Atom object as keys and lists containing the initial
                        oxidation state and electron count implied by only the
                        equal splitting of binding domain charges as values.
        IEs (dict[str, list[float]]): dictionary with metal element symbols as keys
                        and a list of their reported ionization energies as values.
        ONP (dict[str, list[float]]): dictionary with metal element symbols as keys
                        and a list of the probability at the relevant oxidation
                        states as values.
        highest_known_ON (dict[str, int]) : dictionary with metal element symbols
                        as keys and their highest known oxidation state as values.
        metal_CN (dict[ccdc.molecule.Atom, int]): dictionary with metal Atom
                        objects as keys and their effective coordination number
                        as values.
        most_probable_ON (dict[str, int]) : dictionary with metal element symbols
                        as keys and their oxidation state with the highest
                        probability as values.

    Returns:
        global_ONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary with
                        metal Atom object as keys and lists containing their
                        redistributed oxidation state and electron count as values.

    Raises:
        ValueError: if the charge to distribute is NaN or infinite.
    """

    def is_whole_number(value: float) -> bool:
        """True when the fractional part of value is within 1e-4 of 0 or 1."""
        fraction = value % 1
        return math.isclose(fraction, 0, abs_tol=0.0001) or math.isclose(
            fraction, 1, abs_tol=0.0001
        )

    def next_state_percent(symbol: str, current_ON: float, step: int) -> float:
        """Probability (in %) of the oxidation state reached by moving `step`."""
        # NOTE(review): Python accepts negative list indices, so when
        # floor(current_ON) + step is negative this reads from the END of the
        # ONP list instead of raising IndexError. Kept exactly as before;
        # confirm against the layout of ONP whether that is intended.
        try:
            return float(100 * ONP[symbol][math.floor(current_ON) + step])
        except IndexError:
            return 0

    def oxidation_IE(symbol: str, current_ON: float) -> float:
        """Ionization energy used to rank an element for the next oxidation."""
        # metal in a negative oxidation state? Use IE = 0.
        if current_ON < 0:
            return 0
        # at or above the highest known state? Set IE arbitrarily high.
        if current_ON >= highest_known_ON[symbol]:
            return 9999
        return float(IEs[symbol][math.floor(current_ON)])

    def reduction_IE(symbol: str, current_ON: float) -> float:
        """Ionization energy used to rank an element for the next reduction."""
        # metal at or below oxidation state zero? Use IE = 0.
        if current_ON <= 0:
            return 0
        # above the highest known state? Set IE arbitrarily high.
        if current_ON > highest_known_ON[symbol]:
            return 9999
        # a whole oxidation state gives back the IE of the level below it,
        # a fractional one is still part-way through the current level.
        index = math.floor(current_ON)
        if is_whole_number(current_ON):
            index -= 1
        return float(IEs[symbol][index])

    def element_to_adjust(ONEC: dict, metals_by_type: dict, step: int) -> str:
        """Pick the element type that receives the next unit of charge."""

        def state_of(symbol: str) -> float:
            # all metals of one type are adjusted together, so the first
            # metal of the type stands in for the whole group
            return ONEC[metals_by_type[symbol][0]][0]

        probable = [
            symbol
            for symbol in metals_by_type
            if next_state_percent(symbol, state_of(symbol), step) >= 1
        ]
        # exactly one element has a probable next oxidation state: use it
        if len(probable) == 1:
            return probable[0]
        # otherwise rank the probable elements, or every element if none
        # has a probable next oxidation state
        candidates = probable if probable else list(metals_by_type)
        IE_of = oxidation_IE if step > 0 else reduction_IE

        best_IE = None
        selected = None
        for symbol in candidates:
            current_IE = IE_of(symbol, state_of(symbol))
            # oxidation wants the lowest IE, reduction the highest;
            # strict comparison keeps the first element on ties
            if best_IE is None or (
                current_IE < best_IE if step > 0 else current_IE > best_IE
            ):
                best_IE, selected = current_IE, symbol
        return selected

    def distribute(ONEC: dict, available_charge: float, metals_by_type: dict) -> dict:
        """Update the oxidation states in ONEC in place until no charge is left."""
        if not math.isfinite(available_charge):
            raise ValueError(
                f"global charge must be a finite number, got {available_charge!r}"
            )
        while available_charge != 0:
            step = 1 if available_charge > 0 else -1
            targets = metals_by_type[element_to_adjust(ONEC, metals_by_type, step)]
            if abs(available_charge) >= len(targets):
                # enough charge for one whole unit on every metal of the type
                for metal in targets:
                    ONEC[metal][0] += step
                    available_charge -= step
            else:
                # split what is left equally and finish
                share = available_charge / len(targets)
                for metal in targets:
                    ONEC[metal][0] += share
                available_charge = 0
        return ONEC

    # sort metals by element type
    sorted_metals = {}
    for metal in metalONdict:
        sorted_metals.setdefault(metal.atomic_symbol, []).append(metal)

    # tally up the global charge to be distributed
    # and initialize global ON to most probable for all metals
    global_ONEC = {}
    global_charge = 0
    for metal in metalONdict:
        baseline = most_probable_ON[metal.atomic_symbol]
        global_charge += metalONdict[metal][0]
        global_charge -= baseline
        global_ONEC[metal] = [baseline, int(metalONdict[metal][1])]

    # if the most probable states already balance the charge we are done,
    # otherwise distribute the remainder according to ionization energy
    if not math.isclose(global_charge, 0, abs_tol=0.0001):
        distribute(global_ONEC, global_charge, sorted_metals)

    # finally, adjust electron count to new oxidation state (OiL RiG)
    for metal, state in global_ONEC.items():
        state[1] = valence_e(metal) + (2 * metal_CN[metal]) - state[0]
    return global_ONEC




[docs]
def KnownONs() -> dict[str, list[int]]:
    """
    Builds the table of known oxidation states for every element.

    The data is embedded below as whitespace-separated text, one element per
    row: the element symbol followed by its known oxidation states. Elements
    with no listed state are still present in the result, mapped to an
    empty list.

    Returns:
        KONs (dict[str, list(int)]) : dictionary with element symbols as keys
                              and a list of their known oxidation states as values.
    """
    # NOTE: the V and In rows previously contained duplicated entries
    # ("V -3 1 0 1 ..." and "In -5 -2 -2 1 ...") where -1 was intended;
    # compare the Nb/Ta and Tl rows, which follow the same pattern.
    data_str = """
        H -1 1
        He
        Li 1
        Be 0 1 2
        B -5 -1 0 1 2 3
        C -4 -3 -2 -1 0 1 2 3 4
        N -3 -2 -1 1 2 3 4 5
        O -2 -1 0 1 2
        F -1 0
        Ne
        Na -1 1
        Mg 0 1 2
        Al -2 -1 1 2 3
        Si -4 -3 -2 -1 0 1 2 3 4
        P -3 -2 -1 0 1 2 3 4 5
        S -2 -1 0 1 2 3 4 5 6
        Cl -1 1 2 3 4 5 6 7
        Ar 0
        K -1 1
        Ca 0 1 2
        Sc 0 1 2 3
        Ti -2 -1 0 1 2 3 4
        V -3 -1 0 1 2 3 4 5
        Cr -4 -2 -1 0 1 2 3 4 5 6
        Mn -3 -2 -1 0 1 2 3 4 5 6 7
        Fe -4 -2 -1 0 1 2 3 4 5 6 7
        Co -3 -1 0 1 2 3 4 5
        Ni -2 -1 0 1 2 3 4
        Cu -2 0 1 2 3 4
        Zn -2 0 1 2
        Ga -5 -4 -3 -2 -1 1 2 3
        Ge -3 -2 -1 0 1 2 3 4
        As -3 -2 -1 0 1 2 3 4 5
        Se -2 -1 1 2 3 4 5 6
        Br -1 1 3 4 5 7
        Kr 0 1 2
        Rb -1 1
        Sr 0 1 2
        Y 0 1 2 3
        Zr -2 0 1 2 3 4
        Nb -3 -1 0 1 2 3 4 5
        Mo -4 -2 -1 0 1 2 3 4 5 6
        Tc -3 -1 0 1 2 3 4 5 6 7
        Ru -4 -2 0 1 2 3 4 5 6 7 8
        Rh -3 -1 0 1 2 3 4 5 6
        Pd 0 1 2 3 4
        Ag -2 -1 1 2 3
        Cd -2 1 2
        In -5 -2 -1 1 2 3
        Sn -4 -3 -2 -1 0 1 2 3 4
        Sb -3 -2 -1 0 1 2 3 4 5
        Te -2 -1 1 3 4 5 6 7
        I -1 1 3 4 5 6 7
        Xe 0 2 4 6 8
        Cs -1 1
        Ba 0 1 2
        La 0 1 2 3
        Ce 0 2 3 4
        Pr 0 1 2 3 4 5
        Nd 0 2 3 4
        Pm 2 3
        Sm 0 2 3
        Eu 0 2 3
        Gd 0 1 2 3
        Tb 0 1 2 3 4
        Dy 0 2 3 4
        Ho 0 2 3
        Er 0 2 3
        Tm 0 2 3
        Yb 0 2 3
        Lu 0 2 3
        Hf -2 0 1 2 3 4
        Ta -3 -1 0 1 2 3 4 5
        W -4 -2 -1 0 1 2 3 4 5 6
        Re -3 -1 0 1 2 3 4 5 6 7
        Os -4 -2 -1 0 1 2 3 4 5 6 7 8
        Ir -3 -1 0 1 2 3 4 5 6 7 8 9
        Pt -3 -2 -1 0 1 2 3 4 5 6
        Au -3 -2 -1 0 1 2 3 5
        Hg -2 1 2
        Tl -5 -2 -1 1 2 3
        Pb -4 -2 -1 1 2 3 4
        Bi -3 -2 -1 1 2 3 4 5
        Po -2 2 4 5 6
        At -1 1 3 5 7
        Rn 2 6
        Fr 1
        Ra 2
        Ac 2 3
        Th 1 2 3 4
        Pa 3 4 5
        U 1 2 3 4 5 6
        Np 2 3 4 5 6 7
        Pu 2 3 4 5 6 7 8
        Am 2 3 4 5 6 7
        Cm 3 4 5 6
        Bk 2 3 4 5
        Cf 2 3 4 5
        Es 2 3 4
        Fm 2 3
        Md 2 3
        No 2 3
        Lr 3
        Rf 4
        Db 5
        Sg 0 6
        Bh 7
        Hs 8
        Mt
        Ds
        Rg
        Cn 2
        Nh
        Fl
        Mc
        Lv
        Ts
        Og
    """

    KONs: dict[str, list[int]] = {}
    for line in data_str.splitlines():
        parts = line.split()
        if not parts:
            # blank row (e.g. the lines holding the triple quotes)
            continue
        elem, *states = parts
        # a symbol with no trailing numbers yields an empty list
        KONs[elem] = [int(state) for state in states]

    return KONs



[docs]
def IonizationEnergies() -> dict[str, list[float]]:
    """
    Builds the table of reported successive ionization energies per element.

    The data is embedded below as whitespace-separated text, one element per
    row: the element symbol followed by its ionization energies in order,
    starting with the first ionization (at most ten values per element).
    The magnitudes look like electron-volts (e.g. H = 13.598); confirm
    against the data source before relying on the unit.

    A "-" marks an energy that is not available. Such entries are skipped,
    not padded, so for the sparse Mt and Ds rows the returned lists are
    compacted and a list position does NOT correspond to the ionization
    order (this matches the historical behaviour of this function).

    Returns:
        KIEs (dict[str, list[float]]): dictionary with element symbols as keys
                          and a list of their reported ionization energies as values.
    """
    # NOTE: the Te row was previously labelled "Tb", which appended
    # tellurium's energies in front of terbium's and left Te out entirely.
    data_str = """
        H 13.5984346
        He 24.58738901 54.41776549
        Li 5.391714996 75.640097 122.4543591
        Be 9.322699 18.21115 153.896205 217.7185861
        B 8.298019 25.15483 37.93059 259.3715 340.2260229
        C 11.260288 24.383154 47.88778 64.49352 392.090518 489.993198
        N 14.53413 29.60125 47.4453 77.4735 97.8901 552.06733 667.046121
        O 13.618055 35.12112 54.93554 77.4135 113.899 138.1189 739.32683 871.409883
        F 17.42282 34.97081 62.70798 87.175 114.249 157.16311 185.1868 953.89805 1103.11748
        Ne 21.564541 40.96297 63.4233 97.19 126.247 157.934 207.271 239.097 1195.80784 1362.19916
        Na 5.13907696 47.28636 71.62 98.936 138.404 172.23 208.504 264.192 299.856 1465.134502
        Mg 7.646236 15.035271 80.1436 109.2654 141.33 186.76 225.02 265.924 327.99 367.489
        Al 5.985769 18.82855 28.447642 119.9924 153.8252 190.49 241.76 284.64 330.21 398.65
        Si 8.15168 16.34585 33.493 45.14179 166.767 205.279 246.57 303.59 351.28 401.38
        P 10.486686 19.76949 30.20264 51.44387 65.02511 220.43 263.57 309.6 372.31 424.4
        S 10.36001 23.33788 34.86 47.222 72.5945 88.0529 280.954 328.794 379.84 447.7
        Cl 12.967633 23.81364 39.8 53.24 67.68 96.94 114.2013 348.306 400.851 456.7
        Ar 15.7596119 27.62967 40.735 59.58 74.84 91.29 124.41 143.4567 422.6 479.76
        K 4.34066373 31.625 45.8031 60.917 82.66 99.44 117.56 154.87 175.8174 503.67
        Ca 6.11315547 11.871719 50.91316 67.2732 84.34 108.78 127.21 147.24 188.54 211.275
        Sc 6.56149 12.79977 24.756839 73.4894 91.95 110.68 137.99 158.08 180.03 225.18
        Ti 6.82812 13.5755 27.49171 43.26717 99.299 119.533 140.68 170.5 192.1 215.92
        V 6.746187 14.634 29.3111 46.709 65.28165 128.125 150.72 173.55 206 230.5
        Cr 6.76651 16.486305 30.959 49.16 69.46 90.6349 160.29 184.76 209.5 244.5
        Mn 7.434038 15.63999 33.668 51.21 72.41 95.604 119.203 195.5 221.89 248.6
        Fe 7.9024681 16.19921 30.651 54.91 75 98.985 124.976 151.06 233.6 262.1
        Co 7.88101 17.0844 33.5 51.27 79.5 102 128.9 157.8 186.14 275.4
        Ni 7.639878 18.168838 35.187 54.92 76.06 108 132 162 193.2 224.7
        Cu 7.72638 20.29239 36.841 57.38 79.8 103 139 166 198 232.2
        Zn 9.394197 17.96439 39.7233 59.573 82.6 108 133.9 173.9 203 238
        Ga 5.999302 20.51514 30.72576 63.241 86.01 112.7 140.8 169.9 211 244
        Ge 7.899435 15.93461 34.0576 45.7155 90.5 115.9 144.9 176.4 212.5 252.1
        As 9.78855 18.5892 28.349 50.15 62.77 121.19 147 180 213 247
        Se 9.752392 21.196 31.697 42.947 68.3 81.83 155.327 184 219 255
        Br 11.81381 21.591 34.871 47.782 59.595 87.39 103.03 192.61 224 261
        Kr 13.9996055 24.35984 35.838 50.85 64.69 78.49 109.13 125.802 233 268
        Rb 4.1771281 27.28954 39.247 52.2 68.44 82.9 98.67 132.79 150.628 277.12
        Sr 5.69486745 11.0302765 42.88353 56.28 70.7 88 104 121.21 158.33 177.3
        Y 6.21726 12.2236 20.52441 60.6072 75.35 91.39 110.02 128.12 145.64 185.7
        Zr 6.634126 13.13 23.17 34.41836 80.348 96.38 112 133.7 153 172.02
        Nb 6.75885 14.32 25.04 37.611 50.5728 102.069 119.1 136 159.2 180
        Mo 7.09243 16.16 27.13 40.33 54.417 68.82704 125.638 143.6 164.12 186.3
        Tc 7.11938 15.26 29.55 41 57 72 88 150 169 189.9
        Ru 7.3605 16.76 28.47 45 59 76 93 110 178.41 198
        Rh 7.4589 18.08 31.06 42 63 80 97 115.1 135 207.51
        Pd 8.336839 19.43 32.93 46 61 84.1 101 120 141 159.9
        Ag 7.576234 21.4844 34.8 49 65 82 106 125 145.1 167
        Cd 8.99382 16.908313 37.468 51 67.9 87 105 130.1 150 173
        In 5.7863558 18.87041 28.04415 55.45 69.3 90 109 130.1 156 178
        Sn 7.343918 14.63307 30.506 40.74 77.03 94 112.9 135 156 184
        Sb 8.608389 16.626 25.3235 43.804 55 99.51 117 139 162 185
        Te 9.009808 18.6 27.84 37.4155 59.3 69.1 124.2 143 167 191.1
        I 10.45126 19.13126 29.57 40.357 51.52 74.4 87.61 150.81 171 197
        Xe 12.1298437 20.975 31.05 42.2 54.1 66.703 91.6 105.9778 179.84 202
        Cs 3.893905727 23.15745 33.195 43 56 69.1 82.9 110.1 125.61 213.3
        Ba 5.2116646 10.003826 35.8438 47 58 71 86 101 130.5 146.52
        La 5.5769 11.18496 19.1773 49.95 61.6 74 88 105 119 151.4
        Ce 5.5386 10.956 20.1974 36.906 65.55 77.6 91 106 125 140
        Pr 5.4702 10.631 21.6237 38.981 57.53 82 97 112 131 148
        Nd 5.525 10.783 22.09 40.6 60 84 99 114 136 152
        Pm 5.58187 10.938 22.44 41.17 61.7 85 101 116 138 155
        Sm 5.643722 11.078 23.55 41.64 62.7 87 103 118 141 158
        Eu 5.670385 11.24 24.84 42.94 63.2 89 105 120 144 161
        Gd 6.1498 12.076 20.54 44.44 64.8 89 106 123 144 165
        Tb 5.8638 11.513 21.82 39.33 66.5 90 108 125 143 168
        Dy 5.93905 11.647 22.89 41.23 62.1 93 110 127 152 170
        Ho 6.0215 11.781 22.79 42.52 63.9 95 112 129 155 173
        Er 6.1077 11.916 22.7 42.42 65.1 96 114 131 158 177
        Tm 6.18431 12.065 23.66 42.41 65.4 98 116 133 160 180
        Yb 6.25416 12.179185 25.053 43.61 65.6 99 117 135 163 182
        Lu 5.425871 14.13 20.9594 45.249 66.8 98 117 136 159 185
        Hf 6.82507 14.61 22.55 33.37 68.37 98 118 137 157 187
        Ta 7.549571 16.2 23.1 35 48.272 94.01 119 139 159 180
        W 7.86403 16.37 26 38.2 51.6 64.77 122.01 141.2 160.2 179
        Re 7.83352 16.6 27 39.1 51.9 67 82.71 144.4 165 187
        Os 8.43823 17 25 41 55 70.1 85.1 102.02 168.7 190
        Ir 8.96702 17 28 40 57 72 89 105 122.7 194.8
        Pt 8.95883 18.56 29 43 56 75 91 109 126 144.9
        Au 9.225554 20.203 30 45 60 74 94 112 130.1 149
        Hg 10.437504 18.75688 34.49 48.55 61.2 76.6 93 113.9 134 153
        Tl 6.1082873 20.4283 29.852 51.14 62.6 80 97.9 116 135 158
        Pb 7.4166799 15.032499 31.9373 42.33256 68.8 82.9 100.1 120 138 158
        Bi 7.285516 16.703 25.57075 45.37 54.856 88.4 103 122 143 161.1
        Po 8.41807 19.3 27.3 36 57 69.1 108 125 146.1 166
        At 9.31751 17.88 26.58 39.65 50.39 72 85.1 130.1 149 169
        Rn 10.7485 21.4 29.4 36.9 52.9 64 88 102 154 173.9
        Fr 4.0727411 22.4 33.5 39.1 50 67 80 106 120 179
        Ra 5.2784239 10.14718 31 41 52.9 64 82 97 124 140
        Ac 5.380226 11.75 17.431 44.8 55 67 79 98.9 113.9 143.9
        Th 6.3067 12.1 18.32 28.648 58 69.1 82 95 118 133
        Pa 5.89 11.9 18.6 30.9 44.3 72 85.1 98.9 111 137
        U 6.19405 11.6 19.8 36.7 46 62 89 101 116 128.9
        Np 6.26554 11.5 19.7 33.8 48 65 92 107 121 136
        Pu 6.02576 11.5 21.1 35 49 80 95 109 124 139
        Am 5.97381 11.7 21.7 36.8 50 67.9 95 110 125 141
        Cm 5.99141 12.4 20.1 37.7 51 69.1 97 112 128 144
        Bk 6.19785 11.9 21.6 36 56 70.1 90 114 130 147
        Cf 6.28166 12 22.4 37.7 51.9 75 91 112.9 133 152
        Es 6.36758 12.2 22.7 38.8 54.1 71 97 112.9 137 157
        Fm 6.5 12.4 23.2 39.3 55 74 93 120 136 162
        Md 6.58 12.4 24.3 40 54.1 76 96 115.1 143.9 162
        No 6.62621 12.93 25.8 41.5 60 74 97 119 140 170
        Lr 4.96 14.54 21.8 43.6 56 80 96 121 143 165
        Rf 6.02 14.35 23.84 31.87 64 77 102 119 146.1 169
        Db 6.8 14 23.1 33 43 86 98.9 126 145.1 172
        Sg 7.8 17.1 25.8 35.5 47.2 59.3 109 122 152 170
        Bh 7.7 17.5 26.7 37.3 49 62.1 74.9 134 148 178
        Hs 7.6 18.2 29.3 37.7 51.2 64 78.1 91.7 159.9 173.9
        Mt - - - - 50 - - 94 109 187
        Ds - - - - - 65 - - 112.9 128
    """

    KIEs: dict[str, list[float]] = {}
    for line in data_str.splitlines():
        parts = line.split()
        if not parts:
            # blank row (e.g. the lines holding the triple quotes)
            continue
        elem, *energies = parts
        # "-" placeholders are dropped rather than padded (see docstring)
        KIEs[elem] = [float(energy) for energy in energies if energy != "-"]

    return KIEs




[docs]
def HighestKnownONs() -> dict[str, int]:
    """
    Determines the highest known oxidation state for each element.

    The values are derived from the table returned by KnownONs() instead of
    a second embedded copy of that table, so the two lookups cannot drift
    apart. Elements for which no oxidation state is listed map to 0.

    Returns:
        HKONs (dict[str, int]) : dictionary with element symbols as keys
                              and their highest known oxidation state as values.
    """
    HKONs: dict[str, int] = {}
    for elem, states in KnownONs().items():
        # default=0 covers elements whose list of known states is empty
        HKONs[elem] = max(states, default=0)

    return HKONs




[docs]
def ONprobabilities() -> dict[str, list[float]]:
    """
    Builds the table of oxidation state probabilities for the metal elements.
    The probabilities are approximations taken from the relative frequency of
    occurrence of each oxidation state in the CSD metadata.

    Each row of the embedded table is an element symbol followed by its
    probabilities; the position of a value in the row is used elsewhere in this
    module as the (non-negative) oxidation state it belongs to.

    Returns:
        ONP (dict[str, list[float]]): dictionary with metal element symbols as keys
                        and the list of probabilities, in table order, as values.
    """
    data_str = """
                    Li	0	0.993162393	0.003418803	0.001709402	0.001709402	0	0	0	0	0	0
                    Be	0.023529412	0	0.976470588	0	0	0	0	0	0	0	0
                    Na	0	0.998743719	0	0.001256281	0	0	0	0	0	0	0
                    Mg	0.002141328	0.016416845	0.981441827	0	0	0	0	0	0	0	0
                    Al	0	0.020121951	0.016463415	0.962804878	0.000609756	0	0	0	0	0	0
                    K	0	0.998137803	0	0.001862197	0	0	0	0	0	0	0
                    Ca	0	0.001176471	0.998823529	0	0	0	0	0	0	0	0
                    Sc	0	0.003656307	0.010968921	0.985374771	0	0	0	0	0	0	0
                    Ti	0.004095004	0.00020475	0.028460278	0.128992629	0.838247338	0	0	0	0	0	0
                    V	0.002768166	0.007612457	0.037197232	0.133564014	0.391868512	0.426816609	0.00017301	0	0	0	0
                    Cr	0.160139589	0.017836371	0.138037999	0.612252811	0.014928267	0.018805739	0.037999225	0	0	0	0
                    Mn	0.001919631	0.044599437	0.64864346	0.260621961	0.039928334	0.003519324	0.000319939	0.000447914	0	0	0
                    Fe	0.016919249	0.011083465	0.545216014	0.42053834	0.0059715	0.000226193	4.52E-05	0	0	0	0
                    Co	0.002508241	0.021678372	0.655403469	0.31990827	0.00035832	0.000143328	0	0	0	0	0
                    Ni	0.020168417	0.010846588	0.935890772	0.031222927	0.001871296	0	0	0	0	0	0
                    Cu	6.78E-05	0.236206984	0.75977341	0.003951765	0	0	0	0	0	0	0
                    Zn	7.78E-05	0.000778362	0.999104884	3.89E-05	0	0	0	0	0	0	0
                    Ga	0.003392706	0.100932994	0.066157761	0.828668363	0.000848176	0	0	0	0	0	0
                    Rb	0	0.9875	0.0125	0	0	0	0	0	0	0	0
                    Sr	0	0	1	0	0	0	0	0	0	0	0
                    Y	0.000608273	0.000608273	0.001824818	0.996350365	0.000608273	0	0	0	0	0	0
                    Zr	0.001476378	0	0.030019685	0.039370079	0.928149606	0	0.000984252	0	0	0	0
                    Nb	0.006417112	0.03315508	0.02459893	0.100534759	0.19144385	0.643850267	0	0	0	0	0
                    Mo	0.067368705	0.007830431	0.106115836	0.052922911	0.132847307	0.184150128	0.448764682	0	0	0	0
                    Tc	0	0.1488	0.0592	0.2256	0.056	0.4288	0.04	0.0416	0	0	0
                    Ru	0.01519372	0.011901747	0.819870009	0.119355111	0.027348696	0.000759686	0.005571031	0	0	0	0
                    Rh	0.002670673	0.455850442	0.141545652	0.397095643	0.00166917	0.001168419	0	0	0	0	0
                    Pd	0.018729923	0.009834445	0.961255251	0.004101804	0.005979738	9.88E-05	0	0	0	0	0
                    Ag	0.000163733	0.988784282	0.006631191	0.004420794	0	0	0	0	0	0	0
                    Cd	0	0.000421977	0.998818466	0.000759558	0	0	0	0	0	0	0
                    In	0	0.056062581	0.024771838	0.91916558	0	0	0	0	0	0	0
                    Sn	0.00222187	0.00095223	0.194730995	0.001110935	0.800666561	0.000158705	0.000158705	0	0	0	0
                    Cs	0	0.986486486	0.013513514	0	0	0	0	0	0	0	0
                    Ba	0	0	0.998236332	0.001763668	0	0	0	0	0	0	0
                    Hf	0	0	0.015473888	0.025145068	0.959381044	0	0	0	0	0	0
                    Ta	0.003726708	0.016149068	0.01242236	0.068322981	0.116770186	0.782608696	0	0	0	0	0
                    W	0.128280743	0.003538779	0.131229726	0.027720436	0.146564435	0.130934827	0.431731053	0	0	0	0
                    Re	0.001812324	0.316149819	0.044905356	0.124043496	0.065243657	0.364679823	0.017921869	0.065243657	0	0	0
                    Os	0.009817672	0.005610098	0.460729313	0.140953717	0.204067321	0.016830295	0.152173913	0.000701262	0.00911641	0	0
                    Ir	0.001902045	0.201854494	0.027104137	0.752258678	0.011650024	0.005230623	0	0	0	0	0
                    Pt	0.017449918	0.005606652	0.844021671	0.01870984	0.114085927	0	0.000125992	0	0	0	0
                    Au	0.001029204	0.731892448	0.015824006	0.251125691	0.00012865	0	0	0	0	0	0
                    Hg	0.00251004	0.013805221	0.983433735	0.000251004	0	0	0	0	0	0	0
                    Tl	0.001342282	0.616107383	0.013422819	0.369127517	0	0	0	0	0	0	0
                    Pb	0	0.000545852	0.964792576	0.000272926	0.034388646	0	0	0	0	0	0
                    Bi	0	0	0	1	0	0	0	0	0	0	0
                    Po	0	0	0	0	0	0	0	0	0	0	0
                    Fr	0	0	0	0	0	0	0	0	0	0	0
                    Ra	0	0	0	0	0	0	0	0	0	0	0
                    Rf	0	0	0	0	0	0	0	0	0	0	0
                    Db	0	0	0	0	0	0	0	0	0	0	0
                    Sg	0	0	0	0	0	0	0	0	0	0	0
                    Bh	0	0	0	0	0	0	0	0	0	0	0
                    Hs	0	0	0	0	0	0	0	0	0	0	0
                    Mt	0	0	0	0	0	0	0	0	0	0	0
                    Ds	0	0	0	0	0	0	0	0	0	0	0
                    Rg	0	0	0	0	0	0	0	0	0	0	0
                    Cn	0	0	0	0	0	0	0	0	0	0	0
                    Nh	0	0	0	0	0	0	0	0	0	0	0
                    Fl	0	0	0	0	0	0	0	0	0	0	0
                    Mc	0	0	0	0	0	0	0	0	0	0	0
                    Lv	0	0	0	0	0	0	0	0	0	0	0
                    La	0	0	0.002631579	0.996842105	0.000526316	0	0	0	0	0	0
                    Ce	0	0.000666223	0.005996003	0.742838108	0.250499667	0	0	0	0	0	0
                    Pr	0	0	0.002636204	0.993848858	0.003514938	0	0	0	0	0	0
                    Nd	0	0	0.005083884	0.994916116	0	0	0	0	0	0	0
                    Pm	0	0	0	1	0	0	0	0	0	0	0
                    Sm	0	0	0.153056235	0.846943765	0	0	0	0	0	0	0
                    Eu	0	0	0.081716037	0.918283963	0	0	0	0	0	0	0
                    Gd	0.000430663	0	0.005167959	0.994401378	0	0	0	0	0	0	0
                    Tb	0	0	0.132275132	0.866717057	0.001007811	0	0	0	0	0	0
                    Dy	0	0	0.007676768	0.992323232	0	0	0	0	0	0	0
                    Ho	0.001218027	0	0.007308161	0.990255786	0.001218027	0	0	0	0	0	0
                    Er	0	0	0.099644793	0.899233502	0.001121705	0	0	0	0	0	0
                    Tm	0	0	0.112903226	0.887096774	0	0	0	0	0	0	0
                    Yb	0	0	0.268343816	0.731656184	0	0	0	0	0	0	0
                    Lu	0	0	0.003663004	0.996336996	0	0	0	0	0	0	0
                    Ac	0	0	0	0	0.333333333	0.666666667	0	0	0	0	0
                    Th	0	0	0.008896797	0.019572954	0.96975089	0	0.001779359	0	0	0	0
                    Pa	0	0	0	0	0.333333333	0.666666667	0	0	0	0	0
                    U	0.000308452	0	0.003701419	0.098704503	0.333127699	0.064466379	0.499691548	0	0	0	0
                    Np	0	0	0.004048583	0.04048583	0.246963563	0.453441296	0.251012146	0.004048583	0	0	0
                    Pu	0	0	0	0.231292517	0.510204082	0.040816327	0.217687075	0	0	0	0
                    Am	0	0	0	0.857142857	0	0.057142857	0.085714286	0	0	0	0
                    Cm	0	0	0	1	0	0	0	0	0	0	0
                    Bk	0	0	0	1	0	0	0	0	0	0	0
                    Cf	0	0	0	1	0	0	0	0	0	0	0
                    Es	0	0	0	0	0	0	0	0	0	0	0
                    Fm	0	0	0	0	0	0	0	0	0	0	0
                    Md	0	0	0	0	0	0	0	0	0	0	0
                    No	0	0	0	0	0	0	0	0	0	0	0
                    Lr	0	0	0	0	0	0	0	0	0	0	0
                    Ge	0.00867052	0.012524085	0.512524085	0.002890173	0.462427746	0	0.000963391	0	0	0	0
                    As	0	0.021459227	0	0.811158798	0	0.167381974	0	0	0	0	0
                    Se	0	0.023809524	0.619047619	0	0.325396825	0	0.031746032	0	0	0	0
                    Sb	0	0.002742732	0	0.541963796	0.000548546	0.45419638	0.000548546	0	0	0	0
                    Te	0	0.001194743	0.338112306	0.001194743	0.628434886	0	0.031063321	0	0	0	0
                    """.strip()

    # str.split() with no argument separates on any whitespace, so rows may be
    # delimited by tabs or spaces and their leading indentation is ignored.
    rows = (row.split() for row in data_str.splitlines())
    return {fields[0]: [float(token) for token in fields[1:]] for fields in rows}




[docs]
def ONmostprob(iONP: dict[str, list[float]]) -> dict[str, int]:
    """
    Picks, for every metal element, the list position holding its largest
    oxidation state probability. These values are utilized during charge
    distribution routines.

    The scan starts from a threshold of 0 and accepts any value that is greater
    than or equal to the best seen so far, so when several positions share the
    largest value the last of them is reported. An element whose list is empty
    (or contains only negative values) gets no entry in the result.

    Parameters:
        iONP (dict[str, list[float]]): dictionary with metal element symbols as
                        keys and a list of the probability at the relevant oxidation
                        states as values.

    Returns:
        MPOS (dict[str, int]) : dictionary with metal element symbols as keys and
                        the position of their highest probability as values.
    """

    def _peak_position(values):
        # Running maximum seeded with 0; ">=" lets later ties win.
        best_value, best_index = 0, None
        for position, value in enumerate(values):
            if value >= best_value:
                best_value, best_index = value, position
        return best_index

    located = ((symbol, _peak_position(values)) for symbol, values in iONP.items())
    return {symbol: position for symbol, position in located if position is not None}




[docs]
def getCN(lsites: dict[Atom, list[Atom]]) -> dict[Atom, float]:
    """
    Computes an effective coordination number for each metal site.

    The count for a metal is accumulated as follows:
      * each ligand atom listed for the metal adds 1, unless
        ``hapticity(ligand, metal)`` is truthy, in which case it adds 0.5;
      * each atom in ``metal.neighbours`` that is itself a metal adds 0.5.

    Parameters:
        lsites (dict[Atom, list[Atom]]): dictionary with metal Atom object as keys
                        and the list of ligand atoms which bind them as values.

    Returns:
        CNdict (dict[Atom, float]): dictionary with metal Atom objects as keys
                        and effective coordination number as values. The value
                        can be fractional because of the 0.5 contributions.
    """
    CNdict = {}
    for metal, ligands in lsites.items():
        # Start from the integer 0 so a site with only whole contributions
        # keeps an integer count, exactly as before.
        count = 0
        for ligand in ligands:
            count += 0.5 if hapticity(ligand, metal) else 1
        # Metal-metal contacts are weighted as half a coordination site.
        for neighbour in metal.neighbours:
            if neighbour.is_metal:
                count += 0.5
        CNdict[metal] = count
    return CNdict



# Module-level lookup tables, built once when the module is imported and then
# read by check() for every structure it processes.
KnownON  = KnownONs()
KnownIE  = IonizationEnergies()
HighestKnownON = HighestKnownONs()
ONProb   = ONprobabilities()
# Derived from ONProb, so it has to be computed after the table above exists.
HighestProbON = ONmostprob(ONProb)



[docs]
def check(file_path: str) -> dict[str, dict[str, str]]:
    """
    Processes a single structure file and flags suspicious metal oxidation states.

    A path ending in ".cif" is read with readentry(); any other path is handed
    to readSBU(). The oxidation state assigned to every metal site is then
    tested against the valence electron count, the known oxidation states and
    the oxidation state probability table.

    Parameters:
        file_path (str): path of the structure file to analyse.

    Returns:
        flags (dict[str, dict[str, str]]): dictionary with the flag names
                        ("impossible", "unknown", "zero_valence", "noint_flag",
                        "low_prob_1", "low_prob_2", "low_prob_3",
                        "low_prob_multi") as keys and, for each, a dictionary
                        mapping metal site labels to "GOOD", "BAD" or
                        "LIKELY_BAD". "noint_flag" and "low_prob_multi" are
                        initialised to "GOOD" and not modified here.
                        An empty dictionary is returned when reading the file
                        raises RuntimeError or when no metal site is found.
    """
    flag_names = (
        "impossible", "unknown", "zero_valence", "noint_flag",
        "low_prob_1", "low_prob_2", "low_prob_3", "low_prob_multi",
    )

    # Read structure
    try:
        if file_path.endswith(".cif"):
            cif = readentry(file_path)
            mol = cif.molecule
            asymmol = cif.asymmetric_unit_molecule
        else:
            mol = readSBU(file_path)
            asymmol = mol
    except RuntimeError:
        return {}

    # Identify sites
    uniquesites = get_unique_sites(mol, asymmol)
    metalsites = get_metal_sites(uniquesites)
    if not metalsites:
        return {}

    # Bonding and oxidation contributions
    dVBO = delocalisedLBO(mol)
    rVBO = ringVBOs(mol)
    AON = iVBS_Oxidation_Contrib(uniquesites, rVBO, dVBO)
    rAON = redundantAON(AON, mol)
    ligand_sites = get_ligand_sites(metalsites, uniquesites)
    binding_sites = get_binding_sites(metalsites, uniquesites)
    binding_sphere = binding_domain(binding_sites, rAON, mol, {u.label: u for u in uniquesites})
    bindingAON = binding_contrib(binding_sphere, binding_sites, rAON)
    connected = get_metal_networks(ligand_sites, binding_sphere, bindingAON)
    mCN = getCN(ligand_sites)

    # ONE-center inner
    ONEC_inner = {}
    for metal in ligand_sites:
        oxidation = 0
        val = valence_e(metal)
        for lig in ligand_sites[metal]:
            LBO = bindingAON[lig]
            Nbr = bridging(lig)
            Ox = LBO / Nbr
            oxidation += Ox
            # A per-metal contribution of 2 (or 3) or more bumps the
            # coordination number by one (or two) extra units.
            if Ox >= 2:
                mCN[metal] += 1
            if Ox >= 3:
                mCN[metal] += 1
        ONEC_inner[metal] = [oxidation, val + 2*mCN[metal] - oxidation]

    # Network distribution
    noint_balance = distribute_ONEC(ONEC_inner, connected, KnownIE, ONProb, HighestKnownON, mCN, HighestProbON)

    # Outer-sphere
    OSC = outer_sphere_contrib(outer_sphere_domain(uniquesites, binding_sphere), rAON)
    noint_outer = distribute_OuterSphere(noint_balance, OSC, KnownIE, ONProb, HighestKnownON, mCN)

    # Prepare flag dictionaries
    flags = {name: {} for name in flag_names}

    # Evaluate flags
    for metal in noint_outer:
        lbl = metal.label
        symbol = metal.atomic_symbol
        # Every flag starts as GOOD and is downgraded by the tests below.
        for per_site in flags.values():
            per_site[lbl] = "GOOD"

        val = valence_e(metal)
        os_val = noint_outer[metal][0]

        if os_val > val:
            flags["impossible"][lbl] = "BAD"
        if os_val == 0:
            flags["zero_valence"][lbl] = "BAD"
        if not any(math.isclose(os_val, i, abs_tol=0.5) for i in KnownON[symbol]):
            flags["unknown"][lbl] = "BAD"

        # low-prob checks, only meaningful for (near-)integer oxidation states.
        # Compare against the nearest integer rather than testing
        # "os_val % 1 == 0": the modulo form misses values that sit just
        # below an integer (e.g. 1.9999999999 -> 0.9999999999).
        if math.isfinite(os_val) and math.isclose(os_val, round(os_val), abs_tol=1e-4):
            state = round(os_val)
            probs = ONProb[symbol]
            # States outside the table (including negative ones, which would
            # otherwise wrap around via negative indexing) have probability 0.
            prob = probs[state] if 0 <= state < len(probs) else 0
            if prob < 0.01:
                flags["low_prob_1"][lbl] = "LIKELY_BAD"
            if prob < 0.001:
                flags["low_prob_2"][lbl] = "BAD"
            if prob < 0.0001:
                flags["low_prob_3"][lbl] = "BAD"

    return flags


import glob
import os
import json
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import  pandas as pd



[docs]
def worker(cif_path):
    """
    Runs check() on one file and packages the outcome as a flat record.

    Parameters:
        cif_path (str): path of the structure file to check.

    Returns:
        record (dict): the key "name" (file base name with ".cif" removed)
                        followed by one key per flag. Each flag holds the value
                        returned by check() for it, the string "unknown" when
                        check() did not report that flag, or the string "error"
                        when check() raised an exception.
    """
    flag_names = (
        "impossible", "unknown", "zero_valence", "noint_flag",
        "low_prob_1", "low_prob_2", "low_prob_3", "low_prob_multi",
    )
    name = os.path.basename(cif_path).replace(".cif", "")
    try:
        outcome = check(cif_path)
        statuses = {flag: outcome.get(flag, "unknown") for flag in flag_names}
    except Exception:
        # Any failure while checking is reported in the record instead of
        # propagating to the caller.
        statuses = dict.fromkeys(flag_names, "error")
    return {"name": name, **statuses}



[docs]
def _flag_raised(statuses):
    """Return True when one flag entry of a result record counts as raised."""
    if isinstance(statuses, dict):
        # Regular case: site label -> status; raised if any site is not GOOD.
        return any(status != "GOOD" for status in statuses.values())
    # worker() stores the plain strings "unknown"/"error" when a structure
    # could not be evaluated; such a structure is never reported as clean.
    return True


def run(cif_folder, save_path="./", max_workers=64):
    """
    Checks every ".cif" file of a folder in parallel and summarises the flags.

    One "<name>.json" file per structure is written into ``save_path`` with the
    record produced by worker(). Afterwards all result files found in
    ``save_path`` are condensed into "mosaec_results.csv", with one row per
    structure and one boolean column per flag (True when at least one metal
    site is not "GOOD", or when the structure could not be evaluated).

    Parameters:
        cif_folder (str): folder containing the ".cif" files to check.
        save_path (str): folder receiving the JSON records and the CSV summary;
                        created if it does not exist.
        max_workers (int): maximum number of worker processes.
    """
    os.makedirs(save_path, exist_ok=True)

    all_cifs = glob.glob(os.path.join(cif_folder, "*.cif"))

    with ProcessPoolExecutor(max_workers=max_workers) as exe:
        futures = {exe.submit(worker, cif): cif for cif in all_cifs}

        for fut in tqdm(as_completed(futures), total=len(futures), desc="Checking CIFs"):
            result_dict = fut.result()
            name = result_dict["name"]
            out_path = os.path.join(save_path, f"{name}.json")

            with open(out_path, "w", encoding="utf-8") as jf:
                json.dump(result_dict, jf, ensure_ascii=False, indent=2)

    columns = ["impossible", "unknown", "zero_valence", "noint_flag",
               "low_prob_1", "low_prob_2", "low_prob_3", "low_prob_multi"]

    all_data = []
    for file in tqdm(glob.glob(os.path.join(save_path, "*.json"))):
        with open(file, "r", encoding="utf-8") as f:
            data = json.load(f)

        # save_path may hold JSON files that are not result records (it
        # defaults to the current directory); leave those out of the summary.
        if not isinstance(data, dict) or any(col not in data for col in columns):
            continue

        name = os.path.splitext(os.path.basename(file))[0]
        all_data.append([name] + [_flag_raised(data[col]) for col in columns])

    pd.DataFrame(all_data, columns=["id"] + columns).to_csv(
        os.path.join(save_path, "mosaec_results.csv"), index=False
    )