try:
from typing import Literal
except ImportError:
from typing_extensions import Literal
import math
import mendeleev
from ccdc import io
from ccdc import crystal
from ccdc import molecule
from ccdc.crystal import Crystal
from ccdc.molecule import Atom, Bond, Molecule
from ccdc import descriptors
[docs]
def readentry(input_cif: str) -> Crystal:
"""
Reads a CIF file containing structure data and converts it to a
standard atom labeling convention using the ccdc.crystal module.
Parameters:
input_cif (str): filename (.CIF) containing crystal structure data.
Returns:
newcif (ccdc.crystal.Crystal): Crystal object containing structural data
in the standard atom labeling convention.
"""
# read in the cif to a crystal object
with io.CrystalReader(input_cif, format="cif") as readcif:
cif = readcif[0]
readcif.close()
# to remove duplicate atoms, need the empirical formula
formula = cif.formula
elamnt = formula.split(" ")
# now convert to standard labelling convention and identify
# duplicate atoms to be removed
with open(input_cif, "r") as file:
file.seek(0)
newstring = str()
lines = file.readlines()
loop_pos = 0
start = 0
end = 0
columncount = 0
type_pos = 0
label_pos = 0
x_pos = 0
y_pos = 0
z_pos = 0
for i, line in enumerate(lines):
lines[i] = lines[i].lstrip()
for i, line in enumerate(lines):
# locate atom type and site label columns
if "loop_" in line:
loop_pos = i
if ("_atom" in line) and (not "_geom" in line) and (not "_aniso" in line):
start = loop_pos + 1
end = i + 1
for i in range(start, end):
if "atom_site_type_symbol" in lines[i]:
type_pos = columncount
if "atom_site_label" in lines[i]:
label_pos = columncount
columncount += 1
counting = {}
cutoff = {}
to_remove = []
for i in range(end, len(lines)):
if "loop_" in lines[i]:
break
# lines with atom information will contain a ., so only look at these
if "." in lines[i]:
# split lines by whitespace
col = lines[i].split()
# keep count of how many of each element type
if not col[type_pos] in counting:
counting[col[type_pos]] = 1
elif col[type_pos] in counting:
counting[col[type_pos]] += 1
# new atom labels
newlabel = f"{col[type_pos]}{counting[col[type_pos]]}"
lines[i] = lines[i].replace(col[label_pos], newlabel)
# cutoff repeated atoms
if newlabel in elamnt:
cutoff[col[type_pos]] = counting[col[type_pos]]
if col[type_pos] in cutoff:
if counting[col[type_pos]] > cutoff[col[type_pos]]:
to_remove.append(lines[i])
# remove unnecessary atoms
for i in to_remove:
lines.remove(i)
# combine to new string
for i in lines:
newstring += i
# read into new crystal object and assign bonds
newcif = crystal.Crystal.from_string(newstring, format="cif")
newcif.assign_bonds()
file.close()
return newcif
[docs]
def readSBU(input_mol2: str) -> Crystal:
"""
Reads a MOL2 file containing SBU/metal complex structural data
and converts it to a standard atom labeling convention using
the ccdc.crystal module.
Parameters:
input_mol2 (str): filename (.mol2) containing SBU/metal complex
structural data.
Returns:
mol (ccdc.crystal.Crystal): Crystal object containing structural data
in the standard atom labeling convention.
"""
# First have to convert the connection points (unknown atom types)
# to H. Has to be done as a string so CSD reader can
# interpret bonding
with open(input_mol2, "r") as file:
file.seek(0)
newstring = str()
lines = file.readlines()
for i, line in enumerate(lines):
if "*" in line:
lines[i] = lines[i].replace("Du", "H")
newstring += lines[i]
cif = crystal.Crystal.from_string(newstring, format="mol2")
cif.assign_bonds()
file.close()
mol = cif.molecule
# MOSAEC needs atoms to have unique labels, so make them unique
count = 1
for atom in mol.atoms:
atom.label = f"{atom.label}{count}"
count += 1
return mol
[docs]
def read_CSD_entry(input_refcode: str) -> Crystal:
"""
Read entries directly from the CSD CrystalReader according to CSD refcode.
Parameters:
input_refcode (str): string used to identify materials in the CSD.
Returns:
cif (ccdc.crystal.Crystal): Crystal object containing structural data
in the standard atom labeling convention.
"""
# read in the cif to a crystal object
csd_crystal_reader = io.CrystalReader("CSD")
cif = csd_crystal_reader.crystal(input_refcode)
cif.assign_bonds()
csd_crystal_reader.close()
return cif
[docs]
def get_unique_sites(mole: Molecule, asymmole: Molecule) -> list[Atom]:
"""
Get the unique atoms in a structure belonging to the asymmetric unit.
Parameters:
mole (ccdc.molecule.Molecule): original structure Molecule object.
asymmole (ccdc.molecule.Molecule): asymmetric unit of the structure.
Returns:
uniquesites (list[ccdc.molecule.Atom]): list of unique atoms in the structure
that belong to the asymmetric unit.
"""
# blank list for unique sites
uniquesites = []
labels = []
asymmcoords = []
molecoords = []
duplicates = []
for atom in asymmole.atoms:
asymmcoords.append(atom.coordinates)
for atom in mole.atoms:
if atom.coordinates in asymmcoords:
if not atom.coordinates in molecoords:
if not atom.label in labels:
uniquesites.append(atom)
molecoords.append(atom.coordinates)
labels.append(atom.label)
else:
duplicates.append(atom)
else:
duplicates.append(atom)
if len(duplicates) >= 1:
for datom in duplicates:
for atom in uniquesites:
if any(
[
(datom.coordinates == atom.coordinates),
(datom.label == atom.label),
]
):
if datom.atomic_symbol == atom.atomic_symbol:
if len(datom.neighbours) > len(atom.neighbours):
uniquesites.remove(atom)
uniquesites.append(datom)
if not datom.label in labels:
uniquesites.append(datom)
labels.append(datom.label)
return uniquesites
[docs]
def get_ligand_sites(
metalsites: list[Atom], sites: list[Atom]
) -> dict[Atom, list[Atom]]:
"""
Get the ligand sites binding each metal atom in a structure.
Parameters:
metalsites (list[ccdc.molecule.Atom]): list of metal sites in the structure
that belong to the asymmetric unit.
sites (list[ccdc.molecule.Atom]): list of unique atoms in the structure
that belong to the asymmetric unit.
Returns:
metal_sphere (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
dictionary with metal Atom object as keys and the the list
of ligand atoms which bind them as values.
"""
metal_sphere = {}
for metal in metalsites:
sphere1 = []
for ligand in metal.neighbours:
if not ligand.is_metal == True:
for site in sites:
if ligand.label == site.label:
sphere1.append(site)
metal_sphere[metal] = sphere1
return metal_sphere
[docs]
def get_binding_sites(metalsites: list[Atom], uniquesites: list[Atom]) -> list[Atom]:
"""
Get the binding sites in a structure, given the list of unique metal atoms
and all unique atoms.
Parameters:
metalsites (list[ccdc.molecule.Atom]): list of unique metal atoms.
uniquesites (list[ccdc.molecule.Atom]): list of unique atoms.
Returns:
binding_sites (list[ccdc.molecule.Atom]): list of binding sites connecting
metal atoms and ligands.
"""
binding_sites = set()
for metal in metalsites:
for ligand in metal.neighbours:
for site in uniquesites:
if ligand.label == site.label:
binding_sites.add(site)
return binding_sites
[docs]
def ringVBOs(mole: Molecule) -> dict[int, int]:
"""
Calculates the VBO (valence bond order) for each atom in the structure.
Parameters:
mole (ccdc.molecule.Molecule): Molecule object representing the structure.
Returns:
ringVBO (dict[int, int]): dictionary with each atom's index in mole.atoms
as keys and VBO (valence bond order) as values.
"""
ringVBO = {}
unassigned = mole.atoms
ringcopy = mole.copy()
oncycle_atoms = []
offcycle_atoms = []
oncycle_labels = []
offcycle_labels = []
cyclic_periodic = []
cyclic_periodic_labels = []
offcycle_periodic = []
# remove all the metals, this
# prevents metal-containing rings (i.e. pores)
# from interfering
for atom in ringcopy.atoms:
if atom.is_metal:
ringcopy.remove_atom(atom)
# collect all the cyclic atoms
for atom in ringcopy.atoms:
if atom.is_cyclic:
if not atom in oncycle_atoms:
oncycle_atoms.append(atom)
oncycle_labels.append(atom.label)
# we also need everything that the cyclic atoms are bound to
for atom in oncycle_atoms:
for neighbour in atom.neighbours:
if not neighbour in oncycle_atoms:
if not neighbour in offcycle_atoms:
offcycle_atoms.append(neighbour)
offcycle_labels.append(neighbour.label)
# combine cyclic atoms and 1st coordination sphere
cyclicsystem = oncycle_atoms + offcycle_atoms
# initialize ringVBO dictionary
for atom in unassigned:
if atom.label in oncycle_labels:
ringVBO[atom] = 0
# CSD doesn't do periodic boundary conditions, need a workaround
# check for any periodic copies of cyclic atoms
for atom in ringcopy.atoms:
if all([(not atom in oncycle_atoms), (atom.label in oncycle_labels)]):
if not atom in cyclic_periodic:
cyclic_periodic.append(atom)
cyclic_periodic_labels.append(atom.label)
for atom in cyclic_periodic:
# print (atom.neighbours)
for neighbour in atom.neighbours:
if not neighbour in (offcycle_periodic + cyclic_periodic):
if not neighbour.label in (oncycle_labels):
offcycle_periodic.append(neighbour)
# remove every atom that isn't part of or directly bound to a cycle
for atom in ringcopy.atoms:
if not atom in (cyclicsystem + cyclic_periodic):
ringcopy.remove_atom(atom)
# find all non-cyclic bonds
# single bonds between cycles, break and cap with H
for bond in ringcopy.bonds:
if not bond.is_cyclic:
# bonds between cycles
if all(
[
(all((member.label in oncycle_labels for member in bond.atoms))),
(
all(
(
not member.label in cyclic_periodic_labels
for member in bond.atoms
)
)
),
]
):
member1 = bond.atoms[0]
member2 = bond.atoms[1]
Hcap1 = molecule.Atom("H", coordinates=member1.coordinates)
Hcap2 = molecule.Atom("H", coordinates=member2.coordinates)
Hcap1_id = ringcopy.add_atom(Hcap1)
Hcap2_id = ringcopy.add_atom(Hcap2)
ringcopy.add_bond(bond.bond_type, Hcap1_id, member2)
ringcopy.add_bond(bond.bond_type, Hcap2_id, member1)
ringcopy.remove_bond(bond)
# cap off-cycle atoms
for offatom in offcycle_atoms + offcycle_periodic:
# get the VBO for each off-cycle atom
# (VBO with respect to cyclic atoms)
offVBO = 0
# quick check for delocalized systems in the ring
# if there are any, get the delocalised bond orders
if any(bond.bond_type == "Delocalised" for bond in offatom.bonds):
offdVBO = delocalisedLBO(ringcopy)
# add the non-delocalized bond orders
for bond in offatom.bonds:
# only interested in bonds to cyclic atoms
if any(batom.label in oncycle_labels for batom in bond.atoms):
# Each bond contributes to Ligand Bond Order according to its type
if bond.bond_type == "Single":
offVBO += 1
elif bond.bond_type == "Double":
offVBO += 2
elif bond.bond_type == "Triple":
offVBO += 3
elif bond.bond_type == "Quadruple":
offVBO += 4
elif bond.bond_type == "Delocalised":
offVBO += offdVBO[offatom]
elif bond.bond_type == "Aromatic":
offVBO += 0
print("impossible Aromatic bond")
# cap with appropriate element for VBO
if offVBO == 1:
offatom.atomic_symbol = "H"
elif offVBO == 2:
offatom.atomic_symbol = "O"
elif offVBO == 3:
offatom.atomic_symbol = "N"
elif offVBO == 4:
offatom.atomic_symbol = "C"
elif offVBO == 5:
offatom.atomic_symbol = "P"
elif offVBO == 6:
offatom.atomic_symbol = "S"
elif offVBO > 6:
print("no, that's too many")
# for each cyclic system, reassign bonds, kekulize, and get VBO
# the bond and atom pruning we did above ensures that fused cycles
# will be treated as a single system
# while non-fused cycles that are connected via bonding are treated
# as seperate systems
for cyclesys in ringcopy.components:
# reassign bonds and kekulize
cyclesys.assign_bond_types()
cyclesys.kekulize()
# porhpyrins and similar molecules are misassigned, we will code a hard fix
# first identify and isolate the inner porphyrin(/like) atoms
# (these atoms determine overall charge)
# store these in a dictionary for later
joining_atoms = dict()
joining_rings = dict()
subring_labels = dict()
porphyrinatoms = dict()
porphyrin_to_correct = set()
# ring by ring
for subring in cyclesys.rings:
subring_labels[subring] = []
# get a list of all atom labels in each subring
for sratom in subring.atoms:
subring_labels[subring].append(sratom.label)
# check atom by atom
for sratom in subring.atoms:
# check each atom neighbour
for srneighbour in sratom.neighbours:
srn_label = srneighbour.label
# if the neighbour is not part of the current ring
if not srn_label in subring_labels[subring]:
# if the nighbour IS a cyclic atom
# consider this a "joining atom"
if srn_label in oncycle_labels:
try:
joining_atoms[srn_label].append(subring)
except KeyError:
joining_atoms[srn_label] = [subring]
try:
joining_rings[subring].append(srn_label)
except KeyError:
joining_rings[subring] = [srn_label]
for jring in joining_rings:
if all([(len(jring) == 16), (jring.is_fully_conjugated)]):
for patom in jring.atoms:
plabel = patom.label
if not plabel in joining_atoms:
ncyclicbonds = 0
for pbond in patom.bonds:
if pbond.is_cyclic:
ncyclicbonds += 1
if ncyclicbonds == 2:
try:
porphyrinatoms[jring].append(patom)
except KeyError:
porphyrinatoms[jring] = [patom]
for porph in porphyrinatoms:
if all(i.atomic_symbol == "N" for i in porphyrinatoms[porph]):
protonated = 0
for patom in porphyrinatoms[porph]:
if any(i.atomic_symbol == "H" for i in patom.neighbours):
protonated += 1
if protonated == 0:
for patom in porphyrinatoms[porph]:
porphyrin_to_correct.add(patom.label)
# quick check for delocalized systems in the ring
# if there are any, get the delocalised bond orders
if any(bond.bond_type == "Delocalised" for bond in cyclesys.bonds):
rdVBO = delocalisedLBO(cyclesys)
# assign VBO for each on-cycle atom
for ratom in cyclesys.atoms:
rVBO = 0
if ratom.label in oncycle_labels:
if ratom.label in porphyrin_to_correct:
rVBO -= 0.5
for rbond in ratom.bonds:
# Each bond contributes to Ligand Bond Order
# according to its type except periodic copies
if any(
[
(rbond.is_cyclic),
(
not all(
(
mem.label in cyclic_periodic_labels
for mem in rbond.atoms
)
)
),
]
):
if rbond.bond_type == "Single":
rVBO += 1
elif rbond.bond_type == "Double":
rVBO += 2
elif rbond.bond_type == "Triple":
rVBO += 3
elif rbond.bond_type == "Quadruple":
rVBO += 4
elif rbond.bond_type == "Delocalised":
rVBO += rdVBO[ratom]
elif rbond.bond_type == "Aromatic":
rVBO += 0
print("impossible Aromatic bond")
# the VBOs are currently associated to atom objects
# in molecule objects that we have modified
# we need these to be associated to atom objects in
# the parent (unmodified) molecule object
for matom in unassigned:
if matom.label == ratom.label:
ringVBO[matom] += rVBO
# unassigned.remove(matom)
return ringVBO
[docs]
def assign_VBS(atom: Atom, rVBO: dict[int, int], dVBO: dict[int, float]) -> int:
"""
Assigns a Valence-Bond-Sum (VBS) to an atom.
Parameters:
atom (ccdc.molecule.Atom): Atom object.
rVBO (dict[int, int]): dictionary with each atom's index in mole.atoms
as keys and VBO (valence bond order) as values.
dVBO (dict[int, float]): dictionary with delocalized bond-possessing
atom's index in mole.atoms as keys and their
corresponding (delocalized-only) VBS.
Returns:
VBO (int): valence bond sum value.
"""
VBO = 0
if atom.is_metal:
return 0
if atom in rVBO:
VBO = rVBO[atom]
else:
for bond in atom.bonds:
if any(batom.is_metal for batom in bond.atoms):
VBO += 0
# Each bond contributes to Ligand Bond Order according to its type
elif bond.bond_type == "Single":
VBO += 1
elif bond.bond_type == "Double":
VBO += 2
elif bond.bond_type == "Triple":
VBO += 3
elif bond.bond_type == "Quadruple":
VBO += 4
elif bond.bond_type == "Delocalised":
VBO += dVBO[atom]
elif bond.bond_type == "Aromatic":
# necessary? aVBO not defined
# VBO += aVBO[atom]
VBO += dVBO[atom]
return VBO
[docs]
def delocalisedLBO(molecule: Molecule) -> dict[int, float]:
"""
Writes a dictionary of all atoms in the molecule with delocalized bonds
and their (delocalized-only) valence bond sum (VBS).
Parameters:
molecule (ccdc.molecule.Molecule): Molecule object.
Returns:
delocal_dict (dict[int, float]): dictionary with delocalized bond-possessing
atom's index in mole.atoms as keys and their
corresponding (delocalized-only) VBS.
"""
def TerminusCounter(atomlist: list[Atom]) -> int:
"""
Counts the number of termini in the input delocalized bond system.
Parameters:
atomlist (list[ccdc.molecule.Atom]): list of atoms in delocalised system.
Returns:
NTerminus (int): number of termini in delocalized bond system.
"""
NTerminus = 0
for member in atomlist:
connectivity = 0
for bond in member.bonds:
if bond.bond_type == "Delocalised":
connectivity += 1
if connectivity == 1:
NTerminus += 1
return NTerminus
def delocal_crawl(atomlist: list[Atom]) -> list[Atom]:
"""
Recursively searches for atoms in delocalised bond systems starting from
an input list containing at least one delocalised bonding atom.
Parameters:
atomlist (list[ccdc.molecule.Atom)]: list of atoms in delocalised system.
Returns:
atomlist (list[ccdc.molecule.Atom]): modified list of atoms in
delocalised system.
"""
for delocatom in atomlist:
for bond in delocatom.bonds:
if bond.bond_type == "Delocalised":
for member in bond.atoms:
if not member in atomlist:
atomlist.append(member)
return delocal_crawl(atomlist)
return atomlist
delocal_dict = {}
for atom in molecule.atoms:
if all(
[
(any(bond.bond_type == "Delocalised" for bond in atom.bonds)),
(not atom in delocal_dict),
]
):
delocal_dict[atom] = []
delocal_system = delocal_crawl([atom])
NTerminus = TerminusCounter(delocal_system)
for datom in delocal_system:
connectivity = 0
delocLBO = 0
for neighbour in datom.neighbours:
if neighbour in delocal_system:
connectivity += 1
if connectivity == 1:
# terminus
delocLBO = (NTerminus + 1) / NTerminus
if connectivity > 1:
# node
delocLBO = (connectivity + 1) / connectivity
delocal_dict[datom] = delocLBO
return delocal_dict
[docs]
def get_CN(atom: Atom) -> int:
"""
Determines the coordination number of the input atom.
Parameters:
atom (ccdc.molecule.Atom): Atom object.
Returns:
coord_number (int): Atom's coordination number.
"""
CN = 0
for neighbour in atom.neighbours:
if not neighbour.is_metal:
CN += 1
return CN
[docs]
def valence_e(elmnt: Atom) -> int:
"""
Determines the number of valence electrons of an atom/element.
Parameters:
elmnt (ccdc.molecule.Atom): Atom object.
Returns:
valence (int): Atom's valence electron count.
"""
atom = mendeleev.element(elmnt.atomic_symbol)
if atom.block == "s":
valence = atom.group_id
if atom.block == "p":
valence = atom.group_id - 10
if atom.block == "d":
valence = atom.group_id
if atom.block == "f":
if atom.atomic_number in range(56, 72):
valence = atom.atomic_number - 57 + 3
elif atom.atomic_number in range(88, 104):
valence = atom.atomic_number - 89 + 3
if atom.group_id == 18:
valence = 8
if atom.symbol == "He":
valence = 2
return valence
[docs]
def carbocation_check(atom: Atom) -> Literal["tetrahedral", "trigonal"]:
"""
Check carbocation/carbanion geometry according to bond angles.
Parameters:
atom (ccdc.molecule.Atom): Atom object.
Returns:
Literal["tetrahedral", "trigonal"]: geometry at input atom.
"""
abc = []
# get atom neighbours
for neighbours in atom.neighbours:
if not neighbours.is_metal:
abc.append(neighbours)
# get all three relevant bond angles
angle1 = descriptors.MolecularDescriptors.atom_angle(abc[0], atom, abc[1])
angle2 = descriptors.MolecularDescriptors.atom_angle(abc[0], atom, abc[2])
angle3 = descriptors.MolecularDescriptors.atom_angle(abc[1], atom, abc[2])
# average the angels
AVGangle = abs(angle1 + angle2 + angle3) / 3
# take the difference between the averaged bond angles and
# ideal trigonal planar/tetrahedral bond angles
tet = abs(AVGangle - 109.5)
trig = abs(AVGangle - 120)
if tet < trig:
return "tetrahedral"
if trig < tet:
return "trigonal"
[docs]
def carbene_type(atom: Atom) -> Literal["singlet", "triplet"]:
"""
Distinguishes between singlet and triplet carbenes.
Parameters:
atom (ccdc.molecule.Atom): Atom object(s) suspected of belonging to a
carbene (2-coordinate carbon II).
Returns:
Literal["singlet", "triplet"]: carbene type at input atom.
"""
# get alpha-atoms
alpha = atom.neighbours
alpha_type = []
# get element symbols for alpha atoms
for a in alpha:
if not a.is_metal:
alpha_type.append(a.atomic_symbol)
# if any alpha atom is a heteroatom, return "singlet"
# these are Fischer carbenes
for a in alpha_type:
if not any([(a == "C"), (a == "H")]):
return "singlet"
# if the carbene C is in a heterocycle,
# return "singlet"
# there are Arduengo carbenes (NHCs, CAACs)
if atom.is_cyclic == True:
for ring in atom.rings:
for species in ring.atoms:
if not species.atomic_symbol == "C":
return "singlet"
# for all other carbenes, return "triplet"
# these are Schrock carbenes
return "triplet"
[docs]
def hapticity(atom: Atom, metalsite: list[Atom]) -> bool:
"""
Determines if a ligand binding site possesses hapticity (any n-hapto).
Parameters:
atom (ccdc.molecule.Atom): Atom object.
metalsites (list[ccdc.molecule.Atom]): list of metal sites in the structure
that belong to the asymmetric unit.
Returns:
bool: whether the the input ligand is hapto-.
"""
for atom2 in atom.neighbours:
if not atom2.is_metal:
if any(n2.label == metalsite.label for n2 in atom2.neighbours):
return True
return False
[docs]
def bridging(atom: Atom) -> int:
"""
Determines how many metal atoms the input atom binds to search for
bridging sites.
Parameters:
atom (ccdc.molecule.Atom): binding site Atom object.
Returns:
bridge (int): number of metal atoms bound to the atom.
"""
bridge = 0
for n in atom.neighbours:
if n.is_metal:
bridge += 1
return bridge
[docs]
def iVBS_Oxidation_Contrib(
unique_atoms: list[Atom], rVBO: dict[int, int], dVBO: dict[int, float]
) -> dict[Atom, float]:
"""
Determines the oxidation state contribution of all unique atoms.
Parameters:
unique_atoms (list[ccdc.molecule.Atom]): unique atoms belonging to the
asymmetric unit.
rVBO (dict[int, int]): dictionary with each atom's index in mole.atoms
as keys and VBO (valence bond order) as values.
dVBO (dict[int, float]): dictionary with delocalized bond-possessing atom's
index in mole.atoms as keys and their corresponding
(delocalized-only) VBS.
Returns:
oxi_contrib (dict[ccdc.molecule.Atom, float)]: dictionary with Atom object
as keys and their oxidation state contribution as values.
"""
VBS = 0
CN = 0
valence = 0
oxi_contrib = {}
# for each unique atom
for atom in unique_atoms:
# assign valence-bond-sum
VBS = assign_VBS(atom, rVBO, dVBO)
# determine coordination number
CN = get_CN(atom)
# determine number of valence electrons
valence = valence_e(atom)
# get number of unpaired electrons in the free element
unpaired_e = 4 - abs(4 - valence)
# metals do not contribute:
if atom.is_metal:
oxi_contrib[atom] = 0
# Normal valences:
elif VBS <= (unpaired_e):
oxi_contrib[atom] = unpaired_e - VBS
# Expanded (2e) valences:
elif (VBS > unpaired_e) and (VBS < valence):
diff = VBS - unpaired_e
if diff <= 2:
UPE = valence - unpaired_e - 2
elif diff <= 4:
UPE = valence - unpaired_e - 4
elif diff <= 6:
UPE = valence - unpaired_e - 6
elif diff <= 8:
UPE = valence - unpaired_e - 8
oxi_contrib[atom] = VBS + UPE - valence
elif VBS >= (valence):
oxi_contrib[atom] = VBS - valence
# need to check for 3-coordinate carbocations,
# 3-coordinate carbanions, carbenes, and heavier
# homologues (these are not immediately detectable)
if any(
[
(atom.atomic_symbol == "C"),
(atom.atomic_symbol == "Si"),
(atom.atomic_symbol == "Ge"),
(atom.atomic_symbol == "Pb"),
]
):
if not atom in rVBO:
# 3 coordinate and VBS 3 could be
# carbanion or carbocation
if VBS == 3 and CN == 3:
geom = carbocation_check(atom)
if geom == "trigonal":
oxi_contrib[atom] = -1
if geom == "tetrahedral":
oxi_contrib[atom] = 1
# VBS 2 and 2 coordinate is carbene,
# but singlet or triplet?
if VBS == 2 and CN == 2:
carbene = carbene_type(atom)
if carbene == "singlet":
oxi_contrib[atom] = 0
if carbene == "triplet":
oxi_contrib[atom] = 2
# Nitro groups frequently have both N-O bonds assigned
# as double bonds, giving incorrect VBS of 5
# and oxidation contribution of -2
# this block catches this and applies a fix
if all(
[
(atom.atomic_symbol == "N"),
(VBS == 5 and CN == 3),
]
):
N_sphere1 = atom.neighbours
O_count = 0
for neighbour in N_sphere1:
if neighbour.atomic_symbol == "O":
O_count += 1
geom = carbocation_check(atom)
if O_count == 2 and geom == "trigonal":
oxi_contrib[atom] = 0
return oxi_contrib
[docs]
def redundantAON(AON: dict[Atom, float], molecule: Molecule) -> dict[Atom, float]:
"""
Maps the oxidation contributions of unique atom sites to the redundant atom
sites according to their shared atom labels.
Parameters:
AON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
and their oxidation state contribution as values for unique
Atom objects.
molecule (ccdc.molecule.Molecule): Molecule object.
Returns:
redAON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
and their oxidation state contribution as values for all
(including redundant) Atom objects.
"""
redAON = {}
for rsite1 in molecule.atoms:
for usite1 in AON:
redAON[usite1] = AON[usite1]
if rsite1.label == usite1.label:
redAON[rsite1] = AON[usite1]
return redAON
[docs]
def binding_domain(
binding_sites: list[Atom],
AON: dict[Atom, float],
molecule: Molecule,
usites: list[Atom],
) -> dict[Atom, list[Atom]]:
"""
Builds bonding domains within the crystal structure to determine which
metal binding sites (Atom objects directly bonded to a metal) are connected
via conjugation. Function accounts for the inconsistent assignment of
delocalized bonds, by using the bonding domains (see methodology section
for details on the implementation and validation).
Parameters:
binding_sites (list[ccdc.molecule.Atom]): list of binding sites connecting
metal atoms and ligands.
AON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
and their oxidation state contribution as values for unique
Atom objects.
molecule (ccdc.molecule.Molecule): Molecule object.
uniquesites (list[ccdc.molecule.Atom]): list of unique atoms.
Returns:
sitedomain (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
dictionary with Atom object as keys and a list of Atoms
connected through bonding that form a binding domain
as values.
"""
def arom_domains(
site: Atom, usites: list[Atom], aromlist: list[Atom], bondset: list[Bond]
) -> list[Atom]:
"""
Recursively generate aromatic binding domains.
"""
for bond in site.bonds:
bondset.add(bond)
for bond in bondset:
for member in bond.atoms:
if all(
[
(not member in aromlist),
(not member.is_metal),
(any(mbond.bond_type == "Aromatic" for mbond in member.bonds)),
]
):
aromlist.append(member)
for mbond in member.bonds:
bondset.add(mbond)
return arom_domains(site, usites, aromlist, bondset)
# aromlist currently contains non-unique instances of atoms
# this will cause problems further down the line, so correct
for index, member in enumerate(aromlist):
aromlist[index] = usites[member.label]
return aromlist
def deloc_domains(
site: Atom,
usites: list[Atom],
AON: dict[Atom, float],
molecule: Molecule,
deloclist: list[Atom],
bondset: list[Bond],
checked_bonds: list[Bond],
) -> list[Atom]:
"""
Recursively generate delocalised binding domains.
"""
for bond in site.bonds:
if not bond in bondset:
bondset.add(bond)
for bond in bondset:
if not bond in checked_bonds:
for member in bond.atoms:
if all(
[
(not member in deloclist),
(not member.is_metal),
(
not any(
mbond.bond_type == "Aromatic"
for mbond in member.bonds
)
),
(
any(
[
(
len(
molecule.shortest_path_bonds(
site, member
)
)
<= 2
),
(bond.bond_type == "Delocalised"),
(bond.is_conjugated),
(
all(
[
(bond.bond_type == "Single"),
(not AON[member] == 0),
]
)
),
(
all(
[
(
not any(
mbond.bond_type == "Single"
for mbond in member.bonds
)
),
(
not any(
mbond.bond_type
== "Aromatic"
for mbond in member.bonds
)
),
(
not any(
mbond.bond_type
== "Delocalised"
for mbond in member.bonds
)
),
]
)
),
]
)
),
]
):
deloclist.append(member)
for mbond in member.bonds:
bondset.add(mbond)
checked_bonds.add(bond)
return deloc_domains(
site, usites, AON, molecule, deloclist, bondset, checked_bonds
)
# deloclist currently contains non-unique instances of atoms
# this will cause problems further down the line, so correct
for index, member in enumerate(deloclist):
deloclist[index] = usites[member.label]
return deloclist
sitedomain = {}
for site in binding_sites:
if not site.is_metal == True:
if any(sbond.bond_type == "Aromatic" for sbond in site.bonds):
sitedomain[site] = arom_domains(
site, usites, aromlist=[site], bondset=set()
)
if not any(sbond.bond_type == "Aromatic" for sbond in site.bonds):
sitedomain[site] = deloc_domains(
site,
usites,
AON,
molecule,
deloclist=[site],
bondset=set(),
checked_bonds=set(),
)
for site in sitedomain:
olapset = set()
for site2 in sitedomain:
for member in sitedomain[site]:
if member in sitedomain[site2]:
olapset.add(site2)
for olap in olapset:
sitedomain[site] = list(set(sitedomain[site]) | set(sitedomain[olap]))
sitedomain[olap] = sitedomain[site]
return sitedomain
[docs]
def binding_contrib(
binding_sphere: dict[Atom, list[Atom]],
binding_sites: list[Atom],
AON: dict[Atom, float],
) -> dict[Atom, float]:
"""
Redistributes oxidation state contributions within a binding domain.
Equal distribution is assumed across connected binding sites in each domain.
Parameters:
binding_sphere (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
dictionary with Atom object as keys and a list of Atoms
connected through bonding that form a binding domain as values.
binding_sites (list[ccdc.molecule.Atom]): list of binding sites connecting
metal atoms and ligands.
AON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
and their oxidation state contribution as values for unique Atoms.
Returns:
site_contrib (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
and their updated oxidation state contribution as values accounting
for distribution within the binding domain.
"""
site_contrib = {}
for site in binding_sphere:
site_contrib[site] = 0
nbinding = 0
for member in binding_sphere[site]:
if member in binding_sites:
nbinding += 1
site_contrib[site] += AON[member]
site_contrib[site] /= nbinding
return site_contrib
[docs]
def outer_sphere_domain(
uniquesites: list[Atom], binding_domains: dict[Atom, list[Atom]]
) -> list[Atom]:
"""
Identifies sites outside of the binding domains which must be checked for
outer sphere charge contributions.
Parameters:
uniquesites (list[ccdc.molecule.Atom]): list of unique atoms in the structure
belonging to the asymmetric unit.
binding_domains (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
dictionary with Atom object as keys and a list of Atoms
connected through bonding that form a binding domain as values.
Returns:
outer_sphere (list[ccdc.molecule.Atom]): list of unique, non-metal atoms
outside of binding domains.
"""
outer_sphere = []
for site in uniquesites:
if all(
[
(
not any(
site in binding_domains[domain] for domain in binding_domains
)
),
(not site.is_metal),
]
):
outer_sphere.append(site)
return outer_sphere
[docs]
def outer_sphere_contrib(outer_sphere: list[Atom], AON: dict[Atom, float]) -> int:
"""
Calculates the total oxidation state contribution of the outer sphere atoms as
the sum of their formal charge/contributions.
Parameters:
outer_sphere (list[ccdc.molecule.Atom]): list of unique, non-metal atoms
outside of binding domains.
AON (dict[ccdc.molecule.Atom, float]): dictionary with Atom object as keys
and their oxidation state contribution as values for unique Atoms.
Returns:
contrib (int): sum of outer sphere charge contributions.
"""
contrib = 0
for site in outer_sphere:
contrib += AON[site]
return contrib
[docs]
def distribute_ONEC(
sONEC: dict[Atom, list[float, float]],
metal_networks: dict[Atom, list[Atom]],
IEs: dict[str, list[float]],
ONP: dict[str, list[float]],
highest_known_ON: dict[str, int],
metal_CN: dict[Molecule, int],
most_probable_ON: dict[str, int],
) -> dict[Atom, list[float, float]]:
"""
Redistributes the oxidation state contributions across all metal atoms in
the structure according to their metal networks (fully local distribution)
& calculates their associated electron counts. Features utilizing electron
counts are minimally implemented at this time.
Parameters:
sONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary with
metal Atom object as keys and lists containing the initial
oxidation state and electron count implied by only the
equal splitting of binding domain charges as values.
metal_networks (dict[ccdc.molecule.Atom, list[ccdc.molecule.Atom]]):
dictionary with as metal Atom objects as keys and a list of
other metal Atom objects connected through binding domains/
charged ligands as values. Ignores neutral ligand connections.
IEs (dict[str, list[float]]): dictionary with metal element symbols as keys
and a list of their reported ionization energies as values.
ONP (dict[str, list[float]]): dictionary with metal element symbols as keys
and a list of the probability at the relevant oxidation states
as values.
highest_known_ON (dict[str, int]) : dictionary with metal element symbols as
keys and a their highest known oxidation state as values.
metal_CN (dict[ccdc.molecule.Molecule, int]): dictionary with as metal Atom
objects as keys and their effective coordination number as values.
most_probable_ON (dict[str, int]) : dictionary with metal element symbols as
keys and a their oxidation state with the highest probability
as values.
Returns:
distributed_ONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary
with metal Atom object as keys and lists containing their
redistributed oxidation state and electron count as values.
"""
def recursive_distributor_single_network(
iONEC: dict[Atom, list[float, float]],
available_charge: int,
sorted_metals: dict[str, Atom],
IEs: dict[str, list[float]],
ONP: dict[str, list[float]],
highest_known_ON: dict[str, int],
) -> dict[Atom, list[float, float]]:
"""
Distribute network charge according to ionization energy and probability
until all charge is distributed. Performed after tallying available
network charge and sorting network metals by element type.
"""
# initialize working dictionary
dONEC = {}
dONEC = dict(iONEC)
# positive contribution?
if available_charge > 0:
# get list of improbable and improbable next oxidations
prob_metal_type = []
improb_metal_type = []
for metal_type in sorted_metals:
try:
prob = float(
100
* ONP[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0]) + 1
]
)
except IndexError:
prob = 0
if prob >= 1:
prob_metal_type.append(metal_type)
else:
improb_metal_type.append(metal_type)
# if only one metal type has a probable next oxidation state, do that
if len(prob_metal_type) == 1:
lowestMetal = prob_metal_type[0]
# if more than one metal type has a probable next oxidation state,
# determine next lowest ionization energy among probable next
# oxidation states
elif len(prob_metal_type) > 1:
# find lowest next ionization energy
for metal_type in prob_metal_type:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[sorted_metals[metal_type][0]][0] < 0:
currentIE = 0
# metal oxidation state at or higher than highest known? Set IE arbitrarily high.
elif (
dONEC[sorted_metals[metal_type][0]][0]
>= highest_known_ON[metal_type]
):
currentIE = 9999
# otherwise, use the appropriate IE.
else:
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
]
)
if not "lowestIE" in locals():
lowestIE = currentIE
lowestMetal = metal_type
else:
if currentIE < lowestIE:
lowestIE = currentIE
lowestMetal = metal_type
# if there is no probable next oxidation state available,
# determine lowest ionization energy among improbable next oxidation states
elif len(prob_metal_type) == 0:
# find lowest next ionization energy
for metal_type in improb_metal_type:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[sorted_metals[metal_type][0]][0] < 0:
currentIE = 0
# metal oxidation state at or higher than highest known? Set IE arbitrarily high.
elif (
dONEC[sorted_metals[metal_type][0]][0]
>= highest_known_ON[metal_type]
):
currentIE = 9999
# otherwise, use the appropriate IE.
else:
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
]
)
if not "lowestIE" in locals():
lowestIE = currentIE
lowestMetal = metal_type
else:
if currentIE < lowestIE:
lowestIE = currentIE
lowestMetal = metal_type
# distribute one ionization energy level worth of charge
if available_charge >= len(sorted_metals[lowestMetal]):
for metal in sorted_metals[lowestMetal]:
dONEC[metal][0] += 1
available_charge -= 1
elif available_charge < len(sorted_metals[lowestMetal]):
for metal in sorted_metals[lowestMetal]:
dONEC[metal][0] += available_charge / (
len(sorted_metals[lowestMetal])
)
available_charge = 0
# negative contribution?
if available_charge < 0:
# get list of improbable and improbable next oxidations
prob_metal_type = []
improb_metal_type = []
for metal_type in sorted_metals:
try:
prob = float(
100
* ONP[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0]) - 1
]
)
except IndexError:
prob = 0
if prob >= 1:
prob_metal_type.append(metal_type)
else:
improb_metal_type.append(metal_type)
# if only one metal type has a probable next oxidation state, do that
if len(prob_metal_type) == 1:
highestMetal = prob_metal_type[0]
# if more than one metal type has a probable next oxidation state,
# determine next highest ionization energy among probable next
# oxidation states
elif len(prob_metal_type) > 1:
for metal_type in prob_metal_type:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[sorted_metals[metal_type][0]][0] <= 0:
currentIE = 0
# metal oxidation state higher than highest known? Set IE arbitrarily high.
elif (
dONEC[sorted_metals[metal_type][0]][0]
> highest_known_ON[metal_type]
):
currentIE = 9999
# otherwise, use the appropriate IE.
else:
if not any(
[
(
math.isclose(
(dONEC[sorted_metals[metal_type][0]][0] % 1),
0,
abs_tol=0.0001,
)
),
(
math.isclose(
(dONEC[sorted_metals[metal_type][0]][0] % 1),
1,
abs_tol=0.0001,
)
),
]
):
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
]
)
else:
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
- 1
]
)
if not "highestIE" in locals():
highestIE = currentIE
highestMetal = metal_type
else:
if currentIE > highestIE:
highestIE = currentIE
highestMetal = metal_type
# if no probable next oxidation states are available,
# determine next highest ionization energy among probable next
# oxidation states
elif len(improb_metal_type) > 0:
for metal_type in improb_metal_type:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[sorted_metals[metal_type][0]][0] <= 0:
currentIE = 0
# metal oxidation state higher than highest known? Set IE arbitrarily high.
elif (
dONEC[sorted_metals[metal_type][0]][0]
> highest_known_ON[metal_type]
):
currentIE = 9999
# otherwise, use the appropriate IE.
else:
if not any(
[
(
math.isclose(
(dONEC[sorted_metals[metal_type][0]][0] % 1),
0,
abs_tol=0.0001,
)
),
(
math.isclose(
(dONEC[sorted_metals[metal_type][0]][0] % 1),
1,
abs_tol=0.0001,
)
),
]
):
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
]
)
else:
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
- 1
]
)
if not "highestIE" in locals():
highestIE = currentIE
highestMetal = metal_type
else:
if currentIE > highestIE:
highestIE = currentIE
highestMetal = metal_type
# distribute one ionization energy level worth of charge
if (-1 * available_charge) >= len(sorted_metals[highestMetal]):
for metal in sorted_metals[highestMetal]:
dONEC[metal][0] -= 1
available_charge += 1
elif (-1 * available_charge) < len(sorted_metals[highestMetal]):
for metal in sorted_metals[highestMetal]:
dONEC[metal][0] += available_charge / (
len(sorted_metals[highestMetal])
)
available_charge = 0
# if all charge has been distributed, we're done, otherwise, roll again
if available_charge == 0:
return dONEC
else:
return recursive_distributor_single_network(
dONEC, available_charge, sorted_metals, IEs, ONP, highest_known_ON
)
# operate on each network individually
distributed_ONEC = {}
for network in metal_networks:
# sort metals by element type
sorted_metals = {}
for metal in metal_networks[network]:
sorted_metals[metal.atomic_symbol] = []
for metal in metal_networks[network]:
sorted_metals[metal.atomic_symbol].append(metal)
# tally up network charge to be distributed
# and initialize metals to most probable ON
# (adjust network charge accordingly)
network_charge = 0
for metal in metal_networks[network]:
network_charge += sONEC[metal][0]
distributed_ONEC[metal] = [most_probable_ON[metal.atomic_symbol]]
network_charge -= most_probable_ON[metal.atomic_symbol]
distributed_ONEC[metal].append(int(sONEC[metal][1]))
# if the most probable oxidation distribution has already balanced the charge, we're done
# if not, recursively distribute network charge according to ionization energy
if not (math.isclose(network_charge, 0, abs_tol=0.0001)):
distributed_ONEC = recursive_distributor_single_network(
distributed_ONEC,
network_charge,
sorted_metals,
IEs,
ONP,
highest_known_ON,
)
# finally, adjust electron count to new oxidation state (OiL RiG)
for metal in metal_networks[network]:
distributed_ONEC[metal][1] = (
valence_e(metal) + (2 * metal_CN[metal]) - distributed_ONEC[metal][0]
)
return distributed_ONEC
[docs]
def distribute_OuterSphere(
sONEC: dict[Atom, list[float, float]],
outer_sphere_charge: int,
IEs: dict[Atom, list[Atom]],
ONP: dict[str, list[float]],
highest_known_ON: dict[str, int],
metal_CN: dict[Molecule, int],
) -> dict[Atom, list[float, float]]:
"""
Redistributes the oxidation state contributions across all metal atoms in
the structure according to the outer sphere charge contribution (partially
local distribution) & calculates their associated electron counts.
Features utilizing electron counts are minimally implemented at this time.
Parameters:
sONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary with
metal Atom object as keys and lists containing the initial
oxidation state and electron count implied by only the equal
splitting of binding domain charges as values.
outer_sphere_charge (int): sum of outer sphere charge contributions.
IEs (dict[str, list(float)]): dictionary with metal element symbols as keys
and a list of their reported ionization energies as values.
ONP (dict[str, list(float)]): dictionary with metal element symbols as keys
and a list of the probability at the relevant oxidation
states as values.
highest_known_ON (dict[str, int]) : dictionary with metal element symbols
as keys and a their highest known oxidation state as values.
metal_CN (dict[ccdc.molecule.Molecule, int]): dictionary with as metal Atom
objects as keys and their effective coordination number
as values.
Returns:
distributed_ONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary
with metal Atom object as keys and lists containing their
redistributed oxidation state and electron count as values.
"""
def recursive_distributor(
iONEC: dict[Atom, list[float, float]],
available_charge: int,
IEs: dict[str, list[float]],
ONP: dict[str, list[float]],
highest_known_ON: dict[str, int],
) -> dict[Atom, list[float, float]]:
"""
Distribute network charge according to ionization energy and highest
allowable oxidation state until all charge is distributed. Performed after
tallying available network charge and sorting network metals by element type.
"""
# initialize working dictionary
dONEC = {}
dONEC = dict(iONEC)
# positive contribution?
if available_charge > 0:
# get list of probable and improbable next oxidations
prob_metals = []
improb_metals = []
for metal in dONEC:
try:
prob = float(
100 * ONP[metal.atomic_symbol][math.floor(dONEC[metal][0]) + 1]
)
except IndexError:
prob = 0
if prob >= 1:
prob_metals.append(metal)
else:
improb_metals.append(metal)
if len(prob_metals) == 1:
lowestMetals = prob_metals
elif len(prob_metals) > 1:
for metal in prob_metals:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[metal][0] < 0:
currentIE = 0
# metal oxidation state at or higher than highest known? Set IE arbitrarily high.
elif dONEC[metal][0] >= highest_known_ON[metal.atomic_symbol]:
currentIE = 9999
# otherwise, use the appropriate IE.
else:
currentIE = float(
IEs[metal.atomic_symbol][math.floor(dONEC[metal][0])]
)
if not "lowestIE" in locals():
lowestIE = currentIE
lowestMetals = [metal]
else:
if currentIE == lowestIE:
lowestMetals.append(metal)
if currentIE < lowestIE:
lowestIE = currentIE
lowestMetals = [metal]
elif len(prob_metals) == 0:
for metal in improb_metals:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[metal][0] < 0:
currentIE = 0
# metal oxidation state at or higher than highest known? Set IE arbitrarily high.
elif dONEC[metal][0] >= highest_known_ON[metal.atomic_symbol]:
currentIE = 9999
# otherwise, use the appropriate IE.
else:
currentIE = float(
IEs[metal.atomic_symbol][math.floor(dONEC[metal][0])]
)
if not "lowestIE" in locals():
lowestIE = currentIE
lowestMetals = [metal]
else:
if currentIE == lowestIE:
lowestMetals.append(metal)
if currentIE < lowestIE:
lowestIE = currentIE
lowestMetals = [metal]
# distribute one ionization energy level worth of charge
if available_charge >= len(lowestMetals):
for metal in lowestMetals:
dONEC[metal][0] += 1
available_charge -= 1
elif available_charge < len(lowestMetals):
for metal in lowestMetals:
dONEC[metal][0] += available_charge / (len(lowestMetals))
available_charge = 0
# negative contribution?
if available_charge < 0:
# get list of improbable and improbable next oxidations
prob_metals = []
improb_metals = []
for metal in dONEC:
try:
prob = float(
100 * ONP[metal.atomic_symbol][math.floor(dONEC[metal][0]) - 1]
)
except IndexError:
prob = 0
if prob >= 1:
prob_metals.append(metal)
else:
improb_metals.append(metal)
if len(prob_metals) == 1:
highestMetals = prob_metals
elif len(prob_metals) > 1:
for metal in prob_metals:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[metal][0] <= 0:
currentIE = 0
# metal oxidation state higher than highest known? Set IE arbitrarily high.
elif dONEC[metal][0] > highest_known_ON[metal.atomic_symbol]:
currentIE = 9999
# otherwise, use the appropriate IE.
else:
if not any(
[
(
math.isclose(
(dONEC[metal][0] % 1), 0, abs_tol=0.0001
)
),
(
math.isclose(
(dONEC[metal][0] % 1), 1, abs_tol=0.0001
)
),
]
):
currentIE = float(
IEs[metal.atomic_symbol][math.floor(dONEC[metal][0])]
)
else:
currentIE = float(
IEs[metal.atomic_symbol][
math.floor(dONEC[metal][0]) - 1
]
)
if not "highestIE" in locals():
highestIE = currentIE
highestMetals = [metal]
else:
if currentIE == highestIE:
highestMetals.append(metal)
if currentIE > highestIE:
highestIE = currentIE
highestMetals = [metal]
else:
for metal in improb_metals:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[metal][0] <= 0:
currentIE = 0
# metal oxidation state higher than highest known? Set IE arbitrarily high.
elif dONEC[metal][0] > highest_known_ON[metal.atomic_symbol]:
currentIE = 9999
# otherwise, use the appropriate IE.
else:
if not any(
[
(
math.isclose(
(dONEC[metal][0] % 1), 0, abs_tol=0.0001
)
),
(
math.isclose(
(dONEC[metal][0] % 1), 1, abs_tol=0.0001
)
),
]
):
currentIE = float(
IEs[metal.atomic_symbol][math.floor(dONEC[metal][0])]
)
else:
currentIE = float(
IEs[metal.atomic_symbol][
math.floor(dONEC[metal][0]) - 1
]
)
if not "highestIE" in locals():
highestIE = currentIE
highestMetals = [metal]
else:
if currentIE == highestIE:
highestMetals.append(metal)
if currentIE > highestIE:
highestIE = currentIE
highestMetals = [metal]
# distribute one ionization energy level worth of charge
if (-1 * available_charge) >= len(highestMetals):
for metal in highestMetals:
dONEC[metal][0] -= 1
available_charge += 1
elif (-1 * available_charge) < len(highestMetals):
for metal in highestMetals:
dONEC[metal][0] += available_charge / (len(highestMetals))
available_charge = 0
# if all charge has been distributed, we're done, otherwise, roll again
if available_charge == 0:
return dONEC
else:
return recursive_distributor(
dONEC, available_charge, IEs, ONP, highest_known_ON
)
if outer_sphere_charge == 0:
return sONEC
distributed_ONEC = {}
# initialize dictionary for charge distribution
for metal in sONEC:
distributed_ONEC[metal] = [sONEC[metal][0]]
distributed_ONEC[metal].append(int(sONEC[metal][1]))
# recursively distribute network charge according to ionization energy
distributed_ONEC = recursive_distributor(
distributed_ONEC, outer_sphere_charge, IEs, ONP, highest_known_ON
)
# finally, adjust electron count to new oxidation state (OiL RiG)
for metal in sONEC:
distributed_ONEC[metal][1] = (
valence_e(metal) + (2 * metal_CN[metal]) - distributed_ONEC[metal][0]
)
return distributed_ONEC
[docs]
def global_charge_distribution(
metalONdict: dict[Atom, list[float, float]],
IEs: dict[str, list[float]],
ONP: dict[str, list[float]],
highest_known_ON: dict[str, int],
metal_CN: dict[Molecule, int],
most_probable_ON: dict[str, int],
) -> dict[Atom, list[float, float]]:
"""
Redistributes the oxidation state contributions across all metal atoms in
the structure according to full/global shating (fully delocalized distribution)
& calculates their associated electron counts. Features utilizing electron
counts are minimally implemented at this time.
Parameters:
metalONdict (dict[ccdc.molecule.Atom, list[float, float]]): dictionary with
metal Atom object as keys and lists containing the initial
oxidation state and electron count implied by only the
equal splitting of binding domain charges as values.
IEs (dict[str, list[float]]): dictionary with metal element symbols as keys
and a list of their reported ionization energies as values.
ONP (dict[str, list[float]]): dictionary with metal element symbols as keys
and a list of the probability at the relevant oxidation
states as values.
highest_known_ON (dict[str, int]) : dictionary with metal element symbols
as keys and a their highest known oxidation state as values.
metal_CN (dict[ccdc.molecule.Molecule, int]): dictionary with as metal Atom
objects as keys and their effective coordination number
as values.
most_probable_ON (dict[str, int]) : dictionary with metal element symbols
as keys and a their oxidation state with the highest
probability as values.
Returns:
global_ONEC (dict[ccdc.molecule.Atom, list[float, float]]): dictionary with
metal Atom object as keys and lists containing their
redistributed oxidation state and electron count as values.
"""
global_ONEC = {}
def recursive_distributor_global(
iONEC: dict[Atom, list[float, float]],
available_charge: int,
sorted_metals: dict[str, Atom],
IEs: dict[str, list[float]],
ONP: dict[str, list[float]],
highest_known_ON: dict[str, int],
) -> dict[Atom, list[float, float]]:
"""
Distribute network charge according to ionization energy and probability
until all charge is distributed. Performed after tallying available network
charge and sorting network metals by element type.
"""
# initialize working dictionary
dONEC = {}
dONEC = dict(iONEC)
# positive contribution?
if available_charge > 0:
# get list of improbable and improbable next oxidations
prob_metal_type = []
improb_metal_type = []
for metal_type in sorted_metals:
try:
prob = float(
100
* ONP[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0]) + 1
]
)
except IndexError:
prob = 0
if prob >= 1:
prob_metal_type.append(metal_type)
else:
improb_metal_type.append(metal_type)
# if only one metal type has a probable next oxidation state, do that
if len(prob_metal_type) == 1:
lowestMetal = prob_metal_type[0]
# if more than one metal type has a probable next oxidation state,
# determine next lowest ionization energy among probable next
# oxidation states
elif len(prob_metal_type) > 1:
# find lowest next ionization energy
for metal_type in prob_metal_type:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[sorted_metals[metal_type][0]][0] < 0:
currentIE = 0
# metal oxidation state at or higher than highest known? Set IE arbitrarily high.
elif (
dONEC[sorted_metals[metal_type][0]][0]
>= highest_known_ON[metal_type]
):
currentIE = 9999
# otherwise, use the appropriate IE.
else:
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
]
)
if not "lowestIE" in locals():
lowestIE = currentIE
lowestMetal = metal_type
else:
if currentIE < lowestIE:
lowestIE = currentIE
lowestMetal = metal_type
# if there is no probable next oxidation state available,
# determine lowest ionization energy among improbable next oxidation states
elif len(prob_metal_type) == 0:
# find lowest next ionization energy
for metal_type in improb_metal_type:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[sorted_metals[metal_type][0]][0] < 0:
currentIE = 0
# metal oxidation state at or higher than highest known? Set IE arbitrarily high.
elif (
dONEC[sorted_metals[metal_type][0]][0]
>= highest_known_ON[metal_type]
):
currentIE = 9999
# otherwise, use the appropriate IE.
else:
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
]
)
if not "lowestIE" in locals():
lowestIE = currentIE
lowestMetal = metal_type
else:
if currentIE < lowestIE:
lowestIE = currentIE
lowestMetal = metal_type
# distribute one ionization energy level worth of charge
if available_charge >= len(sorted_metals[lowestMetal]):
for metal in sorted_metals[lowestMetal]:
dONEC[metal][0] += 1
available_charge -= 1
elif available_charge < len(sorted_metals[lowestMetal]):
for metal in sorted_metals[lowestMetal]:
dONEC[metal][0] += available_charge / (
len(sorted_metals[lowestMetal])
)
available_charge = 0
# negative contribution?
if available_charge < 0:
# get list of improbable and improbable next oxidations
prob_metal_type = []
improb_metal_type = []
for metal_type in sorted_metals:
try:
prob = float(
100
* ONP[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0]) - 1
]
)
except IndexError:
prob = 0
if prob >= 1:
prob_metal_type.append(metal_type)
else:
improb_metal_type.append(metal_type)
# if only one metal type has a probable next oxidation state, do that
if len(prob_metal_type) == 1:
highestMetal = prob_metal_type[0]
# if more than one metal type has a probable next oxidation state,
# determine next highest ionization energy among probable next
# oxidation states
elif len(prob_metal_type) > 1:
for metal_type in prob_metal_type:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[sorted_metals[metal_type][0]][0] <= 0:
currentIE = 0
# metal oxidation state higher than highest known? Set IE arbitrarily high.
elif (
dONEC[sorted_metals[metal_type][0]][0]
> highest_known_ON[metal_type]
):
currentIE = 9999
# otherwise, use the appropriate IE.
else:
if not any(
[
(
math.isclose(
(dONEC[sorted_metals[metal_type][0]][0] % 1),
0,
abs_tol=0.0001,
)
),
(
math.isclose(
(dONEC[sorted_metals[metal_type][0]][0] % 1),
1,
abs_tol=0.0001,
)
),
]
):
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
]
)
else:
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
- 1
]
)
if not "highestIE" in locals():
highestIE = currentIE
highestMetal = metal_type
else:
if currentIE > highestIE:
highestIE = currentIE
highestMetal = metal_type
# if no probable next oxidation states are available,
# determine next highest ionization energy among probable next
# oxidation states
elif len(improb_metal_type) > 0:
for metal_type in improb_metal_type:
# metal in a negative oxidation state? Use IE = 0.
if dONEC[sorted_metals[metal_type][0]][0] <= 0:
currentIE = 0
# metal oxidation state higher than highest known? Set IE arbitrarily high.
elif (
dONEC[sorted_metals[metal_type][0]][0]
> highest_known_ON[metal_type]
):
currentIE = 9999
# otherwise, use the appropriate IE.
else:
if not any(
[
(
math.isclose(
(dONEC[sorted_metals[metal_type][0]][0] % 1),
0,
abs_tol=0.0001,
)
),
(
math.isclose(
(dONEC[sorted_metals[metal_type][0]][0] % 1),
1,
abs_tol=0.0001,
)
),
]
):
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
]
)
else:
currentIE = float(
IEs[metal_type][
math.floor(dONEC[sorted_metals[metal_type][0]][0])
- 1
]
)
if not "highestIE" in locals():
highestIE = currentIE
highestMetal = metal_type
else:
if currentIE > highestIE:
highestIE = currentIE
highestMetal = metal_type
# distribute one ionization energy level worth of charge
if (-1 * available_charge) >= len(sorted_metals[highestMetal]):
for metal in sorted_metals[highestMetal]:
dONEC[metal][0] -= 1
available_charge += 1
elif (-1 * available_charge) < len(sorted_metals[highestMetal]):
for metal in sorted_metals[highestMetal]:
dONEC[metal][0] += available_charge / (
len(sorted_metals[highestMetal])
)
available_charge = 0
# if all charge has been distributed, we're done, otherwise, roll again
if available_charge == 0:
return dONEC
else:
return recursive_distributor_global(
dONEC, available_charge, sorted_metals, IEs, ONP, highest_known_ON
)
# sort metals by element type
sorted_metals = {}
for metal in metalONdict:
sorted_metals[metal.atomic_symbol] = []
for metal in metalONdict:
sorted_metals[metal.atomic_symbol].append(metal)
# tally up the global charge to be distributed
# and initialize global ON to most probable for all metals
global_charge = 0
for metal in metalONdict:
global_charge += metalONdict[metal][0]
global_ONEC[metal] = [most_probable_ON[metal.atomic_symbol]]
global_charge -= most_probable_ON[metal.atomic_symbol]
global_ONEC[metal].append(int(metalONdict[metal][1]))
# recursively distribute network charge according to ionization energy
if math.isclose(global_charge, 0, abs_tol=0.0001):
distributed_ONEC = global_ONEC
else:
distributed_ONEC = recursive_distributor_global(
global_ONEC, global_charge, sorted_metals, IEs, ONP, highest_known_ON
)
# finally, adjust electron count to new oxidation state (OiL RiG)
for metal in metalONdict:
global_ONEC[metal][1] = (
valence_e(metal) + (2 * metal_CN[metal]) - distributed_ONEC[metal][0]
)
return global_ONEC
[docs]
def KnownONs() -> dict[str, list[int]]:
"""
Reads in the known oxidation states for each metal element.
Returns:
KONs (dict[str, list(int)]) : dictionary with metal element symbols as keys
and a list of their known oxidation states as values.
"""
data_str = """
H -1 1
He
Li 1
Be 0 1 2
B -5 -1 0 1 2 3
C -4 -3 -2 -1 0 1 2 3 4
N -3 -2 -1 1 2 3 4 5
O -2 -1 0 1 2
F -1 0
Ne
Na -1 1
Mg 0 1 2
Al -2 -1 1 2 3
Si -4 -3 -2 -1 0 1 2 3 4
P -3 -2 -1 0 1 2 3 4 5
S -2 -1 0 1 2 3 4 5 6
Cl -1 1 2 3 4 5 6 7
Ar 0
K -1 1
Ca 0 1 2
Sc 0 1 2 3
Ti -2 -1 0 1 2 3 4
V -3 1 0 1 2 3 4 5
Cr -4 -2 -1 0 1 2 3 4 5 6
Mn -3 -2 -1 0 1 2 3 4 5 6 7
Fe -4 -2 -1 0 1 2 3 4 5 6 7
Co -3 -1 0 1 2 3 4 5
Ni -2 -1 0 1 2 3 4
Cu -2 0 1 2 3 4
Zn -2 0 1 2
Ga -5 -4 -3 -2 -1 1 2 3
Ge -3 -2 -1 0 1 2 3 4
As -3 -2 -1 0 1 2 3 4 5
Se -2 -1 1 2 3 4 5 6
Br -1 1 3 4 5 7
Kr 0 1 2
Rb -1 1
Sr 0 1 2
Y 0 1 2 3
Zr -2 0 1 2 3 4
Nb -3 -1 0 1 2 3 4 5
Mo -4 -2 -1 0 1 2 3 4 5 6
Tc -3 -1 0 1 2 3 4 5 6 7
Ru -4 -2 0 1 2 3 4 5 6 7 8
Rh -3 -1 0 1 2 3 4 5 6
Pd 0 1 2 3 4
Ag -2 -1 1 2 3
Cd -2 1 2
In -5 -2 -2 1 2 3
Sn -4 -3 -2 -1 0 1 2 3 4
Sb -3 -2 -1 0 1 2 3 4 5
Te -2 -1 1 3 4 5 6 7
I -1 1 3 4 5 6 7
Xe 0 2 4 6 8
Cs -1 1
Ba 0 1 2
La 0 1 2 3
Ce 0 2 3 4
Pr 0 1 2 3 4 5
Nd 0 2 3 4
Pm 2 3
Sm 0 2 3
Eu 0 2 3
Gd 0 1 2 3
Tb 0 1 2 3 4
Dy 0 2 3 4
Ho 0 2 3
Er 0 2 3
Tm 0 2 3
Yb 0 2 3
Lu 0 2 3
Hf -2 0 1 2 3 4
Ta -3 -1 0 1 2 3 4 5
W -4 -2 -1 0 1 2 3 4 5 6
Re -3 -1 0 1 2 3 4 5 6 7
Os -4 -2 -1 0 1 2 3 4 5 6 7 8
Ir -3 -1 0 1 2 3 4 5 6 7 8 9
Pt -3 -2 -1 0 1 2 3 4 5 6
Au -3 -2 -1 0 1 2 3 5
Hg -2 1 2
Tl -5 -2 -1 1 2 3
Pb -4 -2 -1 1 2 3 4
Bi -3 -2 -1 1 2 3 4 5
Po -2 2 4 5 6
At -1 1 3 5 7
Rn 2 6
Fr 1
Ra 2
Ac 2 3
Th 1 2 3 4
Pa 3 4 5
U 1 2 3 4 5 6
Np 2 3 4 5 6 7
Pu 2 3 4 5 6 7 8
Am 2 3 4 5 6 7
Cm 3 4 5 6
Bk 2 3 4 5
Cf 2 3 4 5
Es 2 3 4
Fm 2 3
Md 2 3
No 2 3
Lr 3
Rf 4
Db 5
Sg 0 6
Bh 7
Hs 8
Mt
Ds
Rg
Cn 2
Nh
Fl
Mc
Lv
Ts
Og
""".strip()
KONs: dict[str, list[int]] = {}
for line in data_str.splitlines():
parts = line.split()
elem = parts[0]
# only parse lines with at least one oxidation state
if len(parts) > 1:
# map all remaining tokens to integers
states = [int(x) for x in parts[1:]]
KONs[elem] = states
else:
# no data: use empty list (or choose a default if you prefer)
KONs[elem] = []
return KONs
[docs]
def IonizationEnergies() -> dict[str, list[float]]:
"""
Reads in the reported ionization energies for each metal element.
Returns:
KIEs (dict[str, list[float]]): dictionary with metal element symbols as keys
and a list of their reported ionization energies as values.
"""
data_str = """
0 H 13.5984346
0 He 24.58738901
1 He 54.41776549
0 Li 5.391714996
1 Li 75.640097
2 Li 122.4543591
0 Be 9.322699
1 Be 18.21115
2 Be 153.896205
3 Be 217.7185861
0 B 8.298019
1 B 25.15483
2 B 37.93059
3 B 259.3715
4 B 340.2260229
0 C 11.260288
1 C 24.383154
2 C 47.88778
3 C 64.49352
4 C 392.090518
5 C 489.993198
0 N 14.53413
1 N 29.60125
2 N 47.4453
3 N 77.4735
4 N 97.8901
5 N 552.06733
6 N 667.046121
0 O 13.618055
1 O 35.12112
2 O 54.93554
3 O 77.4135
4 O 113.899
5 O 138.1189
6 O 739.32683
7 O 871.409883
0 F 17.42282
1 F 34.97081
2 F 62.70798
3 F 87.175
4 F 114.249
5 F 157.16311
6 F 185.1868
7 F 953.89805
8 F 1103.11748
0 Ne 21.564541
1 Ne 40.96297
2 Ne 63.4233
3 Ne 97.19
4 Ne 126.247
5 Ne 157.934
6 Ne 207.271
7 Ne 239.097
8 Ne 1195.80784
9 Ne 1362.19916
0 Na 5.13907696
1 Na 47.28636
2 Na 71.62
3 Na 98.936
4 Na 138.404
5 Na 172.23
6 Na 208.504
7 Na 264.192
8 Na 299.856
9 Na 1465.134502
0 Mg 7.646236
1 Mg 15.035271
2 Mg 80.1436
3 Mg 109.2654
4 Mg 141.33
5 Mg 186.76
6 Mg 225.02
7 Mg 265.924
8 Mg 327.99
9 Mg 367.489
0 Al 5.985769
1 Al 18.82855
2 Al 28.447642
3 Al 119.9924
4 Al 153.8252
5 Al 190.49
6 Al 241.76
7 Al 284.64
8 Al 330.21
9 Al 398.65
0 Si 8.15168
1 Si 16.34585
2 Si 33.493
3 Si 45.14179
4 Si 166.767
5 Si 205.279
6 Si 246.57
7 Si 303.59
8 Si 351.28
9 Si 401.38
0 P 10.486686
1 P 19.76949
2 P 30.20264
3 P 51.44387
4 P 65.02511
5 P 220.43
6 P 263.57
7 P 309.6
8 P 372.31
9 P 424.4
0 S 10.36001
1 S 23.33788
2 S 34.86
3 S 47.222
4 S 72.5945
5 S 88.0529
6 S 280.954
7 S 328.794
8 S 379.84
9 S 447.7
0 Cl 12.967633
1 Cl 23.81364
2 Cl 39.8
3 Cl 53.24
4 Cl 67.68
5 Cl 96.94
6 Cl 114.2013
7 Cl 348.306
8 Cl 400.851
9 Cl 456.7
0 Ar 15.7596119
1 Ar 27.62967
2 Ar 40.735
3 Ar 59.58
4 Ar 74.84
5 Ar 91.29
6 Ar 124.41
7 Ar 143.4567
8 Ar 422.6
9 Ar 479.76
0 K 4.34066373
1 K 31.625
2 K 45.8031
3 K 60.917
4 K 82.66
5 K 99.44
6 K 117.56
7 K 154.87
8 K 175.8174
9 K 503.67
0 Ca 6.11315547
1 Ca 11.871719
2 Ca 50.91316
3 Ca 67.2732
4 Ca 84.34
5 Ca 108.78
6 Ca 127.21
7 Ca 147.24
8 Ca 188.54
9 Ca 211.275
0 Sc 6.56149
1 Sc 12.79977
2 Sc 24.756839
3 Sc 73.4894
4 Sc 91.95
5 Sc 110.68
6 Sc 137.99
7 Sc 158.08
8 Sc 180.03
9 Sc 225.18
0 Ti 6.82812
1 Ti 13.5755
2 Ti 27.49171
3 Ti 43.26717
4 Ti 99.299
5 Ti 119.533
6 Ti 140.68
7 Ti 170.5
8 Ti 192.1
9 Ti 215.92
0 V 6.746187
1 V 14.634
2 V 29.3111
3 V 46.709
4 V 65.28165
5 V 128.125
6 V 150.72
7 V 173.55
8 V 206
9 V 230.5
0 Cr 6.76651
1 Cr 16.486305
2 Cr 30.959
3 Cr 49.16
4 Cr 69.46
5 Cr 90.6349
6 Cr 160.29
7 Cr 184.76
8 Cr 209.5
9 Cr 244.5
0 Mn 7.434038
1 Mn 15.63999
2 Mn 33.668
3 Mn 51.21
4 Mn 72.41
5 Mn 95.604
6 Mn 119.203
7 Mn 195.5
8 Mn 221.89
9 Mn 248.6
0 Fe 7.9024681
1 Fe 16.19921
2 Fe 30.651
3 Fe 54.91
4 Fe 75
5 Fe 98.985
6 Fe 124.976
7 Fe 151.06
8 Fe 233.6
9 Fe 262.1
0 Co 7.88101
1 Co 17.0844
2 Co 33.5
3 Co 51.27
4 Co 79.5
5 Co 102
6 Co 128.9
7 Co 157.8
8 Co 186.14
9 Co 275.4
0 Ni 7.639878
1 Ni 18.168838
2 Ni 35.187
3 Ni 54.92
4 Ni 76.06
5 Ni 108
6 Ni 132
7 Ni 162
8 Ni 193.2
9 Ni 224.7
0 Cu 7.72638
1 Cu 20.29239
2 Cu 36.841
3 Cu 57.38
4 Cu 79.8
5 Cu 103
6 Cu 139
7 Cu 166
8 Cu 198
9 Cu 232.2
0 Zn 9.394197
1 Zn 17.96439
2 Zn 39.7233
3 Zn 59.573
4 Zn 82.6
5 Zn 108
6 Zn 133.9
7 Zn 173.9
8 Zn 203
9 Zn 238
0 Ga 5.999302
1 Ga 20.51514
2 Ga 30.72576
3 Ga 63.241
4 Ga 86.01
5 Ga 112.7
6 Ga 140.8
7 Ga 169.9
8 Ga 211
9 Ga 244
0 Ge 7.899435
1 Ge 15.93461
2 Ge 34.0576
3 Ge 45.7155
4 Ge 90.5
5 Ge 115.9
6 Ge 144.9
7 Ge 176.4
8 Ge 212.5
9 Ge 252.1
0 As 9.78855
1 As 18.5892
2 As 28.349
3 As 50.15
4 As 62.77
5 As 121.19
6 As 147
7 As 180
8 As 213
9 As 247
0 Se 9.752392
1 Se 21.196
2 Se 31.697
3 Se 42.947
4 Se 68.3
5 Se 81.83
6 Se 155.327
7 Se 184
8 Se 219
9 Se 255
0 Br 11.81381
1 Br 21.591
2 Br 34.871
3 Br 47.782
4 Br 59.595
5 Br 87.39
6 Br 103.03
7 Br 192.61
8 Br 224
9 Br 261
0 Kr 13.9996055
1 Kr 24.35984
2 Kr 35.838
3 Kr 50.85
4 Kr 64.69
5 Kr 78.49
6 Kr 109.13
7 Kr 125.802
8 Kr 233
9 Kr 268
0 Rb 4.1771281
1 Rb 27.28954
2 Rb 39.247
3 Rb 52.2
4 Rb 68.44
5 Rb 82.9
6 Rb 98.67
7 Rb 132.79
8 Rb 150.628
9 Rb 277.12
0 Sr 5.69486745
1 Sr 11.0302765
2 Sr 42.88353
3 Sr 56.28
4 Sr 70.7
5 Sr 88
6 Sr 104
7 Sr 121.21
8 Sr 158.33
9 Sr 177.3
0 Y 6.21726
1 Y 12.2236
2 Y 20.52441
3 Y 60.6072
4 Y 75.35
5 Y 91.39
6 Y 110.02
7 Y 128.12
8 Y 145.64
9 Y 185.7
0 Zr 6.634126
1 Zr 13.13
2 Zr 23.17
3 Zr 34.41836
4 Zr 80.348
5 Zr 96.38
6 Zr 112
7 Zr 133.7
8 Zr 153
9 Zr 172.02
0 Nb 6.75885
1 Nb 14.32
2 Nb 25.04
3 Nb 37.611
4 Nb 50.5728
5 Nb 102.069
6 Nb 119.1
7 Nb 136
8 Nb 159.2
9 Nb 180
0 Mo 7.09243
1 Mo 16.16
2 Mo 27.13
3 Mo 40.33
4 Mo 54.417
5 Mo 68.82704
6 Mo 125.638
7 Mo 143.6
8 Mo 164.12
9 Mo 186.3
0 Tc 7.11938
1 Tc 15.26
2 Tc 29.55
3 Tc 41
4 Tc 57
5 Tc 72
6 Tc 88
7 Tc 150
8 Tc 169
9 Tc 189.9
0 Ru 7.3605
1 Ru 16.76
2 Ru 28.47
3 Ru 45
4 Ru 59
5 Ru 76
6 Ru 93
7 Ru 110
8 Ru 178.41
9 Ru 198
0 Rh 7.4589
1 Rh 18.08
2 Rh 31.06
3 Rh 42
4 Rh 63
5 Rh 80
6 Rh 97
7 Rh 115.1
8 Rh 135
9 Rh 207.51
0 Pd 8.336839
1 Pd 19.43
2 Pd 32.93
3 Pd 46
4 Pd 61
5 Pd 84.1
6 Pd 101
7 Pd 120
8 Pd 141
9 Pd 159.9
0 Ag 7.576234
1 Ag 21.4844
2 Ag 34.8
3 Ag 49
4 Ag 65
5 Ag 82
6 Ag 106
7 Ag 125
8 Ag 145.1
9 Ag 167
0 Cd 8.99382
1 Cd 16.908313
2 Cd 37.468
3 Cd 51
4 Cd 67.9
5 Cd 87
6 Cd 105
7 Cd 130.1
8 Cd 150
9 Cd 173
0 In 5.7863558
1 In 18.87041
2 In 28.04415
3 In 55.45
4 In 69.3
5 In 90
6 In 109
7 In 130.1
8 In 156
9 In 178
0 Sn 7.343918
1 Sn 14.63307
2 Sn 30.506
3 Sn 40.74
4 Sn 77.03
5 Sn 94
6 Sn 112.9
7 Sn 135
8 Sn 156
9 Sn 184
0 Sb 8.608389
1 Sb 16.626
2 Sb 25.3235
3 Sb 43.804
4 Sb 55
5 Sb 99.51
6 Sb 117
7 Sb 139
8 Sb 162
9 Sb 185
0 Tb 9.009808
1 Tb 18.6
2 Tb 27.84
3 Tb 37.4155
4 Tb 59.3
5 Tb 69.1
6 Tb 124.2
7 Tb 143
8 Tb 167
9 Tb 191.1
0 I 10.45126
1 I 19.13126
2 I 29.57
3 I 40.357
4 I 51.52
5 I 74.4
6 I 87.61
7 I 150.81
8 I 171
9 I 197
0 Xe 12.1298437
1 Xe 20.975
2 Xe 31.05
3 Xe 42.2
4 Xe 54.1
5 Xe 66.703
6 Xe 91.6
7 Xe 105.9778
8 Xe 179.84
9 Xe 202
0 Cs 3.893905727
1 Cs 23.15745
2 Cs 33.195
3 Cs 43
4 Cs 56
5 Cs 69.1
6 Cs 82.9
7 Cs 110.1
8 Cs 125.61
9 Cs 213.3
0 Ba 5.2116646
1 Ba 10.003826
2 Ba 35.8438
3 Ba 47
4 Ba 58
5 Ba 71
6 Ba 86
7 Ba 101
8 Ba 130.5
9 Ba 146.52
0 La 5.5769
1 La 11.18496
2 La 19.1773
3 La 49.95
4 La 61.6
5 La 74
6 La 88
7 La 105
8 La 119
9 La 151.4
0 Ce 5.5386
1 Ce 10.956
2 Ce 20.1974
3 Ce 36.906
4 Ce 65.55
5 Ce 77.6
6 Ce 91
7 Ce 106
8 Ce 125
9 Ce 140
0 Pr 5.4702
1 Pr 10.631
2 Pr 21.6237
3 Pr 38.981
4 Pr 57.53
5 Pr 82
6 Pr 97
7 Pr 112
8 Pr 131
9 Pr 148
0 Nd 5.525
1 Nd 10.783
2 Nd 22.09
3 Nd 40.6
4 Nd 60
5 Nd 84
6 Nd 99
7 Nd 114
8 Nd 136
9 Nd 152
0 Pm 5.58187
1 Pm 10.938
2 Pm 22.44
3 Pm 41.17
4 Pm 61.7
5 Pm 85
6 Pm 101
7 Pm 116
8 Pm 138
9 Pm 155
0 Sm 5.643722
1 Sm 11.078
2 Sm 23.55
3 Sm 41.64
4 Sm 62.7
5 Sm 87
6 Sm 103
7 Sm 118
8 Sm 141
9 Sm 158
0 Eu 5.670385
1 Eu 11.24
2 Eu 24.84
3 Eu 42.94
4 Eu 63.2
5 Eu 89
6 Eu 105
7 Eu 120
8 Eu 144
9 Eu 161
0 Gd 6.1498
1 Gd 12.076
2 Gd 20.54
3 Gd 44.44
4 Gd 64.8
5 Gd 89
6 Gd 106
7 Gd 123
8 Gd 144
9 Gd 165
0 Tb 5.8638
1 Tb 11.513
2 Tb 21.82
3 Tb 39.33
4 Tb 66.5
5 Tb 90
6 Tb 108
7 Tb 125
8 Tb 143
9 Tb 168
0 Dy 5.93905
1 Dy 11.647
2 Dy 22.89
3 Dy 41.23
4 Dy 62.1
5 Dy 93
6 Dy 110
7 Dy 127
8 Dy 152
9 Dy 170
0 Ho 6.0215
1 Ho 11.781
2 Ho 22.79
3 Ho 42.52
4 Ho 63.9
5 Ho 95
6 Ho 112
7 Ho 129
8 Ho 155
9 Ho 173
0 Er 6.1077
1 Er 11.916
2 Er 22.7
3 Er 42.42
4 Er 65.1
5 Er 96
6 Er 114
7 Er 131
8 Er 158
9 Er 177
0 Tm 6.18431
1 Tm 12.065
2 Tm 23.66
3 Tm 42.41
4 Tm 65.4
5 Tm 98
6 Tm 116
7 Tm 133
8 Tm 160
9 Tm 180
0 Yb 6.25416
1 Yb 12.179185
2 Yb 25.053
3 Yb 43.61
4 Yb 65.6
5 Yb 99
6 Yb 117
7 Yb 135
8 Yb 163
9 Yb 182
0 Lu 5.425871
1 Lu 14.13
2 Lu 20.9594
3 Lu 45.249
4 Lu 66.8
5 Lu 98
6 Lu 117
7 Lu 136
8 Lu 159
9 Lu 185
0 Hf 6.82507
1 Hf 14.61
2 Hf 22.55
3 Hf 33.37
4 Hf 68.37
5 Hf 98
6 Hf 118
7 Hf 137
8 Hf 157
9 Hf 187
0 Ta 7.549571
1 Ta 16.2
2 Ta 23.1
3 Ta 35
4 Ta 48.272
5 Ta 94.01
6 Ta 119
7 Ta 139
8 Ta 159
9 Ta 180
0 W 7.86403
1 W 16.37
2 W 26
3 W 38.2
4 W 51.6
5 W 64.77
6 W 122.01
7 W 141.2
8 W 160.2
9 W 179
0 Re 7.83352
1 Re 16.6
2 Re 27
3 Re 39.1
4 Re 51.9
5 Re 67
6 Re 82.71
7 Re 144.4
8 Re 165
9 Re 187
0 Os 8.43823
1 Os 17
2 Os 25
3 Os 41
4 Os 55
5 Os 70.1
6 Os 85.1
7 Os 102.02
8 Os 168.7
9 Os 190
0 Ir 8.96702
1 Ir 17
2 Ir 28
3 Ir 40
4 Ir 57
5 Ir 72
6 Ir 89
7 Ir 105
8 Ir 122.7
9 Ir 194.8
0 Pt 8.95883
1 Pt 18.56
2 Pt 29
3 Pt 43
4 Pt 56
5 Pt 75
6 Pt 91
7 Pt 109
8 Pt 126
9 Pt 144.9
0 Au 9.225554
1 Au 20.203
2 Au 30
3 Au 45
4 Au 60
5 Au 74
6 Au 94
7 Au 112
8 Au 130.1
9 Au 149
0 Hg 10.437504
1 Hg 18.75688
2 Hg 34.49
3 Hg 48.55
4 Hg 61.2
5 Hg 76.6
6 Hg 93
7 Hg 113.9
8 Hg 134
9 Hg 153
0 Tl 6.1082873
1 Tl 20.4283
2 Tl 29.852
3 Tl 51.14
4 Tl 62.6
5 Tl 80
6 Tl 97.9
7 Tl 116
8 Tl 135
9 Tl 158
0 Pb 7.4166799
1 Pb 15.032499
2 Pb 31.9373
3 Pb 42.33256
4 Pb 68.8
5 Pb 82.9
6 Pb 100.1
7 Pb 120
8 Pb 138
9 Pb 158
0 Bi 7.285516
1 Bi 16.703
2 Bi 25.57075
3 Bi 45.37
4 Bi 54.856
5 Bi 88.4
6 Bi 103
7 Bi 122
8 Bi 143
9 Bi 161.1
0 Po 8.41807
1 Po 19.3
2 Po 27.3
3 Po 36
4 Po 57
5 Po 69.1
6 Po 108
7 Po 125
8 Po 146.1
9 Po 166
0 At 9.31751
1 At 17.88
2 At 26.58
3 At 39.65
4 At 50.39
5 At 72
6 At 85.1
7 At 130.1
8 At 149
9 At 169
0 Rn 10.7485
1 Rn 21.4
2 Rn 29.4
3 Rn 36.9
4 Rn 52.9
5 Rn 64
6 Rn 88
7 Rn 102
8 Rn 154
9 Rn 173.9
0 Fr 4.0727411
1 Fr 22.4
2 Fr 33.5
3 Fr 39.1
4 Fr 50
5 Fr 67
6 Fr 80
7 Fr 106
8 Fr 120
9 Fr 179
0 Ra 5.2784239
1 Ra 10.14718
2 Ra 31
3 Ra 41
4 Ra 52.9
5 Ra 64
6 Ra 82
7 Ra 97
8 Ra 124
9 Ra 140
0 Ac 5.380226
1 Ac 11.75
2 Ac 17.431
3 Ac 44.8
4 Ac 55
5 Ac 67
6 Ac 79
7 Ac 98.9
8 Ac 113.9
9 Ac 143.9
0 Th 6.3067
1 Th 12.1
2 Th 18.32
3 Th 28.648
4 Th 58
5 Th 69.1
6 Th 82
7 Th 95
8 Th 118
9 Th 133
0 Pa 5.89
1 Pa 11.9
2 Pa 18.6
3 Pa 30.9
4 Pa 44.3
5 Pa 72
6 Pa 85.1
7 Pa 98.9
8 Pa 111
9 Pa 137
0 U 6.19405
1 U 11.6
2 U 19.8
3 U 36.7
4 U 46
5 U 62
6 U 89
7 U 101
8 U 116
9 U 128.9
0 Np 6.26554
1 Np 11.5
2 Np 19.7
3 Np 33.8
4 Np 48
5 Np 65
6 Np 92
7 Np 107
8 Np 121
9 Np 136
0 Pu 6.02576
1 Pu 11.5
2 Pu 21.1
3 Pu 35
4 Pu 49
5 Pu 80
6 Pu 95
7 Pu 109
8 Pu 124
9 Pu 139
0 Am 5.97381
1 Am 11.7
2 Am 21.7
3 Am 36.8
4 Am 50
5 Am 67.9
6 Am 95
7 Am 110
8 Am 125
9 Am 141
0 Cm 5.99141
1 Cm 12.4
2 Cm 20.1
3 Cm 37.7
4 Cm 51
5 Cm 69.1
6 Cm 97
7 Cm 112
8 Cm 128
9 Cm 144
0 Bk 6.19785
1 Bk 11.9
2 Bk 21.6
3 Bk 36
4 Bk 56
5 Bk 70.1
6 Bk 90
7 Bk 114
8 Bk 130
9 Bk 147
0 Cf 6.28166
1 Cf 12
2 Cf 22.4
3 Cf 37.7
4 Cf 51.9
5 Cf 75
6 Cf 91
7 Cf 112.9
8 Cf 133
9 Cf 152
0 Es 6.36758
1 Es 12.2
2 Es 22.7
3 Es 38.8
4 Es 54.1
5 Es 71
6 Es 97
7 Es 112.9
8 Es 137
9 Es 157
0 Fm 6.5
1 Fm 12.4
2 Fm 23.2
3 Fm 39.3
4 Fm 55
5 Fm 74
6 Fm 93
7 Fm 120
8 Fm 136
9 Fm 162
0 Md 6.58
1 Md 12.4
2 Md 24.3
3 Md 40
4 Md 54.1
5 Md 76
6 Md 96
7 Md 115.1
8 Md 143.9
9 Md 162
0 No 6.62621
1 No 12.93
2 No 25.8
3 No 41.5
4 No 60
5 No 74
6 No 97
7 No 119
8 No 140
9 No 170
0 Lr 4.96
1 Lr 14.54
2 Lr 21.8
3 Lr 43.6
4 Lr 56
5 Lr 80
6 Lr 96
7 Lr 121
8 Lr 143
9 Lr 165
0 Rf 6.02
1 Rf 14.35
2 Rf 23.84
3 Rf 31.87
4 Rf 64
5 Rf 77
6 Rf 102
7 Rf 119
8 Rf 146.1
9 Rf 169
0 Db 6.8
1 Db 14
2 Db 23.1
3 Db 33
4 Db 43
5 Db 86
6 Db 98.9
7 Db 126
8 Db 145.1
9 Db 172
0 Sg 7.8
1 Sg 17.1
2 Sg 25.8
3 Sg 35.5
4 Sg 47.2
5 Sg 59.3
6 Sg 109
7 Sg 122
8 Sg 152
9 Sg 170
0 Bh 7.7
1 Bh 17.5
2 Bh 26.7
3 Bh 37.3
4 Bh 49
5 Bh 62.1
6 Bh 74.9
7 Bh 134
8 Bh 148
9 Bh 178
0 Hs 7.6
1 Hs 18.2
2 Hs 29.3
3 Hs 37.7
4 Hs 51.2
5 Hs 64
6 Hs 78.1
7 Hs 91.7
8 Hs 159.9
9 Hs 173.9
4 Mt 50
5 Mt
6 Mt
7 Mt 94
8 Mt 109
9 Mt 187
5 Ds 65
6 Ds
7 Ds
8 Ds 112.9
9 Ds 128
""".strip()
KIEs: dict[str, list[float]] = {}
for line in data_str.splitlines():
parts = line.split()
if len(parts) < 3:
continue # skip blank or malformed lines
_, elem, energy = parts
KIEs.setdefault(elem, []).append(float(energy))
return KIEs
[docs]
def HighestKnownONs() -> dict[str, int]:
"""
Determines the highest known oxidation states for each metal element.
Returns:
HKONs (dict[str, int]) : dictionary with metal element symbols as keys
and their highest known oxidation state as values.
"""
data_str = """
H -1 1
He
Li 1
Be 0 1 2
B -5 -1 0 1 2 3
C -4 -3 -2 -1 0 1 2 3 4
N -3 -2 -1 1 2 3 4 5
O -2 -1 0 1 2
F -1 0
Ne
Na -1 1
Mg 0 1 2
Al -2 -1 1 2 3
Si -4 -3 -2 -1 0 1 2 3 4
P -3 -2 -1 0 1 2 3 4 5
S -2 -1 0 1 2 3 4 5 6
Cl -1 1 2 3 4 5 6 7
Ar 0
K -1 1
Ca 0 1 2
Sc 0 1 2 3
Ti -2 -1 0 1 2 3 4
V -3 1 0 1 2 3 4 5
Cr -4 -2 -1 0 1 2 3 4 5 6
Mn -3 -2 -1 0 1 2 3 4 5 6 7
Fe -4 -2 -1 0 1 2 3 4 5 6 7
Co -3 -1 0 1 2 3 4 5
Ni -2 -1 0 1 2 3 4
Cu -2 0 1 2 3 4
Zn -2 0 1 2
Ga -5 -4 -3 -2 -1 1 2 3
Ge -3 -2 -1 0 1 2 3 4
As -3 -2 -1 0 1 2 3 4 5
Se -2 -1 1 2 3 4 5 6
Br -1 1 3 4 5 7
Kr 0 1 2
Rb -1 1
Sr 0 1 2
Y 0 1 2 3
Zr -2 0 1 2 3 4
Nb -3 -1 0 1 2 3 4 5
Mo -4 -2 -1 0 1 2 3 4 5 6
Tc -3 -1 0 1 2 3 4 5 6 7
Ru -4 -2 0 1 2 3 4 5 6 7 8
Rh -3 -1 0 1 2 3 4 5 6
Pd 0 1 2 3 4
Ag -2 -1 1 2 3
Cd -2 1 2
In -5 -2 -2 1 2 3
Sn -4 -3 -2 -1 0 1 2 3 4
Sb -3 -2 -1 0 1 2 3 4 5
Te -2 -1 1 3 4 5 6 7
I -1 1 3 4 5 6 7
Xe 0 2 4 6 8
Cs -1 1
Ba 0 1 2
La 0 1 2 3
Ce 0 2 3 4
Pr 0 1 2 3 4 5
Nd 0 2 3 4
Pm 2 3
Sm 0 2 3
Eu 0 2 3
Gd 0 1 2 3
Tb 0 1 2 3 4
Dy 0 2 3 4
Ho 0 2 3
Er 0 2 3
Tm 0 2 3
Yb 0 2 3
Lu 0 2 3
Hf -2 0 1 2 3 4
Ta -3 -1 0 1 2 3 4 5
W -4 -2 -1 0 1 2 3 4 5 6
Re -3 -1 0 1 2 3 4 5 6 7
Os -4 -2 -1 0 1 2 3 4 5 6 7 8
Ir -3 -1 0 1 2 3 4 5 6 7 8 9
Pt -3 -2 -1 0 1 2 3 4 5 6
Au -3 -2 -1 0 1 2 3 5
Hg -2 1 2
Tl -5 -2 -1 1 2 3
Pb -4 -2 -1 1 2 3 4
Bi -3 -2 -1 1 2 3 4 5
Po -2 2 4 5 6
At -1 1 3 5 7
Rn 2 6
Fr 1
Ra 2
Ac 2 3
Th 1 2 3 4
Pa 3 4 5
U 1 2 3 4 5 6
Np 2 3 4 5 6 7
Pu 2 3 4 5 6 7 8
Am 2 3 4 5 6 7
Cm 3 4 5 6
Bk 2 3 4 5
Cf 2 3 4 5
Es 2 3 4
Fm 2 3
Md 2 3
No 2 3
Lr 3
Rf 4
Db 5
Sg 0 6
Bh 7
Hs 8
Mt
Ds
Rg
Cn 2
Nh
Fl
Mc
Lv
Ts
Og
""".strip()
HKONs: dict[str, int] = {}
for line in data_str.splitlines():
parts = line.split()
elem = parts[0]
# ignore entries with no numeric states
if len(parts) > 1:
# convert all remaining tokens to ints, take the max
states = [int(x) for x in parts[1:]]
HKONs[elem] = max(states)
else:
# if no oxidation state listed, set to 0 or remove entirely
HKONs[elem] = 0
return HKONs
[docs]
def ONprobabilities() -> dict[str, list[float]]:
"""
Reads in the probability of each oxidation state for all metal elements.
Approximate probabilities are assessed by their relative frequency of
occurence in the CSD metadata.
Returns:
ONP (dict[str, list[float]]): dictionary with metal element symbols as keys
and a list of the probability at the relevant oxidation
states as values.
"""
data_str = """
Li 0 0.993162393 0.003418803 0.001709402 0.001709402 0 0 0 0 0 0
Be 0.023529412 0 0.976470588 0 0 0 0 0 0 0 0
Na 0 0.998743719 0 0.001256281 0 0 0 0 0 0 0
Mg 0.002141328 0.016416845 0.981441827 0 0 0 0 0 0 0 0
Al 0 0.020121951 0.016463415 0.962804878 0.000609756 0 0 0 0 0 0
K 0 0.998137803 0 0.001862197 0 0 0 0 0 0 0
Ca 0 0.001176471 0.998823529 0 0 0 0 0 0 0 0
Sc 0 0.003656307 0.010968921 0.985374771 0 0 0 0 0 0 0
Ti 0.004095004 0.00020475 0.028460278 0.128992629 0.838247338 0 0 0 0 0 0
V 0.002768166 0.007612457 0.037197232 0.133564014 0.391868512 0.426816609 0.00017301 0 0 0 0
Cr 0.160139589 0.017836371 0.138037999 0.612252811 0.014928267 0.018805739 0.037999225 0 0 0 0
Mn 0.001919631 0.044599437 0.64864346 0.260621961 0.039928334 0.003519324 0.000319939 0.000447914 0 0 0
Fe 0.016919249 0.011083465 0.545216014 0.42053834 0.0059715 0.000226193 4.52E-05 0 0 0 0
Co 0.002508241 0.021678372 0.655403469 0.31990827 0.00035832 0.000143328 0 0 0 0 0
Ni 0.020168417 0.010846588 0.935890772 0.031222927 0.001871296 0 0 0 0 0 0
Cu 6.78E-05 0.236206984 0.75977341 0.003951765 0 0 0 0 0 0 0
Zn 7.78E-05 0.000778362 0.999104884 3.89E-05 0 0 0 0 0 0 0
Ga 0.003392706 0.100932994 0.066157761 0.828668363 0.000848176 0 0 0 0 0 0
Rb 0 0.9875 0.0125 0 0 0 0 0 0 0 0
Sr 0 0 1 0 0 0 0 0 0 0 0
Y 0.000608273 0.000608273 0.001824818 0.996350365 0.000608273 0 0 0 0 0 0
Zr 0.001476378 0 0.030019685 0.039370079 0.928149606 0 0.000984252 0 0 0 0
Nb 0.006417112 0.03315508 0.02459893 0.100534759 0.19144385 0.643850267 0 0 0 0 0
Mo 0.067368705 0.007830431 0.106115836 0.052922911 0.132847307 0.184150128 0.448764682 0 0 0 0
Tc 0 0.1488 0.0592 0.2256 0.056 0.4288 0.04 0.0416 0 0 0
Ru 0.01519372 0.011901747 0.819870009 0.119355111 0.027348696 0.000759686 0.005571031 0 0 0 0
Rh 0.002670673 0.455850442 0.141545652 0.397095643 0.00166917 0.001168419 0 0 0 0 0
Pd 0.018729923 0.009834445 0.961255251 0.004101804 0.005979738 9.88E-05 0 0 0 0 0
Ag 0.000163733 0.988784282 0.006631191 0.004420794 0 0 0 0 0 0 0
Cd 0 0.000421977 0.998818466 0.000759558 0 0 0 0 0 0 0
In 0 0.056062581 0.024771838 0.91916558 0 0 0 0 0 0 0
Sn 0.00222187 0.00095223 0.194730995 0.001110935 0.800666561 0.000158705 0.000158705 0 0 0 0
Cs 0 0.986486486 0.013513514 0 0 0 0 0 0 0 0
Ba 0 0 0.998236332 0.001763668 0 0 0 0 0 0 0
Hf 0 0 0.015473888 0.025145068 0.959381044 0 0 0 0 0 0
Ta 0.003726708 0.016149068 0.01242236 0.068322981 0.116770186 0.782608696 0 0 0 0 0
W 0.128280743 0.003538779 0.131229726 0.027720436 0.146564435 0.130934827 0.431731053 0 0 0 0
Re 0.001812324 0.316149819 0.044905356 0.124043496 0.065243657 0.364679823 0.017921869 0.065243657 0 0 0
Os 0.009817672 0.005610098 0.460729313 0.140953717 0.204067321 0.016830295 0.152173913 0.000701262 0.00911641 0 0
Ir 0.001902045 0.201854494 0.027104137 0.752258678 0.011650024 0.005230623 0 0 0 0 0
Pt 0.017449918 0.005606652 0.844021671 0.01870984 0.114085927 0 0.000125992 0 0 0 0
Au 0.001029204 0.731892448 0.015824006 0.251125691 0.00012865 0 0 0 0 0 0
Hg 0.00251004 0.013805221 0.983433735 0.000251004 0 0 0 0 0 0 0
Tl 0.001342282 0.616107383 0.013422819 0.369127517 0 0 0 0 0 0 0
Pb 0 0.000545852 0.964792576 0.000272926 0.034388646 0 0 0 0 0 0
Bi 0 0 0 1 0 0 0 0 0 0 0
Po 0 0 0 0 0 0 0 0 0 0 0
Fr 0 0 0 0 0 0 0 0 0 0 0
Ra 0 0 0 0 0 0 0 0 0 0 0
Rf 0 0 0 0 0 0 0 0 0 0 0
Db 0 0 0 0 0 0 0 0 0 0 0
Sg 0 0 0 0 0 0 0 0 0 0 0
Bh 0 0 0 0 0 0 0 0 0 0 0
Hs 0 0 0 0 0 0 0 0 0 0 0
Mt 0 0 0 0 0 0 0 0 0 0 0
Ds 0 0 0 0 0 0 0 0 0 0 0
Rg 0 0 0 0 0 0 0 0 0 0 0
Cn 0 0 0 0 0 0 0 0 0 0 0
Nh 0 0 0 0 0 0 0 0 0 0 0
Fl 0 0 0 0 0 0 0 0 0 0 0
Mc 0 0 0 0 0 0 0 0 0 0 0
Lv 0 0 0 0 0 0 0 0 0 0 0
La 0 0 0.002631579 0.996842105 0.000526316 0 0 0 0 0 0
Ce 0 0.000666223 0.005996003 0.742838108 0.250499667 0 0 0 0 0 0
Pr 0 0 0.002636204 0.993848858 0.003514938 0 0 0 0 0 0
Nd 0 0 0.005083884 0.994916116 0 0 0 0 0 0 0
Pm 0 0 0 1 0 0 0 0 0 0 0
Sm 0 0 0.153056235 0.846943765 0 0 0 0 0 0 0
Eu 0 0 0.081716037 0.918283963 0 0 0 0 0 0 0
Gd 0.000430663 0 0.005167959 0.994401378 0 0 0 0 0 0 0
Tb 0 0 0.132275132 0.866717057 0.001007811 0 0 0 0 0 0
Dy 0 0 0.007676768 0.992323232 0 0 0 0 0 0 0
Ho 0.001218027 0 0.007308161 0.990255786 0.001218027 0 0 0 0 0 0
Er 0 0 0.099644793 0.899233502 0.001121705 0 0 0 0 0 0
Tm 0 0 0.112903226 0.887096774 0 0 0 0 0 0 0
Yb 0 0 0.268343816 0.731656184 0 0 0 0 0 0 0
Lu 0 0 0.003663004 0.996336996 0 0 0 0 0 0 0
Ac 0 0 0 0 0.333333333 0.666666667 0 0 0 0 0
Th 0 0 0.008896797 0.019572954 0.96975089 0 0.001779359 0 0 0 0
Pa 0 0 0 0 0.333333333 0.666666667 0 0 0 0 0
U 0.000308452 0 0.003701419 0.098704503 0.333127699 0.064466379 0.499691548 0 0 0 0
Np 0 0 0.004048583 0.04048583 0.246963563 0.453441296 0.251012146 0.004048583 0 0 0
Pu 0 0 0 0.231292517 0.510204082 0.040816327 0.217687075 0 0 0 0
Am 0 0 0 0.857142857 0 0.057142857 0.085714286 0 0 0 0
Cm 0 0 0 1 0 0 0 0 0 0 0
Bk 0 0 0 1 0 0 0 0 0 0 0
Cf 0 0 0 1 0 0 0 0 0 0 0
Es 0 0 0 0 0 0 0 0 0 0 0
Fm 0 0 0 0 0 0 0 0 0 0 0
Md 0 0 0 0 0 0 0 0 0 0 0
No 0 0 0 0 0 0 0 0 0 0 0
Lr 0 0 0 0 0 0 0 0 0 0 0
Ge 0.00867052 0.012524085 0.512524085 0.002890173 0.462427746 0 0.000963391 0 0 0 0
As 0 0.021459227 0 0.811158798 0 0.167381974 0 0 0 0 0
Se 0 0.023809524 0.619047619 0 0.325396825 0 0.031746032 0 0 0 0
Sb 0 0.002742732 0 0.541963796 0.000548546 0.45419638 0.000548546 0 0 0 0
Te 0 0.001194743 0.338112306 0.001194743 0.628434886 0 0.031063321 0 0 0 0
""".strip()
ONP: dict[str, list[float]] = {}
for line in data_str.splitlines():
parts = line.split() # tab/space 都能分
element = parts[0]
probs = [float(x) for x in parts[1:]]
ONP[element] = probs
return ONP
[docs]
def ONmostprob(iONP: dict[str, list[float]]) -> dict[str, int]:
"""
Determines the highest probability oxidation state for each metal element.
These values are utilized during charge distribution routines.
Parameters:
iONP (dict[str, list[float]]): dictionary with metal element symbols as
keys and a list of the probability at the relevant oxidation
states as values.
Returns:
MPOS (dict[str, int]) : dictionary with metal element symbols as keys and
their oxidation state with the highest probability as values.
"""
MPOS = {}
for metal in iONP:
hprob = 0
for index, prob in enumerate(iONP[metal]):
if prob >= hprob:
hprob = prob
MPOS[metal] = index
return MPOS
[docs]
def getCN(lsites: dict[Atom, list[Atom]]) -> dict[Molecule, int]:
"""
Determines the highest probability oxidation state for each metal element.
These values are utilized during charge distribution routines.
Parameters:
lsites (dict[Atom, list[Atom]]): dictionary with metal Atom object as keys
and the list of ligand atoms which bind them as values.
Returns:
CNdict (dict[Molecule, int]): dictionary with as metal Atom objects as
keys and effective coordination number as values.
"""
CNdict = {}
for metal in lsites:
CNdict[metal] = 0
for ligand in lsites[metal]:
if hapticity(ligand, metal):
CNdict[metal] += 0.5
else:
CNdict[metal] += 1
for neighbour in metal.neighbours:
if neighbour.is_metal:
CNdict[metal] += 0.5
return CNdict
KnownON = KnownONs()
KnownIE = IonizationEnergies()
HighestKnownON = HighestKnownONs()
ONProb = ONprobabilities()
HighestProbON = ONmostprob(ONProb)
[docs]
def check(file_path: str) -> dict[str, list[str]]:
"""
Process a single .cif or .mol2 file and return a dict mapping each metal site
label to the list of flag-names that are not 'GOOD'.
"""
# Read structure
try:
if file_path.endswith(".cif"):
cif = readentry(file_path)
mol = cif.molecule
asymmol = cif.asymmetric_unit_molecule
else:
mol = readSBU(file_path)
asymmol = mol
except RuntimeError:
return {}
# Identify sites
uniquesites = get_unique_sites(mol, asymmol)
metalsites = get_metal_sites(uniquesites)
if not metalsites:
return {}
# Bonding and oxidation contributions
dVBO = delocalisedLBO(mol)
rVBO = ringVBOs(mol)
AON = iVBS_Oxidation_Contrib(uniquesites, rVBO, dVBO)
rAON = redundantAON(AON, mol)
ligand_sites = get_ligand_sites(metalsites, uniquesites)
binding_sites = get_binding_sites(metalsites, uniquesites)
binding_sphere = binding_domain(binding_sites, rAON, mol, {u.label: u for u in uniquesites})
bindingAON = binding_contrib(binding_sphere, binding_sites, rAON)
connected = get_metal_networks(ligand_sites, binding_sphere, bindingAON)
mCN = getCN(ligand_sites)
# ONE-center inner
ONEC_inner = {}
for metal in ligand_sites:
oxidation = 0
val = valence_e(metal)
for lig in ligand_sites[metal]:
LBO = bindingAON[lig]
Nbr = bridging(lig)
Ox = LBO / Nbr
oxidation += Ox
if Ox >= 2:
mCN[metal] += 1
if Ox >= 3:
mCN[metal] += 1
ONEC_inner[metal] = [oxidation, val + 2*mCN[metal] - oxidation]
# Network distribution
noint_balance = distribute_ONEC(ONEC_inner, connected, KnownIE, ONProb, HighestKnownON, mCN, HighestProbON)
# Outer-sphere
OSC = outer_sphere_contrib(outer_sphere_domain(uniquesites, binding_sphere), rAON)
noint_outer = distribute_OuterSphere(noint_balance, OSC, KnownIE, ONProb, HighestKnownON, mCN)
# Prepare flag dictionaries
flags = {name: {} for name in [
"impossible", "unknown", "zero_valence", "noint_flag",
"low_prob_1", "low_prob_2", "low_prob_3", "low_prob_multi"
]}
# Evaluate flags
for metal in noint_outer:
lbl = metal.label
flags["impossible"][lbl] = "GOOD"
flags["unknown"][lbl] = "GOOD"
flags["zero_valence"][lbl] = "GOOD"
flags["noint_flag"][lbl] = "GOOD"
flags["low_prob_1"][lbl] = "GOOD"
flags["low_prob_2"][lbl] = "GOOD"
flags["low_prob_3"][lbl] = "GOOD"
flags["low_prob_multi"][lbl] = "GOOD"
val = valence_e(metal)
os_val = noint_outer[metal][0]
if os_val > val:
flags["impossible"][lbl] = "BAD"
if os_val == 0:
flags["zero_valence"][lbl] = "BAD"
if not any(math.isclose(os_val, i, abs_tol=0.5) for i in KnownON[metal.atomic_symbol]):
flags["unknown"][lbl] = "BAD"
# low-prob checks
if math.isclose(os_val % 1, 0, abs_tol=1e-4):
prob = ONProb[metal.atomic_symbol][round(os_val)] if round(os_val) < len(ONProb[metal.atomic_symbol]) else 0
if prob < 0.01:
flags["low_prob_1"][lbl] = "LIKELY_BAD"
if prob < 0.001:
flags["low_prob_2"][lbl] = "BAD"
if prob < 0.0001:
flags["low_prob_3"][lbl] = "BAD"
return flags
import glob
import os
import json
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import pandas as pd
[docs]
def worker(cif_path):
name = os.path.basename(cif_path).replace(".cif", "")
try:
result = check(cif_path)
return {
"name": name,
"impossible": result.get("impossible", "unknown"),
"unknown": result.get("unknown", "unknown"),
"zero_valence": result.get("zero_valence", "unknown"),
"noint_flag": result.get("noint_flag", "unknown"),
"low_prob_1": result.get("low_prob_1", "unknown"),
"low_prob_2": result.get("low_prob_2", "unknown"),
"low_prob_3": result.get("low_prob_3", "unknown"),
"low_prob_multi": result.get("low_prob_multi", "unknown"),
}
except Exception:
return {
"name": name,
"impossible": "error",
"unknown": "error",
"zero_valence": "error",
"noint_flag": "error",
"low_prob_1": "error",
"low_prob_2": "error",
"low_prob_3": "error",
"low_prob_multi": "error",
}
[docs]
def run(cif_folder, save_path="./", max_workers=64):
os.makedirs(save_path, exist_ok=True)
all_cifs = glob.glob(cif_folder+"/*.cif")
with ProcessPoolExecutor(max_workers=max_workers) as exe:
futures = {exe.submit(worker, cif): cif for cif in all_cifs}
for fut in tqdm(as_completed(futures), total=len(futures), desc="Checking CIFs"):
result_dict = fut.result()
name = result_dict["name"]
out_path = os.path.join(save_path, f"{name}.json")
with open(out_path, "w", encoding="utf-8") as jf:
json.dump(result_dict, jf, ensure_ascii=False, indent=2)
all_data = []
columns = ["impossible", "unknown", "zero_valence", "noint_flag",
"low_prob_1", "low_prob_2", "low_prob_3", "low_prob_multi"]
for file in tqdm(glob.glob(save_path+"/*json")):
name = file.split("/")[-1].replace(".json", "")
data_each = [name, False, False,
False, False, False,
False, False, False]
with open(file, "r") as f:
data = json.load(f)
i=0
for col in columns:
for metal in data[col]:
if data[col][metal] != "GOOD":
data_each[i+1] = True
i+=1
all_data.append(data_each)
data = pd.DataFrame(all_data, columns=["id", "impossible", "unknown",
"zero_valence", "noint_flag",
"low_prob_1", "low_prob_2",
"low_prob_3",
"low_prob_multi"
]
).to_csv(save_path+"/mosaec_results.csv", index=False)