"""Process your CIF to get mofid v1 and v2.
"""
from ase.io import read as ase_read
from ase.io import write as ase_write
from ase import neighborlist
import networkx as nx
from ase.build import sort
import ase
import os, glob, shutil
import numpy as np
import collections
from pymatgen.core.structure import Structure
from pymatgen.core.lattice import Lattice
from mofid.id_constructor import extract_fragments
from collections import Counter
from mofid.run_mofid import cif2mofid
from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher
import selfies as sf
from openbabel import openbabel as ob
metals = ['Li', 'Na', 'K', 'Rb', 'Cs', 'Fr', 'Be', 'Mg', 'Ca', 'Sr', 'Ba', 'Ra',
'Al', 'Ga', 'Ge', 'In', 'Sn', 'Sb', 'Tl', 'Pb', 'Bi', 'Po',
'Sc', 'Ti', 'V' , 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
'Y' , 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd',
'Hf', 'Ta', 'W' , 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'U', 'Tm', 'Yb', 'Lu',
'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'
]
[docs]
def run_v1(structure):
"""Converting CIF to mofid-v1, see https://snurr-group.github.io/mofid/ for additional installation. Tip: Please check CMAKE, JAVA, etc. before installing.
Args:
structure (str): path to your CIF.
Returns:
String:
- mofid-v1.
"""
mofid_v1 = cif2mofid(structure)
return mofid_v1 # mofid_v1['mofid'], mofid_v1['smiles_nodes'], mofid_v1['smiles_linkers'], mofid_v1['topology'], mofid_v1['cat']
[docs]
def dict2str(dct):
"""Convert symbol-to-number dict to str.
"""
return ''.join(symb + (str(n)) for symb, n in dct.items())
[docs]
def split_nodes_from_cif(structure, prefix='Output'):
"""Split nodes (CIF) to single XYZ.
Args:
structure (str): path to your CIF.
prefix (str): the path to save processed XYZ.
Returns:
int:
- 0: sucess; 1: fail.
"""
try:
atoms = ase_read(structure)
except:
print('Error with reading CIF in {}'.format(structure))
return 1
cutOff = neighborlist.natural_cutoffs(atoms)
neighborList = neighborlist.NeighborList(cutOff, self_interaction=False, bothways=True, skin=0.3)
neighborList.update(atoms)
G = nx.Graph()
for k in range(len(atoms)):
tup = (k, {"element":"{}".format(atoms.get_chemical_symbols()[k]), "pos": atoms.get_positions()[k]})
G.add_nodes_from([tup])
for k in range(len(atoms)):
for i in neighborList.get_neighbors(k)[0]:
G.add_edge(k, i)
Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
form_dicts = []
for index, g in enumerate(Gcc):
g = list(g)
fragment = atoms[g]
fragment = sort(fragment)
form_dict = fragment.symbols.formula.count()
form_dicts.append(dict2str(form_dict))
#nodes = [atoms[list(Gcc[0])]]
#unique_formdicts = [form_dicts[0]]
nodes = []
unique_formdicts = []
if len(form_dicts) > 1:
for index, form_dict in enumerate(form_dicts):
if form_dict not in unique_formdicts:
nodes.append(atoms[list(Gcc[index])])
unique_formdicts.append(form_dict)
elif len(form_dicts) == 1:
nodes.append(atoms[list(Gcc[0])])
unique_formdicts.append(form_dicts[0])
for index, _ in enumerate(nodes):
ase_write('{}/node{}.xyz'.format(prefix, index), nodes[index]) # /AllNode/nodes.cif
return 0
[docs]
def split_linkers_from_cif(structure, prefix='Output'):
"""Split linkers (CIF) to single XYZ.
Args:
structure (str): path to your CIF.
prefix (str): the path to save processed XYZ.
Returns:
int:
- 0: sucess; 1: fail.
"""
try:
atoms = ase_read(structure)
except:
print('Error with reading CIF in {}'.format(structure))
return 1
cutOff = neighborlist.natural_cutoffs(atoms)
neighborList = neighborlist.NeighborList(cutOff, self_interaction=False, bothways=True, skin=0.3)
neighborList.update(atoms)
G = nx.Graph()
for k in range(len(atoms)):
tup = (k, {"element":"{}".format(atoms.get_chemical_symbols()[k]), "pos": atoms.get_positions()[k]})
G.add_nodes_from([tup])
for k in range(len(atoms)):
for i in neighborList.get_neighbors(k)[0]:
G.add_edge(k, i)
Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
form_dicts = []
for index, g in enumerate(Gcc):
g = list(g)
fragment = atoms[g]
fragment = sort(fragment)
form_dict = fragment.symbols.formula.count()
form_dicts.append(dict2str(form_dict))
nodes = []
unique_formdicts = []
if len(form_dicts) > 1:
for index, form_dict in enumerate(form_dicts):
if form_dict not in unique_formdicts:
nodes.append(atoms[list(Gcc[index])])
unique_formdicts.append(form_dict)
elif len(form_dicts) == 1:
nodes.append(atoms[list(Gcc[0])])
unique_formdicts.append(form_dicts[0])
for index, _ in enumerate(nodes):
ase_write('{}_MetalOxolinker{}.xyz'.format(prefix, index), nodes[index]) # /MetalOxo/linkers.cif
return 0
[docs]
def get_node_linker_files(structure, prefix='Output'):
"""Split MOF to sbu + linker.
Args:
structure (str): path to your CIF.
prefix (str): the path to save processed XYZ.
Returns:
files:
- structure, node, linker, ...
"""
os.makedirs(prefix, exist_ok=True)
extract_fragments(structure, prefix)
[docs]
def xyz2fomula(xyzpath):
"""from XYZ to chemical fomula based on A-Z.
Args:
xyzpath (str): path to your XYZ.
Returns:
str:
- fomula.
"""
atoms = ase_read(xyzpath)
symbols = atoms.get_chemical_symbols()
counts = Counter(symbols)
sorted_items = sorted(counts.items(), key=lambda x: x[0])
# formula = ''.join(f"{el}{n if n > 1 else ''}" for el, n in sorted_items)
formula = ''.join(f"{el}{n}" for el, n in sorted_items)
return formula
[docs]
def convert_ase_pymat(ase_objects):
"""convert to ase to pymatgen atoms.
Args:
ase_objects (ase.Atoms): ase-type atoms.
Returns:
ase.Atoms:
- pymatgen-type atoms.
"""
structure_lattice = Lattice(ase_objects.cell)
structure_species = ase_objects.get_chemical_symbols()
structure_positions = ase_objects.get_positions()
return Structure(structure_lattice,structure_species,structure_positions)
[docs]
def remove_pbc_cuts(atoms):
"""Remove building block cuts due to periodic boundary conditions. After the
removal, the atoms object is centered at the center of the unit cell.
Args:
atoms (ase.Atoms): aase.Atoms.
Returns:
ase.Atoms:
- The processed atoms object.
"""
try:
# setting cuttoff parameter
scale = 1.4
cutoffs = ase.neighborlist.natural_cutoffs(atoms)
cutoffs = [scale * c for c in cutoffs]
#making neighbor_list
#graphs of single metal is not constructed because there is no neighbor.
I, J, D = ase.neighborlist.neighbor_list("ijD",atoms,cutoff=cutoffs)
nl = [[] for _ in atoms]
for i, j, d in zip(I, J, D):
nl[i].append((j, d))
visited = [False for _ in atoms]
q = collections.deque()
# Center of the unit cell.
abc_half = np.sum(atoms.get_cell(), axis=0) * 0.5
positions = {}
q.append((0, np.array([0.0, 0.0, 0.0])))
while q:
i, pos = q.pop()
visited[i] = True
positions[i] = pos
for j, d in nl[i]:
if not visited[j]:
q.append((j, pos + d))
visited[j] = True
centroid = np.array([0.0, 0.0, 0.0])
for v in positions.values():
centroid += v
centroid /= len(positions)
syms = [None for _ in atoms]
poss = [None for _ in atoms]
for i in range(len(atoms)):
syms[i] = atoms.symbols[i]
poss[i] = positions[i] - centroid + abc_half
atoms = ase.Atoms(
symbols=syms, positions=poss, pbc=True, cell=atoms.get_cell()
)
# resize of cell
cell_x = np.max(atoms.positions[:,0]) - np.min(atoms.positions[:,0])
cell_y = np.max(atoms.positions[:,1]) - np.min(atoms.positions[:,1])
cell_z = np.max(atoms.positions[:,2]) - np.min(atoms.positions[:,2])
cell = max([cell_x,cell_y,cell_z])
atoms.set_cell([cell+2,cell+2,cell+2, 90,90,90])
center_mass = atoms.get_center_of_mass()
cell_half = atoms.cell.cellpar()[0:3]/2
atoms.positions = atoms.positions - center_mass + cell_half
return atoms
except:
return atoms
[docs]
def run_v2(structure, nodes_dataset, refname):
"""run mofidv2 from CIF.
Args:
structure (str): path to your MOF.
nodes_dataset (str): path to your node dataset (download from https://github.com/sxm13/CoREMOF_tools/tree/main/CoREMOF/data/mofid/nodes.zip).
refname (str): file name of your structure or define by yourself.
Returns:
str:
- mofid-v2.
"""
# get list of nodes
try:
shutil.rmtree("Output")
except:
pass
# get list of nodes
nodes_type = glob.glob(nodes_dataset+"/*xyz")
nodes = []
for node_file in nodes_type:
nodes.append(node_file.split("/")[-1].split("_")[0])
nodes = list(set(nodes))
# get information of mofid-v1
mofidv1 = run_v1(structure)
linkers = mofidv1["smiles_linkers"]
all_linkers = []
for linker in linkers:
all_linkers.append(sf.encoder(linker))
topology = mofidv1["topology"]
cat = mofidv1["cat"]
# get_node_linker_files(cifpath)
check = split_nodes_from_cif("Output/AllNode/nodes.cif", "Output")
if check == 1:
print("nan")
return "nan"
else:
all_nodes_xyz = glob.glob("./Output/node*xyz")
all_nodes_part = []
for node_xyz in all_nodes_xyz:
node_formula = xyz2fomula(node_xyz)
if node_formula in nodes:
matcher = StructureMatcher(ltol = 0.3,
stol = 2,
angle_tol = 5,
primitive_cell = False,
scale = False,
comparator=ElementComparator())
matched = False
known_nodes = glob.glob(nodes_dataset+ "/" + node_formula + "*xyz")
mof = remove_pbc_cuts(ase_read(node_xyz))
a = convert_ase_pymat(mof)
for i in range(len(known_nodes)):
b = convert_ase_pymat(remove_pbc_cuts(ase_read(known_nodes[i])))
if matcher.fit(a, b):
node_part = os.path.basename(known_nodes[i].replace(".xyz", ""))
all_nodes_part.append(node_part)
matched = True
print(node_formula, "the node can be found in nodes dataset")
break
if not matched:
# raise RuntimeError(f"fail matched from nodes dataset, stop")
node_part = node_formula + "_Type-" + str(len(known_nodes) + 1)
all_nodes_part.append(node_part)
shutil.move(node_xyz, nodes_dataset + "/" + node_part + ".xyz")
print("new node found, has moved the nodes dataset")
else:
node_part = node_formula + "_Type-1"
all_nodes_part.append(node_part)
shutil.move(node_xyz, nodes_dataset + "/" + node_part + ".xyz")
print("new node found, has moved the nodes dataset")
linkers_part = ".".join(all_linkers)
nodes_part = ".".join(f"[{node}]" for node in all_nodes_part)
mofidv2 = nodes_part + "." + linkers_part + " " + "MOFid-v2." + topology + ".cat" + cat + ";" + refname
try:
shutil.rmtree("Output")
except:
pass
return mofidv2
[docs]
def are_identical_smiles(smiles1, smiles2):
obConversion = ob.OBConversion()
obConversion.SetInFormat("smi")
obConversion.SetOutFormat("can") # Set output to canonical SMILES
mol1 = ob.OBMol()
mol2 = ob.OBMol()
# Read the SMILES strings into molecules
obConversion.ReadString(mol1, smiles1)
obConversion.ReadString(mol2, smiles2)
# Convert molecules to canonical SMILES
can_smiles1 = obConversion.WriteString(mol1).strip()
can_smiles2 = obConversion.WriteString(mol2).strip()
# Compare canonical SMILES
return can_smiles1 == can_smiles2