Source code for CoREMOF.calculation.mof
"""Classes used for open metal site analysis.
"""
import json
import copy
from pymatgen.core.structure import Structure
from CoREMOF.calculation.atomic_parameters import Atom
import numpy as np
import sys
import itertools
import os
import shutil
import hashlib
import datetime
import math
[docs]
class MofStructure(Structure):
"""Extend the pymatgen Structure class to add MOF specific features. Create a MOf structure. The arguments are the same as in the pymatgen Structure class with the addition of the name argument.
The super constructor is called and additional MOF specific properties are initialized.
Args:
Structure: MOF name, used to identify the structure."""
def __init__(self, lattice, species, coords, charge=None,
validate_proximity=False, to_unit_cell=False,
coords_are_cartesian=False, site_properties=None, name="N/A"):
super().__init__(lattice, species, coords,
charge=charge,
validate_proximity=validate_proximity,
to_unit_cell=to_unit_cell,
coords_are_cartesian=coords_are_cartesian,
site_properties=site_properties)
self._all_coord_spheres_indices = None
self._all_distances = None
self._metal_coord_spheres = []
self._name = name
self.metal = None
self.metal_indices = []
self.organic = None
self.species_str = [str(s) for s in self.species]
metal_set = set([s for s in self.species_str if Atom(s).is_metal])
non_metal_set = set([s for s in self.species_str
if not Atom(s).is_metal])
todays_date = datetime.datetime.now().isoformat()
self.summary = {'cif_okay': 'N/A',
'problematic': 'N/A',
'has_oms': 'N/A',
'metal_sites': [],
'oms_density': 'N/A',
'checksum': 'N/A',
'metal_species': list(metal_set),
'non_metal_species': list(non_metal_set),
'name': name,
'uc_volume': self.volume,
'density': self.density,
'date_created': str(todays_date)}
self._tolerance = None
self._split_structure_to_organic_and_metal()
[docs]
@classmethod
def from_file(cls, filename, primitive=False, sort=False, merge_tol=0.0):
"""Create a MofStructure from a CIF file. This makes use of the from_file function of the Structure class and catches the exception in case a CIF file cannot be read. If the CIF is read successfully then the MofStructure is marked as okay, and the file checksum is added to the summary. If the CIF file cannot be read then it is marked as not okay and all the other properties are set to None and because there cannot be an empty Structure a carbon atom is added as placeholder at 0,0,0.
Args:
filename (str): The filename to read from.
primitive (bool): Whether to convert to a primitive cell Only available for cifs. Defaults to False.
sort (bool): Whether to sort sites. Default to False.
merge_tol (float): If this is some positive number, sites that are within merge_tol from each other will be merged. Usually 0.01 should be enough to deal with common numerical issues.
Returns:
- The created MofStructure
"""
mof_name = os.path.splitext(os.path.basename(filename))[0]
try:
s = Structure.from_file(filename, primitive=primitive, sort=sort,
merge_tol=merge_tol)
s_mof = cls(s.lattice, s.species, s.frac_coords, name=mof_name)
s_mof.summary['cif_okay'] = True
s_mof.summary['checksum'] = Helper.get_checksum(filename)
except Exception as e:
print('\nAn Exception occurred: {}'.format(e))
print('Cannot load {}\n'.format(filename))
# Make a placeholder MOF object, set all its summary entries
# to None and set cif_okay to False
s_mof = cls([[10, 0, 0], [0, 10, 0], [0, 0, 10]],
["C"], [[0, 0, 0]], name=mof_name)
s_mof._mark_failed_to_read()
s_mof.summary['cif_okay'] = False
return s_mof
[docs]
def analyze_metals(self, output_folder, verbose='normal'):
"""Run analysis to detect all open metal sites in a MofStructure. In addition the metal sites are marked as unique.
Args:
output_folder (str): Folder where OMS analysis results will be stored.
verbose: Verbosity level for the output of the analysis.
"""
Helper.make_folder(output_folder)
running_indicator = output_folder + "/analysis_running"
open(running_indicator, 'w').close()
self.summary['problematic'] = False
ms_cs_list = {True: [], False: []}
for m, omc in enumerate(self.metal_coord_spheres):
m_index = self.metal_indices[m]
omc.check_if_open()
if not self.summary['problematic']:
self.summary['problematic'] = omc.is_problematic
cs = self._find_coordination_sequence(m_index)
cs = [self.species_str[m_index]] + cs
omc.is_unique = self._check_if_new_site(ms_cs_list[omc.is_open], cs)
if omc.is_unique:
ms_cs_list[omc.is_open].append(cs)
self.summary['metal_sites'].append(omc.metal_summary)
unique_sites = [s['unique'] for s in self.summary['metal_sites']]
open_sites = [s['is_open'] for s in self.summary['metal_sites']]
self.summary['oms_density'] = sum(unique_sites) / self.volume
self.summary['has_oms'] = any(open_sites)
self.write_results(output_folder, verbose)
os.remove(running_indicator)
[docs]
def write_results(self, output_folder, verbose='normal'):
"""Store summary dictionary holding all MOF and OMS information to a JSON file, store CIF files for the metal and non-metal parts of the MOF as well as all the identified coordination spheres.
Args:
output_folder: Location to be used to store.
verbose: Verbosity level.
"""
Helper.make_folder(output_folder)
for index, mcs in enumerate(self.metal_coord_spheres):
mcs.write_cif_file(output_folder, index)
if self.metal:
output_fname = "{}/{}_metal.cif".format(output_folder,
self.summary['name'])
self.metal.to(filename=output_fname)
output_fname = "{}/{}_organic.cif".format(output_folder,
self.summary['name'])
self.organic.to(filename=output_fname)
json_file_out = "{}/{}.json".format(output_folder, self.summary['name'])
summary = copy.deepcopy(self.summary)
if verbose == 'normal':
for ms in summary["metal_sites"]:
ms.pop('all_dihedrals', None)
ms.pop('min_dihedral', None)
with open(json_file_out, 'w') as outfile:
json.dump(summary, outfile, indent=3)
@property
def tolerance(self):
"""Tolerance values for dihedral checks. If not set, defaults are given.
"""
if self._tolerance is None:
self._tolerance = {'on_plane': 15}
return self._tolerance
@property
def name(self):
"""Name of the MofStructure."""
return self._name
@name.setter
def name(self, name):
"""Setter for the name of the MofStructure."""
self._name = name
self.summary['name'] = name
@property
def all_distances(self):
"""Distances between all atoms in the MofStructure"""
if self._all_distances is None:
self._all_distances = self.lattice.get_all_distances(
self.frac_coords, self.frac_coords)
return self._all_distances
@property
def all_coord_spheres_indices(self):
"""Compute the indices of the atoms in the first coordination shell for all atoms in the MofStructure
"""
if self._all_coord_spheres_indices:
return self._all_coord_spheres_indices
self._all_coord_spheres_indices = [self._find_cs_indices(i)
for i in range(len(self))]
return self._all_coord_spheres_indices
@property
def metal_coord_spheres(self):
"""For all metal atoms in a MofStructure compute the first coordination sphere as a MetalSite object.
"""
if not self._metal_coord_spheres:
self._metal_coord_spheres = [self._find_metal_coord_sphere(c)
for c in self.metal_indices]
return self._metal_coord_spheres
def _mark_failed_to_read(self):
"""If a CIF cannot be read set certain properties to None"""
self.summary['metal_species'] = None
self.summary['non_metal_species'] = None
self.summary['uc_volume'] = None
self.summary['density'] = None
def _split_structure_to_organic_and_metal(self):
"""Split a MOF to two pymatgen Structures, one containing only metal atoms and one containing only non-metal atoms."""
self.metal = Structure(self.lattice, [], [])
self.organic = Structure(self.lattice, [], [])
i = 0
for s, fc in zip(self.species, self.frac_coords):
if Atom(str(s)).is_metal:
self.metal.append(s, fc)
self.metal_indices.append(i)
else:
self.organic.append(s, fc)
i += 1
def _find_cs_indices(self, center):
"""Find the indices of the atoms in the coordination sphere.
Args:
center: Central atom of coordination sphere.
Returns:
c_sphere_indices: Return in the coordination sphere of center.
"""
dist = list(self.all_distances[center])
if dist[center] > 0.0000001:
sys.exit('The self distance appears to be non-zero')
a = Atom(self.species_str[center])
c_sphere_indices = [i for i, dis in enumerate(dist)
if i != center
and a.check_bond(self.species_str[i], dis)]
c_sphere_indices.insert(0, center)
return c_sphere_indices
def _find_metal_coord_sphere(self, center):
"""Identify the atoms in the first coordination sphere of a metal atom. Obtain all atoms connecting to the metal using the all_coord_spheres_indices values and keeping only valid bonds as well as center the atoms around the metal center for visualization purposes.
Args:
center:
"""
dist = self.all_distances[center]
if dist[center] > 0.0000001:
sys.exit('The self distance appears to be non-zero')
c_sphere = MetalSite(self.lattice, [self.species[center]],
[self.frac_coords[center]],
tolerance=self.tolerance)
cs_i = self.all_coord_spheres_indices[center]
for i in cs_i[1:]:
c_sphere.append(self.species_str[i], self.frac_coords[i])
c_sphere.keep_valid_bonds()
c_sphere.center_around_metal()
return c_sphere
@staticmethod
def _check_if_new_site(cs_list, cs):
"""Check if a given site is unique based on its coordination sequence"""
for cs_i in cs_list:
if Helper.compare_lists(cs_i, cs):
return False
return True # len(cs_list),
def _find_coordination_sequence(self, center):
"""Compute the coordination sequence up to the 6th coordination shell.
Args:
center: Atom to compute coordination sequence
Returns:
cs: Coordination sequence for center
"""
shell_list = {(center, (0, 0, 0))}
shell_list_prev = set([])
all_shells = set(shell_list)
n_shells = 6
cs = []
count_total = 0
for n in range(0, n_shells):
c_set = set([])
for a_uc in shell_list:
a = a_uc[0]
lattice = a_uc[1]
coord_sphere = self.all_coord_spheres_indices[a]
count_total += 1
coord_sphere_with_uc = []
for c in coord_sphere:
diff = self.frac_coords[a] - self.frac_coords[c]
new_lat_i = [round(d, 0) for d in diff]
uc = tuple(l-nl for l, nl in zip(lattice, new_lat_i))
coord_sphere_with_uc.append((c, uc))
coord_sphere_with_uc = tuple(coord_sphere_with_uc)
c_set = c_set.union(set(coord_sphere_with_uc))
for a in shell_list_prev:
c_set.discard(a)
for a in shell_list:
c_set.discard(a)
cs.append(len(c_set))
all_shells = all_shells.union(c_set)
shell_list_prev = shell_list
shell_list = c_set
return cs
[docs]
class MetalSite(MofStructure):
def __init__(self, lattice, species, coords, validate_proximity=False,
to_unit_cell=False, coords_are_cartesian=False,
site_properties=None, tolerance=None, name='N/A'):
super().__init__(lattice, species, coords,
validate_proximity=validate_proximity,
to_unit_cell=to_unit_cell,
coords_are_cartesian=coords_are_cartesian,
site_properties=site_properties,
name=name)
self._metal_type = "unknown"
self._tolerance = tolerance
self._is_open = None
self._is_unique = None
self._is_problematic = None
self._t_factor = None
self._min_dihedral = None
self._all_dihedrals = {}
@property
def tolerance(self):
"""Tolerance values for dihedral checks. If not set, defaults are given.
"""
if self._tolerance is None:
self._tolerance = {'on_plane': 15}
return self._tolerance
@property
def num_linkers(self):
"""Number of linkers in coordination sphere of MetalSite."""
return self.num_sites - 1
@property
def is_open(self):
"""Whether the MetalSite is open or not."""
return self._is_open
@property
def is_problematic(self):
"""Whether the MetalSite is problematic or not."""
return self._is_problematic
@property
def is_unique(self):
"""Whether the MetalSite is unique or not."""
return self._is_unique
@is_unique.setter
def is_unique(self, value):
if not isinstance(value, bool):
sys.exit('is_unique can only be boolean.')
self._is_unique = value
@property
def metal_type(self):
"""The type of the metal center."""
return self._metal_type
@property
def metal_summary(self):
"""Whether the MetalSite is problematic or not."""
_summary = {"metal": str(self.species[0]),
"type": self.metal_type,
"is_open": self.is_open,
"unique": self.is_unique,
"problematic": self.is_problematic,
"number_of_linkers": self.num_linkers,
"min_dihedral": 0.0,
"all_dihedrals": 0.0,
't_factor': self._t_factor}
return _summary
[docs]
def keep_valid_bonds(self):
"""Loop over atoms in the coordination sphere and remove any extraneous sites.
"""
if len(self) == 0:
return
all_dists = self.lattice.get_all_distances(self.frac_coords,
self.frac_coords)
for i, j in itertools.combinations(range(1, len(self)), 2):
assert all_dists[i][j] == all_dists[j][i]
dis = all_dists[i][j]
if not self._valid_pair(i, j, dis):
dist_ij_c = [all_dists[i][0], all_dists[j][0]]
if len(set(dist_ij_c)) == 1:
index_to_remove = i
else:
index_to_remove = [i, j][dist_ij_c.index(max(dist_ij_c))]
self.remove_sites([index_to_remove])
return self.keep_valid_bonds()
[docs]
def center_around_metal(self):
"""Shift atoms across periodic boundary conditions to have the coordination appear centered around the metal atom for visualisation purposes
"""
gc = self.lattice.get_cartesian_coords
center = self.frac_coords[0]
center_cart_coords = gc(center)
for i in range(1, self.num_sites):
c_i = self.frac_coords[i]
dist_vector = center - c_i
dist_vector_r = []
for j in range(0, 3):
dist_vector_r.append(round(dist_vector[j]))
dist_before = np.linalg.norm(center_cart_coords - gc(c_i))
c_i_centered = c_i + dist_vector_r
dist_after = np.linalg.norm(center_cart_coords - gc(c_i_centered))
if dist_after > dist_before:
for j in range(0, 3):
dist_vector_r[j] = np.rint(dist_vector[j])
c_i_centered = c_i + dist_vector_r
if dist_after > dist_before:
c_i_centered = c_i
self.replace(i, self.species[i], c_i_centered)
[docs]
def check_if_open(self):
"""Get t-factor, check if problematic based on number of linkers and if necessary call to check the dihedrals to determine if the metal site is open.
"""
self.get_t_factor()
if Atom(str(self.species[0])).is_lanthanide_or_actinide:
self._is_problematic = self.num_linkers < 5
else:
self._is_problematic = self.num_linkers < 3
self._is_open = False
self._metal_type = "Closed"
if self.num_linkers <= 3:
self._mark_oms(oms_type='3_or_less')
return
else:
# 0 should always correspond to the
self._check_planes(0)
def _mark_oms(self, oms_type):
self._metal_type = oms_type
self._is_open = True
[docs]
def get_t_factor(self):
"""Compute t-factors, only meaningful for 4-,5-, and 6-coordinated metals, if not the value of -1 is assigned.
"""
nl = self.num_sites - 1
index_range = range(1, self.num_sites)
all_angles = []
for i in itertools.combinations(index_range, 2):
angle = self.get_angle(i[0], 0, i[1])
all_angles.append([angle, i[0], i[1]])
all_angles.sort(key=lambda x: x[0])
if nl == 5 or nl == 4:
# beta is the largest angle and alpha is the second largest angle
# in the coordination sphere; using the same convention
# as Yang et al. DOI: 10.1039/b617136b
beta = all_angles[-1][0]
alpha = all_angles[-2][0]
if nl == 4:
tau = self.get_t4_factor(alpha, beta)
else:
tau = self.get_t5_factor(alpha, beta)
elif nl == 6:
max_indices_all = all_angles[-1][1:3]
l3_l4_angles = [x for x in all_angles if
x[1] not in max_indices_all and
x[2] not in max_indices_all]
max_indices_all_3_4 = max(l3_l4_angles, key=lambda x: x[0])[1:3]
l5_l6_angles = [x for x in l3_l4_angles
if x[1] not in max_indices_all_3_4 and
x[2] not in max_indices_all_3_4]
gamma = max(l5_l6_angles, key=lambda x: x[0])[0]
tau = self.get_t6_factor(gamma)
else:
tau = -1
self._t_factor = tau
[docs]
def write_cif_file(self, output_folder, index):
"""Write MofSite to specified output_folder as a CIF file and use index to name it.
"""
Helper.make_folder(output_folder)
output_fname = output_folder
output_fname += '/first_coordination_sphere'+str(index)+'.cif'
self.to(filename=output_fname)
def _valid_pair(self, i, j, dis):
"""Determine whether two atoms in the coordination sphere form a valid pair. A pair is not valid if it forms a bond unless both atoms are metals of the same kind as the center or both atoms are carbon atoms (e.g. in the case of a ferocene type coordination sphere).
Args:
i:
j:
dis:
"""
s_one = str(self.species[i])
s_two = str(self.species[j])
a_one = Atom(s_one)
a_two = Atom(s_two)
bond = a_one.check_bond(s_two, dis, a_one.bond_tolerance(s_two))
same_atoms = s_one == s_two == str(self.species[0])
two_same_metals = same_atoms and a_one.is_metal and a_two.is_metal
carbon_atoms = s_one == s_two == 'C'
return (not bond) or two_same_metals or carbon_atoms
def _check_planes(self, site):
"""Determine whether a site is open using the dihedral angles between the atoms in the coordination sphere.
Args:
site: Index of site to be checked.
"""
for i, j, k in itertools.combinations(range(self.num_sites), 3):
plane = self._compute_plane(i, j, k)
if all([abs(p-0.0) < 1e-5 for p in plane]):
continue
sides = self._sides([i, j, k], plane)
# Side of the site in question.
s_site = sides[site]
# All sites that are not on the plane and are not the site in
# question.
s_o = [s for i, s in enumerate(sides) if i != site and s != 0]
# Keep only the unique sides
s_o_unique = list(set(s_o))
# Number of unique sides for other sites (sites not on plane and
# not the site in question)
ls = len(s_o_unique)
# ls = 0 : all other sites are on the plane
# ls = 1 : all other sites on one side of plane
# ls = 2 : other sites are on both sides of plane
if ls == 0 or (ls == 1 and s_site != s_o_unique[0]):
# Site is open if:
# a) If all other sites fall on the plane. (ls == 0)
# b) The metal site falls on the plane and all other sites
# fall on one side of the plane. (ls == 1, s_site == 0 and
# s_site != s_o_unique[0])
# c) The metal site falls on one side of the plane and all
# other sites fall on the oposite side of the plane. (ls == 1,
# s_site == 1,-1 and s_site != s_o_unique[0])
place = {0: "over", 1: "on"}[abs(s_site)]
msg = "{}_{}L_{}_open_plane".format(self[site].specie,
self.num_linkers, place)
self._mark_oms(msg)
break
assert self.is_open is False
def _sides(self, p_i, plane):
"""Given a plane p defined by 3 of the atoms in the MetalSite determine on which side of the plane all the atoms in the MetalSite fall (-1 or 1) or if it falls on the plane (0).
Args:
p_i: Indices of the 3 atoms that define the plane
plane: Plane constants
Returns:
List of side value for all atoms in the MetalSite, possible values can -1, 0, and 1.
"""
atoms_on_plane = [True if i in p_i
else self._is_point_on_plane(self[i].coords, p_i,
plane)
for i in range(len(self))]
dists = [self._get_distance_from_plane(s.coords, plane) for s in self]
sides = [0 if a or d == 0.0
else int(d/abs(d))
for d, a in zip(dists, atoms_on_plane)]
return sides
def _is_point_on_plane(self, point, p_i, p):
"""Given a point and plane determine if the point falls on the plane, using the angle between the projection of the point, each atom on the plane and the actual position of the point with a specified tolerance value.
Args:
point: Cartesian coordinates of point to check.
p: plane in the form of a list with the 4 constants defining a plane.
Returns:
True if the point falls on plane and False otherwise.
"""
tol = self.tolerance['on_plane']
point_on_plane = self._project_point_onto_plane(point, p)
angles = [self._get_angle_c(point_on_plane, self[ii].coords, point)
for ii in p_i]
return all([a < tol for a in angles])
def _get_angle_c(self, c1, c2, c3):
"""
Calculates the angle between three points in degrees.
Args:
c1: Coordinates of first point.
c2: Coordinates of second point.
c3: Coordinates of third point.
Returns:
Angle between them in degrees.
"""
v1 = c1 - c2
v2 = c3 - c2
return self._get_angle_v(v1, v2)
@staticmethod
def _get_angle_v(v1, v2):
"""
Calculates the angle between two vectors in degrees.
Args:
v1: First vector.
v2: Second vector.
Returns:
Angle between them in degrees.
"""
if np.dot(v1, v2) == 0.0:
return 0.0
d = np.dot(v1, v2) / np.linalg.norm(v1) / np.linalg.norm(v2)
d = min(d, 1.0)
d = max(d, -1.0)
angle = math.acos(d)
return math.degrees(angle)
@staticmethod
def _get_distance_from_plane(point, plane):
"""Given a point and a plane compute the distance between the point and the projection of the point on the plane."""
plane_xyz = plane[0:3]
distance = np.inner(plane_xyz, point) - plane[3]
return distance / np.linalg.norm(plane_xyz)
def _compute_plane(self, i, j, k):
"""Given three atom indices, compute the plane that passes through them.
"""
c1 = self[i].coords
c2 = self[j].coords
c3 = self[k].coords
return self._compute_plane_c(c1, c2, c3)
@staticmethod
def _compute_plane_c(c1, c2, c3):
"""Given three atom coordinates, compute the plane that passes through them.
"""
ij = c1 - c2
kj = c3 - c2
p_vector = np.cross(ij, kj)
c = np.dot(c1, p_vector)
plane = list(p_vector) + [c]
return plane
@staticmethod
def _project_point_onto_plane(point, plane):
"""Given a point and plane compute the projection of the point onto the plane.
"""
vector = plane[0:3]
constant = plane[3]
nom = np.inner(vector, point) - constant
denom = np.inner(vector, vector)
const = nom / denom
return np.array([po - v * const for po, v in zip(point, vector)])
[docs]
class Helper:
[docs]
@classmethod
def make_folder(cls, output_folder):
if not os.path.exists(output_folder):
os.makedirs(output_folder)
[docs]
@classmethod
def compare_lists(cls, l1, l2):
if len(l1) != len(l2):
return False
return all([i == j for i, j in zip(l1, l2)])
[docs]
@classmethod
def copy_folder(cls, dest, src):
if not os.path.exists(dest):
os.makedirs(dest)
s = src.split('/')[-1]
d = os.path.join(dest, s)
if not os.path.exists(d):
shutil.copytree(src, d)
[docs]
@classmethod
def get_checksum(cls, filename):
with open(filename, 'rb') as f:
file = f.read()
return hashlib.sha256(file).hexdigest()