Tutorial 6 - Soprano for defect calculations

Tutorial 6 - Soprano for defect calculations#

      _
    /|_|\   
   / / \ \  
  /_/   \_\  
  \ \   / /  
   \ \_/ /  
    \|_|/  

SOPRANO: a Python library for generation, manipulation and analysis of large batches of crystalline structures

Developed within the CCP-NC project. Copyright STFC 2022

# Basic imports
import os, sys
sys.path.insert(0, os.path.abspath('..')) # This to add the Soprano path to the PYTHONPATH
                                          # so we can load it without installing it
# Other useful imports

import numpy as np

import ase
from ase import Atoms
from ase.build import bulk, molecule

%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

1 - Creating defect structures#

Soprano has a number of functionalities dedicated to defect calculations. Some of these have been used in work on finding the site in which a muonium pseudo-atom would sit after relaxing inside a crystal ( L. Liborio, S. Sturniolo et al, Computational prediction of muon stopping sites using ab initio random structure searching (AIRSS) ).

First, Soprano provides a number of generator functions to create defect structures, either randomly or in a specific way.

defectGen allows creating structures with a single added interstitial defect, distributed in a way to fill the empty space in between atoms while never creating any two configurations where the defects are closer than a given radius, using a so-called Poisson sphere distribution. These are good starting points for a random structure search approach.

from soprano.collection import AtomsCollection
from soprano.collection.generate import defectGen, substitutionGen, additionGen

si2 = bulk('Si') # Bulk silicon
dG = defectGen(si2, 'H', poisson_r=0.6, vdw_scale=0.5) # poisson_r controls the minimum distance between two defects, 
                                                       # vdw_scale the one between defects and existing atoms
                                                       # (proportional to both's Van der Waals radius)
dColl = AtomsCollection(dG)

# The smaller poisson_r, the more structures are generated. The process is random and the final number isn't fixed.
print('{0} structures generated'.format(len(dColl)))
95 structures generated
# By default, the defect is placed in position 0 in each new structure
def_pos = dColl.all.get_positions()[:,0]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

ax.scatter3D(*def_pos.T, color=(0, 0.5, 0.9), s=30) # Hydrogen defects are blue
ax.scatter3D(*si2.get_positions().T, color=(0.9,0.5,0), s=200) # Silicon atoms are orange
plt.show()
../_images/ece084ffa6e9c5c758f5195413971c8d7548739311a6c1cac41b935945246c10.png

substitutionGen allows to simply replace one or more existing atoms with new ones. The replacements can be limited to only a specific selection, and can be accepted or rejected with a test function.

from soprano.properties.linkage import Bonds
from soprano.selection import AtomSelection

# As an example, we generate a molecule of ethyl mercaptan
ethmerc = molecule('CH3CH2SH')
syms = ethmerc.get_chemical_symbols()
bonds, bondmat = Bonds(return_matrix=True)(ethmerc)

# Where are the hydrogens? And the sulfur?
hsel = AtomSelection.from_element(ethmerc, 'H')
ssel = AtomSelection.from_element(ethmerc, 'S')

def isnot_S_bonded(s, subs):
    # Return True only if none of the atoms in subs is bonded to sulfur
    return np.all(bondmat[ssel.indices, list(subs)] == 0)

# Substitute two hydrogens at a time with chlorine, but only if neither is bonded to sulfur
sG = substitutionGen(ethmerc, 'Cl', to_replace=hsel, n=2, accept=isnot_S_bonded)

sColl = AtomsCollection(sG)

print('{0} structures generated'.format(len(sColl)))
10 structures generated
  WARNING:  divide by zero encountered in divide (/home/runner/work/soprano/soprano/soprano/utils.py, line: 354)
  WARNING:  invalid value encountered in dot (/home/runner/work/soprano/soprano/soprano/utils.py, line: 354)
  WARNING:  invalid value encountered in cast (/home/runner/work/soprano/soprano/soprano/utils.py, line: 361)

additionGen operates similarly, adding one or more atoms of the given species to a certain selection, and picking the direction to add them in a way that it will be as far as possible from existing bonds, again, under verification of an acceptance function if required.

# We try additioning hydrogen to ethylene
eth = molecule('C2H4')

csel = AtomSelection.from_element(eth, 'C')
aG = additionGen(eth, 'H', to_addition=csel, add_r=1.0)

aColl = AtomsCollection(aG)

# Should generate four structures: 2 carbon atoms x 2 possible directions (above and below the plane of the molecule)
print('{0} structures generated'.format(len(aColl)))
4 structures generated
# Plot structure with index s_i
s_i = 0
s = aColl.structures[s_i]

pos = s.get_positions()
csel = AtomSelection.from_element(s, 'C')
hsel = AtomSelection.from_element(s, 'H')


fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

ax.set_xlim(-3,3)
ax.set_ylim(-3,3)
ax.set_zlim(-3,3)

ax.scatter3D(*pos[csel.indices].T, c=(0,0,0), s=200)
ax.scatter3D(*pos[hsel.indices].T, c=(0,0.5,0.9), s=30)
  WARNING:  *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points. (/tmp/ipykernel_2477/974287187.py, line: 17)
  WARNING:  *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points. (/tmp/ipykernel_2477/974287187.py, line: 18)
<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x7f211c376d50>
../_images/72ce0f743a6bba17864f9f29929cf551283ccd55f5f1c523d7d5cc6f0923b2d7.png

2 - Analysing defect structures#

A different problem when dealing with the results of random structure searching is how to group together a number of predicted defect positions that might be crystallographically equivalent. In Soprano, this is helped by a clustering gene called defect_asymmetric_fdist, for “asymmetric fractional distance”. This is a pair gene (it only computes a distance between pairs of structures and only allows for hierarchical clustering), and it computes how close in fractional coordinates two defects can get if all possible symmetry operations that apply to a given pure structure are tried to bring them together. The distance itself has no specific physical meaning as it’s in fractional space, but it works well to group the defects together if there’s a meaningful structure to begin with. The gene requires having the space group library spglib installed.

from scipy.cluster.hierarchy import dendrogram
from soprano.analyse.phylogen import PhylogenCluster, Gene

# Let's create a number of different possible sites in silicon, split in two groups: tetrahedral and bond-centred site,
# but within a number of equivalent coordinates, and with some noise added in.

def_fpos = np.array([
    [0.125]*3,
    [0.125*5, 0.125, 0.125],
    [0.125]*3,
    [0.125*5, 0.125, 0.125],
    [0.5]*3,
    [0.75]*3,
    [0.5]*3,
    [0.75]*3
])

def_fpos += (np.random.random(def_fpos.shape)-0.5)*0.05

defColl = AtomsCollection([Atoms('H', scaled_positions=[fp], cell=si2.get_cell())+si2 for fp in def_fpos])

# Specify that the defect is at index 0, and that the pure structure is si2
dafGene = Gene('defect_asymmetric_fdist', params={'struct': si2, 'index': 0})

defClust = PhylogenCluster(defColl, genes=[dafGene])
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
File ~/work/soprano/soprano/soprano/properties/symmetry/backend.py:140, in resolve_backend(backend)
    139 try:
--> 140     import spglib  # noqa: F401
    142     return "spglib"

ModuleNotFoundError: No module named 'spglib'

During handling of the above exception, another exception occurred:

ImportError                               Traceback (most recent call last)
Cell In[8], line 25
     22 # Specify that the defect is at index 0, and that the pure structure is si2
     23 dafGene = Gene('defect_asymmetric_fdist', params={'struct': si2, 'index': 0})
---> 25 defClust = PhylogenCluster(defColl, genes=[dafGene])

File ~/work/soprano/soprano/soprano/analyse/phylogen/phylogenclust.py:105, in PhylogenCluster.__init__(self, coll, genes, norm_range, norm_dist)
    103 # Now to actually add the genes
    104 if genes is not None:
--> 105     self.set_genes(genes=genes)

File ~/work/soprano/soprano/soprano/analyse/phylogen/phylogenclust.py:152, in PhylogenCluster.set_genes(self, genes, load_arrays)
    150     # And confirm that there are no NaNs inside
    151     if gene_val is None or np.any(np.isnan(gene_val)):
--> 152         gene_val = g.evaluate(self._collection)
    154     self._gene_storage[g.name] = {"def": g, "val": gene_val}
    156 self._needs_recalc = True

File ~/work/soprano/soprano/soprano/analyse/phylogen/genes.py:138, in Gene.evaluate(self, c)
    136 def evaluate(self, c):
    137     """Evaluate the gene on a given AtomsCollection"""
--> 138     val = self._parser(c, **self.params)
    140     # Check for various possible modes of failure
    141     if val is None or None in val:

File ~/work/soprano/soprano/soprano/analyse/phylogen/genes.py:375, in parsegene_defect_asymmetric_fdist(c, index, struct)
    372     raise ValueError("defect_asymmetric_fdist gene requires a struct " "argument")
    374 fp = c.all.get_scaled_positions()[:, index, :]
--> 375 return compute_asymmetric_distmat(struct, fp, linearized=False)

File ~/work/soprano/soprano/soprano/utils.py:1045, in compute_asymmetric_distmat(struct, points, linearized, return_images, images_centre, symprec, backend)
   1043 # Get symmetry operations
   1044 from soprano.properties.symmetry.backend import get_symmetry_dataset
-> 1045 symm = get_symmetry_dataset(struct, symprec=symprec, backend=backend)
   1046 rots = symm.rotations
   1047 transls = symm.translations

File ~/work/soprano/soprano/soprano/properties/symmetry/backend.py:197, in get_symmetry_dataset(atoms, symprec, backend)
    175 def get_symmetry_dataset(
    176     atoms,
    177     *,
    178     symprec: float = 1e-5,
    179     backend: BackendLiteral = "auto",
    180 ) -> SpacegroupDataset:
    181     """Return a :class:`SpacegroupDataset` for *atoms*.
    182 
    183     Parameters
   (...)    195     SpacegroupDataset
    196     """
--> 197     resolved = resolve_backend(backend)
    198     if resolved == "moyo":
    199         return _dataset_moyo(atoms, symprec)

File ~/work/soprano/soprano/soprano/properties/symmetry/backend.py:144, in resolve_backend(backend)
    142         return "spglib"
    143     except ImportError:
--> 144         raise ImportError(
    145             "No symmetry backend is available.  Install moyopy with\n"
    146             "    pip install moyopy\n"
    147             "or spglib with\n"
    148             "    pip install 'spglib>=2.4'"
    149         )
    150 if backend == "moyo":
    151     try:

ImportError: No symmetry backend is available.  Install moyopy with
    pip install moyopy
or spglib with
    pip install 'spglib>=2.4'
Z = defClust.get_linkage()

fig = plt.figure()
ax = fig.add_subplot(111)

# The structures should be neatly split in two clusters of four
dd = dendrogram(Z, ax=ax)
../_images/5bb6b6e0f295c9888ebae696a909f2b770aac82c390b281a7498f1d6f058c5da.png