Skip to content

Commit

Permalink
Merge pull request #125 from robpollice/bugfix-120-radical-kekulization
Browse files Browse the repository at this point in the history
Handling of radicals in kekulization (fix #120)
  • Loading branch information
MarioKrenn6240 authored Jan 6, 2025
2 parents 55f4e34 + 5a3b0e2 commit 381bd12
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 8 deletions.
7 changes: 7 additions & 0 deletions selfies/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
"O": (2, 4), "S": (2, 4), "Se": (2, 4), "Te": (2, 4)
}

# Number of valence electrons of the neutral atom, keyed by element symbol.
# Covers the same elements that may appear as aromatic atoms.
VALENCE_ELECTRONS = {
    # group 13
    "B": 3,
    "Al": 3,
    # group 14
    "C": 4,
    "Si": 4,
    # group 15
    "N": 5,
    "P": 5,
    "As": 5,
    # group 16
    "O": 6,
    "S": 6,
    "Se": 6,
    "Te": 6,
}

# Lower-case spellings of every element listed in AROMATIC_VALENCES.
AROMATIC_SUBSET = {element.lower() for element in AROMATIC_VALENCES}

# =============================================================================
Expand Down
31 changes: 23 additions & 8 deletions selfies/mol_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dataclasses import dataclass, field

from selfies.bond_constraints import get_bonding_capacity
from selfies.constants import AROMATIC_VALENCES
from selfies.constants import AROMATIC_VALENCES, VALENCE_ELECTRONS
from selfies.utils.matching_utils import find_perfect_matching


Expand Down Expand Up @@ -254,7 +254,7 @@ def kekulize(self) -> bool:

ds = self._delocal_subgraph
kept_nodes = set(itertools.filterfalse(self._prune_from_ds, ds))

# relabel kept DS nodes to be 0, 1, 2, ...
label_to_node = list(sorted(kept_nodes))
node_to_label = {v: i for i, v in enumerate(label_to_node)}
Expand All @@ -265,7 +265,7 @@ def kekulize(self) -> bool:
label = node_to_label[node]
for adj in filter(lambda v: v in kept_nodes, ds[node]):
pruned_ds[label].append(node_to_label[adj])

matching = find_perfect_matching(pruned_ds)
if matching is None:
return False
Expand All @@ -288,18 +288,33 @@ def _prune_from_ds(self, node):
adj_nodes = self._delocal_subgraph[node]
if not adj_nodes:
return True # aromatic atom with no aromatic bonds

atom = self._atoms[node]
valences = AROMATIC_VALENCES[atom.element]

# each bond in DS has order 1.5 - we treat them as single bonds
used_electrons = int(self._bond_counts[node] - 0.5 * len(adj_nodes))

if atom.h_count is None: # account for implicit Hs
assert atom.charge == 0
return any(used_electrons == v for v in valences)
else:
valence = valences[-1] - atom.charge
used_electrons += atom.h_count
free_electrons = valence - used_electrons
return not ((free_electrons >= 0) and (free_electrons % 2 != 0))

# count the total number of bound electrons of each atom
bound_electrons = (max(0, atom.charge) + atom.h_count
+ int(self._bond_counts[node])
+ int(2 * (self._bond_counts[node] % 1)))

# calculate the number of unpaired electrons of each atom
radical_electrons = (max(0, VALENCE_ELECTRONS[atom.element]
- bound_electrons) % 2)

# unpaired electrons do not contribute to the aromatic system
free_electrons = valence - used_electrons - radical_electrons

if any(used_electrons == v - atom.charge for v in valences):
return True
else:
return not ((free_electrons >= 0) and (free_electrons % 2 != 0))
22 changes: 22 additions & 0 deletions tests/test_specific_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ def decode_eq(selfies, smiles):
return s == smiles


def roundtrip_eq(smiles_in, smiles_out):
    """Encode ``smiles_in`` to SELFIES, decode it back, and report whether
    the resulting SMILES string equals ``smiles_out``.
    """
    return sf.decoder(sf.encoder(smiles_in)) == smiles_out


def test_branch_and_ring_at_state_X0():
"""Tests SELFIES with branches and rings at state X0 (i.e. at the
very beginning of a SELFIES). These symbols should be skipped.
Expand Down Expand Up @@ -330,6 +336,7 @@ def test_old_symbols():
except Exception:
assert False


def test_large_selfies_decoding():
"""Test that we can decode extremely large SELFIES strings (used to cause a RecursionError)
"""
Expand All @@ -339,8 +346,23 @@ def test_large_selfies_decoding():

assert decode_eq(large_selfies, expected_smiles)


def test_radical_kekulization():
    """Tests kekulization of aromatic systems with radicals and charges.

    Each case is an aromatic input SMILES paired with the kekulized SMILES
    expected after an encoder/decoder round trip.
    """
    cases = (
        ("c1ccc[c]c1", "C1=CC=C[CH0]=C1"),
        ("c1[c]n1(C)", "C1=[CH0]N1C"),
        ("c1[C][n+]1(C)", "C=1[CH0][N+1]=1C"),
        ("c1nnn[n-]1", "C1=NN=N[N-1]1"),
        ("c1ccn[c-](C)[n+]1=O", "C1=CC=N[C-1](C)[N+1]1=O"),
        ("c1ccs[n+]1c2ccccc2", "C=1C=CS[N+1]=1C2=CC=CC=C2"),
        ("c1ccs[nH+]1", "C=1C=CS[NH1+1]=1"),
    )

    for smiles_in, smiles_out in cases:
        assert roundtrip_eq(smiles_in, smiles_out)


def test_novel_charged_symbols():
    """Test decoding of updated constraints for charged atoms (update in 2.2.0)."""
    cases = (
        ("[N][#C+1][#NH1][#C@H1]", "N#[C+1]"),
        ("[O+1][=P+1][#P-1][#C@@]", "[O+1]=[P+1]=[P-1]#[C@@]"),
        ("[=C-1][#S+1][#B]", "[C-1]#[S+1]=B"),
    )

    for selfies, smiles in cases:
        assert decode_eq(selfies, smiles)

0 comments on commit 381bd12

Please sign in to comment.