# -*- coding: utf-8 -*-
"""
molvs.charge
~~~~~~~~~~~~
This module implements tools for manipulating charges on molecules. In particular, :class:`~molvs.charge.Reionizer`,
which competitively reionizes acids such that the strongest acids ionize first, and :class:`~molvs.charge.Uncharger`,
which attempts to neutralize ionized acids and bases on a molecule.
"""
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
import copy
import logging
from rdkit import Chem
from .utils import memoized_property
log = logging.getLogger(__name__)
[docs]class AcidBasePair(object):
"""An acid and its conjugate base, defined by SMARTS.
A strength-ordered list of AcidBasePairs can be used to ensure the strongest acids in a molecule ionize first.
"""
def __init__(self, name, acid, base):
"""Initialize an AcidBasePair with the following parameters:
:param string name: A name for this AcidBasePair.
:param string acid: SMARTS pattern for the protonated acid.
:param string base: SMARTS pattern for the conjugate ionized base.
"""
log.debug('Initializing AcidBasePair: %s', name)
self.name = name
self.acid_str = acid
self.base_str = base
@memoized_property
def acid(self):
log.debug('Loading AcidBasePair acid: %s', self.name)
return Chem.MolFromSmarts(self.acid_str)
@memoized_property
def base(self):
log.debug('Loading AcidBasePair base: %s', self.name)
return Chem.MolFromSmarts(self.base_str)
def __repr__(self):
return 'AcidBasePair({!r}, {!r}, {!r})'.format(self.name, self.acid_str, self.base_str)
def __str__(self):
return self.name
#: The default list of AcidBasePairs, sorted from strongest to weakest. This list is derived from the Food and Drug
#: Administration Substance Registration System Standard Operating Procedure guide.
ACID_BASE_PAIRS = (
AcidBasePair('-OSO3H', 'OS(=O)(=O)[OH]', 'OS(=O)(=O)[O-]'),
AcidBasePair('–SO3H', '[!O]S(=O)(=O)[OH]', '[!O]S(=O)(=O)[O-]'),
AcidBasePair('-OSO2H', 'O[SD3](=O)[OH]', 'O[SD3](=O)[O-]'),
AcidBasePair('-SO2H', '[!O][SD3](=O)[OH]', '[!O][SD3](=O)[O-]'),
AcidBasePair('-OPO3H2', 'OP(=O)([OH])[OH]', 'OP(=O)([OH])[O-]'),
AcidBasePair('-PO3H2', '[!O]P(=O)([OH])[OH]', '[!O]P(=O)([OH])[O-]'),
AcidBasePair('-CO2H', 'C(=O)[OH]', 'C(=O)[O-]'),
AcidBasePair('thiophenol', 'c[SH]', 'c[S-]'),
AcidBasePair('(-OPO3H)-', 'OP(=O)([O-])[OH]', 'OP(=O)([O-])[O-]'),
AcidBasePair('(-PO3H)-', '[!O]P(=O)([O-])[OH]', '[!O]P(=O)([O-])[O-]'),
AcidBasePair('phthalimide', 'O=C2c1ccccc1C(=O)[NH]2', 'O=C2c1ccccc1C(=O)[N-]2'),
AcidBasePair('CO3H (peracetyl)', 'C(=O)O[OH]', 'C(=O)O[O-]'),
AcidBasePair('alpha-carbon-hydrogen-nitro group', 'O=N(O)[CH]', 'O=N(O)[C-]'),
AcidBasePair('-SO2NH2', 'S(=O)(=O)[NH2]', 'S(=O)(=O)[NH-]'),
AcidBasePair('-OBO2H2', 'OB([OH])[OH]', 'OB([OH])[O-]'),
AcidBasePair('-BO2H2', '[!O]B([OH])[OH]', '[!O]B([OH])[O-]'),
AcidBasePair('phenol', 'c[OH]', 'c[O-]'),
AcidBasePair('SH (aliphatic)', 'C[SH]', 'C[S-]'),
AcidBasePair('(-OBO2H)-', 'OB([O-])[OH]', 'OB([O-])[O-]'),
AcidBasePair('(-BO2H)-', '[!O]B([O-])[OH]', '[!O]B([O-])[O-]'),
AcidBasePair('cyclopentadiene', 'C1=CC=C[CH2]1', 'c1ccc[cH-]1'),
AcidBasePair('-CONH2', 'C(=O)[NH2]', 'C(=O)[NH-]'),
AcidBasePair('imidazole', 'c1cnc[nH]1', 'c1cnc[n-]1'),
AcidBasePair('-OH (aliphatic alcohol)', '[CX4][OH]', '[CX4][O-]'),
AcidBasePair('alpha-carbon-hydrogen-keto group', 'O=C([!O])[C!H0+0]', 'O=C([!O])[C-]'),
AcidBasePair('alpha-carbon-hydrogen-acetyl ester group', 'OC(=O)[C!H0+0]', 'OC(=O)[C-]'),
AcidBasePair('sp carbon hydrogen', 'C#[CH]', 'C#[C-]'),
AcidBasePair('alpha-carbon-hydrogen-sulfone group', 'CS(=O)(=O)[C!H0+0]', 'CS(=O)(=O)[C-]'),
AcidBasePair('alpha-carbon-hydrogen-sulfoxide group', 'C[SD3](=O)[C!H0+0]', 'C[SD3](=O)[C-]'),
AcidBasePair('-NH2', '[CX4][NH2]', '[CX4][NH-]'),
AcidBasePair('benzyl hydrogen', 'c[CX4H2]', 'c[CX3H-]'),
AcidBasePair('sp2-carbon hydrogen', '[CX3]=[CX3!H0+0]', '[CX3]=[CX2-]'),
AcidBasePair('sp3-carbon hydrogen', '[CX4!H0+0]', '[CX3-]'),
)
class ChargeCorrection(object):
"""An atom that should have a certain charge applied, defined by a SMARTS pattern."""
def __init__(self, name, smarts, charge):
"""Initialize a ChargeCorrection with the following parameters:
:param string name: A name for this ForcedAtomCharge.
:param string smarts: SMARTS pattern to match. Charge is applied to the first atom.
:param int charge: The charge to apply.
"""
log.debug('Initializing ChargeCorrection: %s', name)
self.name = name
self.smarts_str = smarts
self.charge = charge
@memoized_property
def smarts(self):
log.debug('Loading ChargeCorrection smarts: %s', self.name)
return Chem.MolFromSmarts(self.smarts_str)
def __repr__(self):
return 'ChargeCorrection({!r}, {!r}, {!r})'.format(self.name, self.smarts_str, self.charge)
def __str__(self):
return self.name
#: The default list of ChargeCorrections.
CHARGE_CORRECTIONS = (
ChargeCorrection('[Li,Na,K]', '[Li,Na,K;X0+0]', 1),
ChargeCorrection('[Mg,Ca]', '[Mg,Ca;X0+0]', 2),
ChargeCorrection('[Cl]', '[Cl;X0+0]', -1),
# TODO: Extend to other incorrectly charged atoms
)
[docs]class Reionizer(object):
"""A class to fix charges and reionize a molecule such that the strongest acids ionize first."""
def __init__(self, acid_base_pairs=ACID_BASE_PAIRS, charge_corrections=CHARGE_CORRECTIONS):
"""Initialize a Reionizer with the following parameter:
:param acid_base_pairs: A list of :class:`AcidBasePairs <molvs.charge.AcidBasePair>` to reionize, sorted from
strongest to weakest.
:param charge_corrections: A list of :class:`ChargeCorrections <molvs.charge.ChargeCorrection>`.
"""
log.debug('Initializing Reionizer')
self.acid_base_pairs = acid_base_pairs
self.charge_corrections = charge_corrections
[docs] def __call__(self, mol):
"""Calling a Reionizer instance like a function is the same as calling its reionize(mol) method."""
return self.reionize(mol)
[docs] def reionize(self, mol):
"""Enforce charges on certain atoms, then perform competitive reionization.
First, charge corrections are applied to ensure, for example, that free metals are correctly ionized. Then, if
a molecule with multiple acid groups is partially ionized, ensure the strongest acids ionize first.
The algorithm works as follows:
- Use SMARTS to find the strongest protonated acid and the weakest ionized acid.
- If the ionized acid is weaker than the protonated acid, swap proton and repeat.
:param mol: The molecule to reionize.
:type mol: rdkit.Chem.rdchem.Mol
:return: The reionized molecule.
:rtype: rdkit.Chem.rdchem.Mol
"""
log.debug('Running Reionizer')
start_charge = Chem.GetFormalCharge(mol)
# Apply forced charge corrections
for cc in self.charge_corrections:
for match in mol.GetSubstructMatches(cc.smarts):
atom = mol.GetAtomWithIdx(match[0])
log.info('Applying charge correction %s (%s %+d)', cc.name, atom.GetSymbol(), cc.charge)
atom.SetFormalCharge(cc.charge)
current_charge = Chem.GetFormalCharge(mol)
charge_diff = Chem.GetFormalCharge(mol) - start_charge
# If molecule is now neutral, assume everything is now fixed
# But otherwise, if charge has become more positive, look for additional protonated acid groups to ionize
if not current_charge == 0:
while charge_diff > 0:
ppos, poccur = self._strongest_protonated(mol)
if ppos is None:
break
log.info('Ionizing %s to balance previous charge corrections', self.acid_base_pairs[ppos].name)
patom = mol.GetAtomWithIdx(poccur[-1])
patom.SetFormalCharge(patom.GetFormalCharge() - 1)
if patom.GetNumExplicitHs() > 0:
patom.SetNumExplicitHs(patom.GetNumExplicitHs() - 1)
# else:
patom.UpdatePropertyCache()
charge_diff -= 1
already_moved = set()
while True:
ppos, poccur = self._strongest_protonated(mol)
ipos, ioccur = self._weakest_ionized(mol)
if ioccur and poccur and ppos < ipos:
if poccur[-1] == ioccur[-1]:
# Bad! H wouldn't be moved, resulting in infinite loop.
log.warning('Aborted reionization due to unexpected situation')
break
key = tuple(sorted([poccur[-1], ioccur[-1]]))
if key in already_moved:
log.warning('Aborting reionization to avoid infinite loop due to it being ambiguous where to put a Hydrogen')
break
already_moved.add(key)
log.info('Moved proton from %s to %s', self.acid_base_pairs[ppos].name, self.acid_base_pairs[ipos].name)
# Remove hydrogen from strongest protonated
patom = mol.GetAtomWithIdx(poccur[-1])
patom.SetFormalCharge(patom.GetFormalCharge() - 1)
# If no implicit Hs to autoremove, and at least 1 explicit H to remove, reduce explicit count by 1
if patom.GetNumImplicitHs() == 0 and patom.GetNumExplicitHs() > 0:
patom.SetNumExplicitHs(patom.GetNumExplicitHs() - 1)
# TODO: Remove any chiral label on patom?
patom.UpdatePropertyCache()
# Add hydrogen to weakest ionized
iatom = mol.GetAtomWithIdx(ioccur[-1])
iatom.SetFormalCharge(iatom.GetFormalCharge() + 1)
# Increase explicit H count if no implicit, or aromatic N or P, or non default valence state
if (iatom.GetNoImplicit() or
((patom.GetAtomicNum() == 7 or patom.GetAtomicNum() == 15) and patom.GetIsAromatic()) or
iatom.GetTotalValence() not in list(Chem.GetPeriodicTable().GetValenceList(iatom.GetAtomicNum()))):
iatom.SetNumExplicitHs(iatom.GetNumExplicitHs() + 1)
iatom.UpdatePropertyCache()
else:
break
# TODO: Canonical ionization position if multiple equivalent positions?
Chem.SanitizeMol(mol)
return mol
def _strongest_protonated(self, mol):
for position, pair in enumerate(self.acid_base_pairs):
for occurrence in mol.GetSubstructMatches(pair.acid):
return position, occurrence
return None, None
def _weakest_ionized(self, mol):
for position, pair in enumerate(reversed(self.acid_base_pairs)):
for occurrence in mol.GetSubstructMatches(pair.base):
return len(self.acid_base_pairs) - position - 1, occurrence
return None, None
[docs]class Uncharger(object):
"""Class for neutralizing charges in a molecule.
This class uncharges molecules by adding and/or removing hydrogens. In cases where there is a positive charge that
is not neutralizable, any corresponding negative charge is also preserved.
"""
def __init__(self, acid_base_pairs=ACID_BASE_PAIRS):
log.debug('Initializing Uncharger')
self.acid_base_pairs = acid_base_pairs
self.nitro = Chem.MolFromSmarts('[!#8][NX3+](=O)[O-]')
[docs] def __call__(self, mol):
"""Calling an Uncharger instance like a function is the same as calling its uncharge(mol) method."""
return self.uncharge(mol)
[docs] def uncharge(self, mol):
"""Neutralize molecule by adding/removing hydrogens.
:param mol: The molecule to uncharge.
:type mol: rdkit.Chem.rdchem.Mol
:return: The uncharged molecule.
:rtype: rdkit.Chem.rdchem.Mol
"""
log.debug('Running Uncharger')
mol = copy.deepcopy(mol)
# Neutralize positive charges
pos_remainder = 0
neg_count = 0
for atom in mol.GetAtoms():
# Remove hydrogen from positive atoms and reduce formal change until neutral or no more hydrogens
while atom.GetFormalCharge() > 0 and atom.GetNumExplicitHs() > 0:
atom.SetNumExplicitHs(atom.GetNumExplicitHs() - 1)
atom.SetFormalCharge(atom.GetFormalCharge() - 1)
log.info('Removed positive charge')
chg = atom.GetFormalCharge()
if chg > 0:
# Record number of non-neutralizable positive charges
pos_remainder += chg
elif chg < 0:
# Record total number of negative charges
neg_count += -chg
# Choose negative charges to leave in order to balance non-neutralizable positive charges
neg_skip = self._get_neg_skip(mol, pos_remainder)
# Neutralize remaining negative charges
for atom in mol.GetAtoms():
log.info(atom.GetIdx())
if atom.GetIdx() in neg_skip:
continue
# Make sure to stop when neg_count <= pos_remainder, as it is possible that neg_skip is not large enough
while atom.GetFormalCharge() < 0 and neg_count > pos_remainder:
atom.SetNumExplicitHs(atom.GetNumExplicitHs() + 1)
atom.SetFormalCharge(atom.GetFormalCharge() + 1)
neg_count -= 1
log.info('Removed negative charge')
return mol
def _get_neg_skip(self, mol, pos_count):
"""Get negatively charged atoms to skip (up to pos_count)."""
neg_skip = set()
if pos_count:
# Get negative oxygens in charge-separated nitro groups TODO: Any other special cases to skip?
for occurrence in mol.GetSubstructMatches(self.nitro):
neg_skip.add(occurrence[-1])
if len(neg_skip) >= pos_count:
return neg_skip
# Get strongest ionized acids
for position, pair in enumerate(self.acid_base_pairs):
for occurrence in mol.GetSubstructMatches(pair.base):
neg_skip.add(occurrence[-1])
if len(neg_skip) >= pos_count:
return neg_skip
return neg_skip