Source code for torch_molecule.utils.graph.features

# Vocabulary tables for the categorical node (atom) and edge (bond) features.
# A feature value is encoded as its position inside the matching list; lists
# ending in 'misc' keep that final slot as the catch-all entry.
allowable_features = {
    # Atomic numbers 1-118 sit at indices 0-117, 119 (masked atom) at index
    # 118, and 'misc' (e.g. * for polymers) at index 119.
    'possible_atomic_num_list': [*range(1, 120), 'misc'],
    'possible_chirality_list': [
        'CHI_UNSPECIFIED',
        'CHI_TETRAHEDRAL_CW',
        'CHI_TETRAHEDRAL_CCW',
        'CHI_OTHER',
        'misc',
    ],
    # Small integer ranges, each followed by the catch-all slot.
    'possible_degree_list': [*range(0, 11), 'misc'],
    'possible_formal_charge_list': [*range(-5, 6), 'misc'],
    'possible_numH_list': [*range(0, 9), 'misc'],
    'possible_number_radical_e_list': [*range(0, 5), 'misc'],
    'possible_hybridization_list': ['SP', 'SP2', 'SP3', 'SP3D', 'SP3D2', 'misc'],
    # Boolean flags: index 0 is False, index 1 is True.
    'possible_is_aromatic_list': [False, True],
    'possible_is_in_ring_list': [False, True],
    'possible_bond_type_list': ['SINGLE', 'DOUBLE', 'TRIPLE', 'AROMATIC', 'misc'],
    # Note: this list has no 'misc' entry.
    'possible_bond_stereo_list': [
        'STEREONONE',
        'STEREOZ',
        'STEREOE',
        'STEREOCIS',
        'STEREOTRANS',
        'STEREOANY',
    ],
    'possible_is_conjugated_list': [False, True],
}

def safe_index(l, e):
    """
    Return index of element e in list l. If e is not present, return the last index

    :param l: list to search
    :param e: element to look up
    :return: position of the first occurrence of e, or len(l) - 1 when e is absent
    """
    try:
        return l.index(e)
    except ValueError:
        # list.index reports a missing element with ValueError; only that case
        # maps to the trailing slot. Any other exception (KeyboardInterrupt,
        # a failing __eq__, ...) propagates instead of being silently hidden.
        return len(l) - 1
# # miscellaneous case
# i = safe_index(allowable_features['possible_atomic_num_list'], 'asdf')
# assert allowable_features['possible_atomic_num_list'][i] == 'misc'
# # normal case
# i = safe_index(allowable_features['possible_atomic_num_list'], 2)
# assert allowable_features['possible_atomic_num_list'][i] == 2
def atom_to_feature_vector(atom):
    """
    Encode an rdkit atom as a list of nine categorical indices

    Output order: atomic number, chirality tag, total degree, formal charge,
    total hydrogen count, radical electron count, hybridization, aromatic
    flag, in-ring flag.

    :param atom: rdkit atom object
    :return: list of indices into the allowable_features tables
    """
    tables = allowable_features

    # Features looked up through safe_index: a value missing from its table
    # is mapped to that table's last index.
    with_fallback = (
        ('possible_atomic_num_list', atom.GetAtomicNum()),
        ('possible_chirality_list', str(atom.GetChiralTag())),
        ('possible_degree_list', atom.GetTotalDegree()),
        ('possible_formal_charge_list', atom.GetFormalCharge()),
        ('possible_numH_list', atom.GetTotalNumHs()),
        ('possible_number_radical_e_list', atom.GetNumRadicalElectrons()),
        ('possible_hybridization_list', str(atom.GetHybridization())),
    )
    encoded = [safe_index(tables[key], value) for key, value in with_fallback]

    # The two flags are looked up directly in their [False, True] tables.
    encoded.append(tables['possible_is_aromatic_list'].index(atom.GetIsAromatic()))
    encoded.append(tables['possible_is_in_ring_list'].index(atom.IsInRing()))
    return encoded
# from rdkit import Chem
# mol = Chem.MolFromSmiles('Cl[C@H](/C=C/C)Br')
# atom = mol.GetAtomWithIdx(1)  # chiral carbon
# atom_feature = atom_to_feature_vector(atom)
# assert atom_feature == [5, 2, 4, 5, 1, 0, 2, 0, 0]
def get_atom_feature_dims():
    """
    Return the number of entries in each atom feature table

    :return: list of nine table lengths, in the order atomic number,
        chirality, degree, formal charge, numH, radical electrons,
        hybridization, is_aromatic, is_in_ring
    """
    atom_table_keys = (
        'possible_atomic_num_list',
        'possible_chirality_list',
        'possible_degree_list',
        'possible_formal_charge_list',
        'possible_numH_list',
        'possible_number_radical_e_list',
        'possible_hybridization_list',
        'possible_is_aromatic_list',
        'possible_is_in_ring_list',
    )
    return [len(allowable_features[key]) for key in atom_table_keys]
def bond_to_feature_vector(bond):
    """
    Encode an rdkit bond as a list of three categorical indices

    Output order: bond type, stereo label, conjugation flag.

    :param bond: rdkit bond object
    :return: list of indices into the allowable_features tables
    :raises ValueError: if the stereo label is not in
        'possible_bond_stereo_list' (that lookup uses list.index directly and
        the table has no 'misc' slot)
    """
    tables = allowable_features

    # Bond type goes through safe_index, so an unlisted type maps to the
    # table's last index.
    type_idx = safe_index(tables['possible_bond_type_list'], str(bond.GetBondType()))
    # Stereo and conjugation are strict lookups.
    stereo_idx = tables['possible_bond_stereo_list'].index(str(bond.GetStereo()))
    conjugated_idx = tables['possible_is_conjugated_list'].index(bond.GetIsConjugated())

    return [type_idx, stereo_idx, conjugated_idx]
# uses same molecule as atom_to_feature_vector test
# bond = mol.GetBondWithIdx(2)  # double bond with stereochem
# bond_feature = bond_to_feature_vector(bond)
# assert bond_feature == [1, 2, 0]
def get_bond_feature_dims():
    """
    Return the number of entries in each bond feature table

    :return: list of three table lengths, in the order bond type, bond
        stereo, is_conjugated
    """
    bond_table_keys = (
        'possible_bond_type_list',
        'possible_bond_stereo_list',
        'possible_is_conjugated_list',
    )
    return [len(allowable_features[key]) for key in bond_table_keys]
def atom_feature_vector_to_dict(atom_feature):
    """
    Decode a nine-element atom index vector back into named feature values

    :param atom_feature: sequence of exactly nine indices, ordered atomic
        number, chirality, degree, formal charge, numH, radical electrons,
        hybridization, is_aromatic, is_in_ring
    :return: dict mapping feature name to the table entry at each index
    """
    # Unpacking enforces the expected length of nine.
    (num_i, chi_i, deg_i, charge_i, h_i,
     rad_i, hyb_i, arom_i, ring_i) = atom_feature

    # (output name, table key, index) in output order.
    lookups = (
        ('atomic_num', 'possible_atomic_num_list', num_i),
        ('chirality', 'possible_chirality_list', chi_i),
        ('degree', 'possible_degree_list', deg_i),
        ('formal_charge', 'possible_formal_charge_list', charge_i),
        ('num_h', 'possible_numH_list', h_i),
        ('num_rad_e', 'possible_number_radical_e_list', rad_i),
        ('hybridization', 'possible_hybridization_list', hyb_i),
        ('is_aromatic', 'possible_is_aromatic_list', arom_i),
        ('is_in_ring', 'possible_is_in_ring_list', ring_i),
    )
    return {
        name: allowable_features[table_key][idx]
        for name, table_key, idx in lookups
    }
# # uses same atom_feature as atom_to_feature_vector test
# atom_feature_dict = atom_feature_vector_to_dict(atom_feature)
# assert atom_feature_dict['atomic_num'] == 6
# assert atom_feature_dict['chirality'] == 'CHI_TETRAHEDRAL_CCW'
# assert atom_feature_dict['degree'] == 4
# assert atom_feature_dict['formal_charge'] == 0
# assert atom_feature_dict['num_h'] == 1
# assert atom_feature_dict['num_rad_e'] == 0
# assert atom_feature_dict['hybridization'] == 'SP3'
# assert atom_feature_dict['is_aromatic'] == False
# assert atom_feature_dict['is_in_ring'] == False
def bond_feature_vector_to_dict(bond_feature):
    """
    Decode a three-element bond index vector back into named feature values

    :param bond_feature: sequence of exactly three indices, ordered bond
        type, bond stereo, is_conjugated
    :return: dict mapping feature name to the table entry at each index
    """
    # Unpacking enforces the expected length of three.
    (type_i, stereo_i, conjugated_i) = bond_feature

    # (output name, table key, index) in output order.
    lookups = (
        ('bond_type', 'possible_bond_type_list', type_i),
        ('bond_stereo', 'possible_bond_stereo_list', stereo_i),
        ('is_conjugated', 'possible_is_conjugated_list', conjugated_i),
    )
    return {
        name: allowable_features[table_key][idx]
        for name, table_key, idx in lookups
    }
# # uses same bond as bond_to_feature_vector test
# bond_feature_dict = bond_feature_vector_to_dict(bond_feature)
# assert bond_feature_dict['bond_type'] == 'DOUBLE'
# assert bond_feature_dict['bond_stereo'] == 'STEREOE'
# assert bond_feature_dict['is_conjugated'] == False

from rdkit.Chem import AllChem
def getmorganfingerprint(mol, radius=2, n_bits=1024):
    """
    Compute a Morgan fingerprint bit vector for a molecule and return it as a list

    :param mol: rdkit molecule object
    :param radius: radius argument forwarded to rdkit; defaults to 2, the
        value that used to be hard-coded
    :param n_bits: nBits argument forwarded to rdkit; defaults to 1024, the
        value that used to be hard-coded
    :return: list built by iterating the bit vector rdkit returns
    """
    bit_vect = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
    return list(bit_vect)
def getmaccsfingerprint(mol):
    """
    Compute the MACCS keys fingerprint for a molecule and return it as a list of ints

    :param mol: rdkit molecule object
    :return: list with one int per character of the fingerprint's bit string
    """
    bit_string = AllChem.GetMACCSKeysFingerprint(mol).ToBitString()
    return list(map(int, bit_string))