Excel中根据SMILES添加分子结构图片

  • #注:Excel中SMILES所在列名称需为'SMILES'
  • #yulong.shi
  • #2024.02.20
  • import pandas as pd 
  • import xlsxwriter
  • import openpyxl
  • import os
  • from rdkit import Chem
  • from rdkit.Chem import Draw
  • from rdkit.Chem.Draw.MolDrawing import MolDrawing,DrawingOptions

  • # Input workbook; the functions below read its 'SMILES' column.
  • filename='ligand.xlsx'

  • # Load the data to process; the first row of the sheet is used as the header.
  • df = pd.read_excel(filename, engine='openpyxl',header=0)
  •  
  • # 根据smiles编码生成图片,生成的图片在figure文件夹中
  • def generation_images(data):
  •     if not os.path.isdir('figure'):
  •         os.mkdir('figure')
  •     draw = data['SMILES'].tolist()   #tolist()用于将数组或矩阵转为列表。
  •     for i in draw:
  •         mol = Chem.MolFromSmiles(i)
  •         Draw.MolToFile(mol,f'./figure/img{i}.png',size=(150,100))
  •  
  • # 创建excel表格,在首列插入生成的图片和原表格数据
  • def load_images(data):
  •     workbook = xlsxwriter.Workbook('dataset_with_images.xlsx')
  •     worksheet = workbook.add_worksheet('result')
  •     row_height = 75           # 设置行高
  •     worksheet.set_column('A:A', 21)   # 设置A列宽度
  •     worksheet.write_row(f'A1',['Structure',*list(df.columns)])
  •     for row in range(1, len(data)+1):
  •         worksheet.set_row(row, row_height)
  •         worksheet.write_row(f'B{row+1}',data.loc[row-1])
  •         worksheet.insert_image(f'A{row+1}', f'./figure/img'+str(data['SMILES'][row-1])+'.png')
  •     workbook.close()
  •  
  • # Render the structure images first, then build the workbook that embeds them.
  • generation_images(df)
  • load_images(df)

ligand.xlsx格式示例(必须包含名为'SMILES'的列;可以有很多其他列,比如对接打分等)
  • NAME SMILES Score
  • m1 CCC -6
  • m2 CCCN -7