Excel中根据SMILES添加分子结构图片
- #注:Excel中SMILES所在列名称需为'SMILES'
- #yulong.shi
- #2024.02.20
import hashlib
import os
import warnings

import openpyxl
import pandas as pd
import xlsxwriter
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw.MolDrawing import MolDrawing,DrawingOptions
# Workbook to annotate; it must contain a column named 'SMILES'.
filename = 'ligand.xlsx'
# Read the data to process, taking the first row as the header.
df = pd.read_excel(filename, header=0, engine='openpyxl')
# Directory (relative to the working directory) that holds the rendered PNGs.
_FIGURE_DIR = 'figure'
# Pixel size passed to RDKit for every rendered structure.
_IMAGE_SIZE = (150, 100)


def _image_path(smiles):
    """Return the PNG path used for the structure image of *smiles*.

    The file name is a hash of the SMILES text rather than the text itself:
    SMILES may contain '/' and backslash (path separators), may differ only
    by letter case (aromatic 'c' vs. aliphatic 'C'), and may be longer than
    a file name is allowed to be.
    """
    digest = hashlib.sha256(smiles.encode('utf-8')).hexdigest()
    return os.path.join(_FIGURE_DIR, f'img{digest}.png')


def _is_missing(value):
    """Return True for scalar missing values (None, NaN, NaT, pd.NA)."""
    return pd.api.types.is_scalar(value) and bool(pd.isna(value))


# Generate one image per SMILES string; images are stored in the 'figure' folder.
def generation_images(data):
    """Render a PNG for every parsable entry of ``data['SMILES']``.

    Images are written into the ``figure`` directory, which is created when
    it does not exist.  Entries that are not strings (for example empty
    cells, which arrive as NaN) or that RDKit cannot parse are skipped with
    a warning instead of aborting the whole run.

    Args:
        data: Table with a 'SMILES' column, e.g. a pandas DataFrame.
    """
    os.makedirs(_FIGURE_DIR, exist_ok=True)
    for smiles in data['SMILES'].tolist():
        if not isinstance(smiles, str):
            warnings.warn(
                f'Skipping non-text SMILES entry: {smiles!r}', stacklevel=2
            )
            continue
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:  # RDKit signals an unparsable SMILES with None.
            warnings.warn(f'Skipping invalid SMILES: {smiles!r}', stacklevel=2)
            continue
        Draw.MolToFile(mol, _image_path(smiles), size=_IMAGE_SIZE)


# Create the Excel workbook: structure images in the first column, followed by
# the original table data.
def load_images(data):
    """Write ``dataset_with_images.xlsx`` with a structure image per row.

    The 'result' sheet gets a 'Structure' column (A) holding the image that
    ``generation_images`` rendered for the row's SMILES, followed by the
    columns of *data* starting at column B.  Rows for which no image file
    exists keep an empty structure cell, and missing values are written as
    blank cells.

    Args:
        data: Table with a 'SMILES' column, e.g. a pandas DataFrame.  Rows
            are taken in positional order, so any index is accepted.
    """
    row_height = 75  # Row height applied to every data row.
    smiles_column = data['SMILES'].tolist()
    records = data.itertuples(index=False, name=None)

    # The context manager closes (and thereby saves) the workbook even when
    # writing a row fails.
    with xlsxwriter.Workbook('dataset_with_images.xlsx') as workbook:
        worksheet = workbook.add_worksheet('result')
        worksheet.set_column('A:A', 21)  # Width of the image column.
        worksheet.write_row(0, 0, ['Structure', *data.columns])

        # Sheet row 0 is the header, so data rows start at 1.
        for row, (smiles, record) in enumerate(
            zip(smiles_column, records), start=1
        ):
            worksheet.set_row(row, row_height)
            # NaN cannot be written as a number by xlsxwriter; use blanks.
            cells = [None if _is_missing(value) else value for value in record]
            worksheet.write_row(row, 1, cells)

            if not isinstance(smiles, str):
                continue
            image = _image_path(smiles)
            if os.path.isfile(image):
                worksheet.insert_image(row, 0, image)
# Run the pipeline: render the structure images first, then build the
# workbook that embeds them next to the original data.
generation_images(df)
load_images(df)
ligand.xlsx 格式示例(必须包含名为 SMILES 的列;可以有任意多的其他列,比如对接打分等):
- NAME SMILES Score
- m1 CCC -6
- m2 CCCN -7