ONIOM自动化脚本

功能

为小分子配体按gjf格式补齐所需的所有参数。

准备工作

当前目录(根目录)下存放该脚本,以及每个体系的相关文件的子文件夹,如4B4M,6FER。
每个子文件夹内需要有:
需要补参数的配体的Gaussian计算log文件
选好高低层原子的gjf文件,命名中需要带“oniom”字样,有时配体需要补电荷的部分GV会自动给一些,需要全部删掉。
非必需:
自己提供resp电荷+原子名,力场信息,分别命名为resp.txt和force_field.txt。如果有这个,就不需要log文件。

使用

  • python oniom.py 4B4M
输入文件夹名即可。自动识别文件夹内所有带oniom字样的gjf文件,进行补参数。
文件夹下需要放入配体的Gaussian计算log文件;若自行提供 resp.txt 和 force_field.txt 两个文件(见"准备工作"),则无需log文件。

报错

报错:
  • The number of atoms in gjf file is……
检查配体原子数是否一致,gjf文件末尾高层原子,确认所有的配体原子都没有标注电荷,确认上方残基没有缺失电荷。

后续计划

自动调用modeller实现同源建模(优化模式+残基坐标不动模式),调用pdb2pqr加氢,调用obabel合并蛋白配体文件。

代码

oniom.py

  • ##
  • import os
  • import subprocess
  • import argparse
  • import re
  • from filetools import FileWalker

  • ##
  • class ONIOM:

  •     def __init__(self, rootpath='./'):

  •         self.orgin_path = os.getcwd()
  •         self.rootpath = rootpath
  •         os.chdir(self.rootpath)

  •     def lig_prep(self,ph=7.4):

  •         pass

  •     def get_lig_information(self):

  •         command_antechamber1 = 'antechamber -i *.log -fi gout -o lig.prep -fo prepi -c resp '
  •         command_antechamber2 = 'awk \'$1==\"ATOM\" {print $3, toupper($10) \"-\" $9}\' ANTECHAMBER_PREP.AC'
  •         command_parmchk1 = 'parmchk  -i lig.prep -f prepi -o forcefiled.frcmod -a y'
  •         command_parmchk2 = 'awk \'/NONBON/, /NR/\' *mod | awk \'{print \"VDW\", $1, $2, $3}\''
  •         command_parmchk3 = 'awk \'/^BOND/, /ANGLE/\' *mod | awk \'{print \"HrmStr1\", $1, $2, $3, $4}\' | tr \'-\' \' \''
  •         command_parmchk4 = "awk \'/^ANGLE/, /DIHE/\' *mod | awk \'{print \"HrmBnd1\", $1, $2, $3, $4, $5}\' | tr \'-\' \' \'"

  •         subprocess.run(command_antechamber1, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         result = subprocess.run(command_antechamber2, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         self.resp = result
  •         with open('resp.txt', 'w') as f:
  •             f.write(result.stdout)

  •         text = ''
  •         subprocess.run(command_parmchk1, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         result = subprocess.run(command_parmchk2, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         text += result.stdout
  •         result = subprocess.run(command_parmchk3, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         text += result.stdout
  •         result = subprocess.run(command_parmchk4, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         text += result.stdout
  •         text = text.split('\n')
  •         text_clean = []
  •         for line in text:
  •             if len(line.split()) > 3:
  •                 text_clean.append(line)
  •         text_clean = '\n'.join(text_clean)
  •         self.force_field = text_clean
  •         with open('force_field.txt', 'w') as f:
  •             f.write(text_clean)
  •             f.write('\nHrmBnd1 hc c3 h1 35.0 109.5\nHrmBnd1 hc ct h1 35.0 109.5')

  •     def _parse_gjf_file(self, filepath):

  •         location = []
  •         location_index = []
  •         with open(filepath, 'r') as file:
  •             file_content = file.readlines()
  •             for index,line in enumerate(file_content):
  •                 if 'PDBName' not in line:
  •                     continue
  •                 split_contents = re.findall('[^(]+', line)
  •                 if len(split_contents) == 2:
  •                     if 0 < len(re.findall('[^-]+', split_contents[0])) < 3:
  •                         location.append('(' + re.findall('[^\n]+', split_contents[1])[0])
  •                         location_index.append(index)

  •         return location, location_index, file_content

  •     def _parse_resp_file(self):

  •         atom_types = []
  •         with open('resp.txt', 'r') as file:
  •             for line in file:
  •                 parts = re.findall('[^ ]+', line)
  •                 atom_type = re.findall('[A-Za-z]+', parts[0])[0] + '-' + parts[1]
  •                 atom_type = re.findall('[^\n]+', atom_type)[0]
  •                 atom_types.append(atom_type)

  •         return atom_types

  •     def _parse_force_field_file(self):

  •         with open('force_field.txt', 'r') as f:
  •             content = f.read()

  •         return content

  •     def write_parameter_to_gjf(self, filepath=None):

  •         #输入为文件夹,识别文件夹内所有带oniom字样带gjf文件,进行补参数。也可以输入单个需要补参数带gjf文件名
  •         if filepath == None:
  •             filepath = self.rootpath
  •         if os.path.isdir(filepath):
  •             gjf_list = FileWalker.get_files(root_filepath=filepath, filetype='gjf', file_filter='oniom')
  •         else:
  •             gjf_list = [filepath]
  •         #若电荷或力场参数文件不存在,根据小分子计算log文件,自动生成
  •         if not (os.path.exists('resp.txt') and os.path.exists('force_field.txt')):
  •             self.get_lig_information()
  •         resp_atom_types = self._parse_resp_file()
  •         force_field_content = self._parse_force_field_file()
  •         print(gjf_list)
  •         for filepath in gjf_list:
  •             location, location_index, file_content = self._parse_gjf_file(filepath)
  •         #需要补充参数的原子数目必须和小分子计算文件的数目一致
  •             if len(location) == len(resp_atom_types):
  •                 for i in range(len(resp_atom_types)):
  •                     output = f' {resp_atom_types[i]}{location[i]}\n'
  •                     file_content[location_index[i]] = output
  •                 with open(filepath, 'w') as f:
  •                     f.write(''.join(file_content))
  •                     f.write('\n')
  •                     f.write(force_field_content)
  •                     f.write('\n\n')
  •                 print(f'{filepath} success')
  •             else:
  •                 print(f'The number of atoms in gjf file is {len(location)} not the same with that \
  • {len(resp_atom_types)} in resp file. Please ensure that all ligand atoms in high layer have NO resp in gjf file \
  • and all atoms in low layer DO have resp. The atoms lack of resp are:\n')
  •                 for item in location:
  •                     print(item)


  •     def generate_com_pdb(self):

  •         command_obabel = 'obabel -ipdb *_pro_fix.pdb  *_lig.pdb -opdb -j -O _com.pdb'

  •     def generate_protein(self, ph=7.0, sys_name=None):

  •         try:
  •             command_pdb2pqr = f'pdb2pqr --ff=amber --ffout=amber --chain --with-ph={ph} 3rx4_com.pdb 3RX4_pro_fix.pqr'
  •             result = subprocess.run(command_pdb2pqr, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         except Exception as e1:
  •             print(f'pdb2pq failed. {e1}')
  •             try:
  •                 command_pdb2pqr = 'pdb2pqr30 input.pdb out.pqr --ff AMBER --ffout AMBER --with-ph 7.4 --pdb-output out.pdb'
  •                 subprocess.run(command_pdb2pqr, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •             except Exception as e2:
  •                 print(f'pdb2pqr30 failed. {e2}')

  •         command_obabel = 'obabel -ipqr *_pro_fix.pqr -o pdb -O 3RX4_pro_fix.pdb'
  •         subprocess.run(command_obabel, text=True, stdout=subprocess.PIPE, check=True, shell=True)

  •     def __del__(self):

  •         os.chdir(self.orgin_path)


  • if __name__ == '__main__':
  •     paser = argparse.ArgumentParser()
  •     paser.add_argument('rootpath', type=str)
  •     args = paser.parse_args()
  •     myOniom = ONIOM(args.rootpath)
  •     myOniom.write_parameter_to_gjf()
  •     
filetools.py
  • import  os

  • class FileWalker:

  •     @staticmethod
  •     def get_files(root_filepath='./', filetype='gjf', file_filter=None):

  •         filetype = '.' + filetype
  •         gjf_file_path_list = []
  •         for root, _, files in os.walk(root_filepath):
  •             for file in files:
  •                 if file.lower().endswith(filetype):
  •                     gjf_file_path_list.append(os.path.join(root, file))

  •         if file_filter:
  •             gjf_file_path_list_filter = []
  •             for item in gjf_file_path_list:
  •                 if file_filter in item:
  •                     gjf_file_path_list_filter.append(item)
  •             return gjf_file_path_list_filter
  •         else:
  •             return gjf_file_path_list

  •     # level_list二维列表[[],[],[]],第n行为n级目录,每列为n级目录的所有子目录
  •     @staticmethod
  •     def make_folders(level_list):

  •         from itertools import product
  •         # 使用itertools.product生成所有子列表的笛卡尔积
  •         combinations = list(product(*level_list))
  •         path_list = [''.join(combination) for combination in combinations]
  •         for path in path_list:
  •             try:
  •                 os.makedirs(path, exist_ok=True)
  •             except:
  •                 print(f'directory {path} make failed')
  •         return path_list