ONIOM自动化脚本
功能
为小分子配体按gjf格式补齐所需的所有参数。
准备工作
当前目录(根目录)下存放该脚本,以及每个体系的相关文件的子文件夹,如4B4M,6FER。
每个子文件夹内需要有:
需要补参数的配体的Gaussian计算log文件
选好高低层原子的gjf文件,命名中需要带“oniom”字样,有时配体需要补电荷的部分GV会自动给一些,需要全部删掉。
非必需:
自己提供resp电荷+原子名,力场信息,分别命名为resp.txt和force_field.txt。如果有这个,就不需要log文件。
使用
- python ONIOM_prep-demo.py 4B4M
输入文件夹名即可。自动识别文件夹内所有带oniom字样的gjf文件,进行补参数。
文件夹下需要按"准备工作"一节放入所需文件(配体log文件与oniom gjf文件,或自备的resp.txt和force_field.txt)。
报错
报错:
- The number of atoms in gjf file is……
检查配体原子数是否一致,gjf文件末尾高层原子,确认所有的配体原子都没有标注电荷,确认上方残基没有缺失电荷。
后续计划
自动调用modeller实现同源建模(优化模式+残基坐标不动模式),调用pdb2pqr加氢,调用obabel合并蛋白配体文件。
代码
oniom.py
- ##
- import os
- import subprocess
- import argparse
- import re
- from filetools import FileWalker
- ##
class ONIOM:
    """Automate ONIOM input preparation for a protein-ligand system.

    Generates RESP charges and missing GAFF force-field parameters for a
    small-molecule ligand (via antechamber/parmchk) and patches them into
    Gaussian ONIOM ``.gjf`` files found under ``rootpath``.
    """

    def __init__(self, rootpath='./'):
        # Remember the caller's directory so __del__ can restore it; all the
        # shell commands below assume the CWD is the system folder.
        self.orgin_path = os.getcwd()
        self.rootpath = rootpath
        os.chdir(self.rootpath)

    def lig_prep(self, ph=7.4):
        # Placeholder for future ligand protonation/preparation at the given pH.
        pass

    def get_lig_information(self):
        """Run antechamber/parmchk on the ligand Gaussian log in the CWD.

        Writes ``resp.txt`` (atom name + RESP charge per atom) and
        ``force_field.txt`` (VDW / bond-stretch / angle-bend parameters in
        Gaussian soft-parameter format). Raises CalledProcessError when any
        external tool fails.
        """
        command_antechamber1 = 'antechamber -i *.log -fi gout -o lig.prep -fo prepi -c resp '
        command_antechamber2 = "awk '$1==\"ATOM\" {print $3, toupper($10) \"-\" $9}' ANTECHAMBER_PREP.AC"
        # NOTE: 'forcefiled.frcmod' is misspelled but harmless — the later
        # commands glob it with '*mod'. parmchk is the AmberTools 1.x name
        # (parmchk2 in newer releases).
        command_parmchk1 = 'parmchk -i lig.prep -f prepi -o forcefiled.frcmod -a y'
        command_parmchk2 = "awk '/NONBON/, /NR/' *mod | awk '{print \"VDW\", $1, $2, $3}'"
        command_parmchk3 = "awk '/^BOND/, /ANGLE/' *mod | awk '{print \"HrmStr1\", $1, $2, $3, $4}' | tr '-' ' '"
        command_parmchk4 = "awk '/^ANGLE/, /DIHE/' *mod | awk '{print \"HrmBnd1\", $1, $2, $3, $4, $5}' | tr '-' ' '"
        subprocess.run(command_antechamber1, text=True, stdout=subprocess.PIPE, check=True, shell=True)
        result = subprocess.run(command_antechamber2, text=True, stdout=subprocess.PIPE, check=True, shell=True)
        # Store the text itself (not the CompletedProcess) for consistency
        # with self.force_field below.
        self.resp = result.stdout
        with open('resp.txt', 'w') as f:
            f.write(result.stdout)
        subprocess.run(command_parmchk1, text=True, stdout=subprocess.PIPE, check=True, shell=True)
        sections = []
        for command in (command_parmchk2, command_parmchk3, command_parmchk4):
            out = subprocess.run(command, text=True, stdout=subprocess.PIPE, check=True, shell=True)
            sections.append(out.stdout)
        # Keep only lines that actually carry parameters (>3 fields); awk
        # section headers and blanks are dropped.
        lines = ''.join(sections).split('\n')
        text_clean = '\n'.join(line for line in lines if len(line.split()) > 3)
        self.force_field = text_clean
        with open('force_field.txt', 'w') as f:
            f.write(text_clean)
            # parmchk does not emit these two common H angle terms; append
            # them so Gaussian does not abort on missing parameters.
            f.write('\nHrmBnd1 hc c3 h1 35.0 109.5\nHrmBnd1 hc ct h1 35.0 109.5')

    def _parse_gjf_file(self, filepath):
        """Return (locations, line indices, full content) for the atoms in an
        ONIOM .gjf that still lack a RESP charge.

        A chargeless atom line looks like ' C-CA(PDBName=...' — exactly one
        '(' on the line and at most one '-' before it; charged atoms carry a
        second hyphen ('C-CA-0.25(...') and are skipped. Each stored location
        is the text from '(' to end of line (newline stripped).
        """
        location = []
        location_index = []
        with open(filepath, 'r') as file:
            file_content = file.readlines()
        for index, line in enumerate(file_content):
            if 'PDBName' not in line:
                continue
            split_contents = re.findall(r'[^(]+', line)
            if len(split_contents) == 2:
                # 1 or 2 hyphen-separated runs before '(' => no charge field.
                if 0 < len(re.findall(r'[^-]+', split_contents[0])) < 3:
                    location.append('(' + re.findall(r'[^\n]+', split_contents[1])[0])
                    location_index.append(index)
        return location, location_index, file_content

    def _parse_resp_file(self):
        """Read resp.txt from the CWD and return 'Element-charge' tokens,
        one per ligand atom, in file order. Blank/short lines are skipped."""
        atom_types = []
        with open('resp.txt', 'r') as file:
            for line in file:
                parts = line.split()
                if len(parts) < 2:
                    continue
                # First alphabetic run of the atom name gives the element.
                element = re.findall(r'[A-Za-z]+', parts[0])[0]
                atom_types.append(element + '-' + parts[1])
        return atom_types

    def _parse_force_field_file(self):
        """Return the whole contents of force_field.txt from the CWD."""
        with open('force_field.txt', 'r') as f:
            return f.read()

    def write_parameter_to_gjf(self, filepath=None):
        """Patch RESP charges and force-field parameters into .gjf files.

        *filepath* may be a directory (every 'oniom'-tagged .gjf inside is
        processed; defaults to rootpath) or a single .gjf file. If resp.txt /
        force_field.txt are missing they are generated from the ligand log.
        """
        if filepath is None:
            filepath = self.rootpath
        if os.path.isdir(filepath):
            gjf_list = FileWalker.get_files(root_filepath=filepath, filetype='gjf', file_filter='oniom')
        else:
            gjf_list = [filepath]
        # Generate charge/force-field files automatically unless the user
        # supplied both by hand.
        if not (os.path.exists('resp.txt') and os.path.exists('force_field.txt')):
            self.get_lig_information()
        resp_atom_types = self._parse_resp_file()
        force_field_content = self._parse_force_field_file()
        print(gjf_list)
        for gjf_path in gjf_list:
            location, location_index, file_content = self._parse_gjf_file(gjf_path)
            # Every chargeless atom in the gjf must match one resp entry.
            if len(location) != len(resp_atom_types):
                print(f'The number of atoms in gjf file is {len(location)} not the same with that '
                      f'{len(resp_atom_types)} in resp file. Please ensure that all ligand atoms in '
                      'high layer have NO resp in gjf file and all atoms in low layer DO have resp. '
                      'The atoms lack of resp are:\n')
                for item in location:
                    print(item)
                continue
            for i, atom_type in enumerate(resp_atom_types):
                file_content[location_index[i]] = f' {atom_type}{location[i]}\n'
            with open(gjf_path, 'w') as f:
                f.write(''.join(file_content))
                f.write('\n')
                f.write(force_field_content)
                f.write('\n\n')
            print(f'{gjf_path} success')

    def generate_com_pdb(self):
        """Merge the fixed protein PDB and the ligand PDB into _com.pdb.

        NOTE(review): the original built this command but never ran it; it is
        now executed. Input names are hard-coded globs.
        """
        command_obabel = 'obabel -ipdb *_pro_fix.pdb *_lig.pdb -opdb -j -O _com.pdb'
        subprocess.run(command_obabel, text=True, stdout=subprocess.PIPE, check=True, shell=True)

    def generate_protein(self, ph=7.0, sys_name=None):
        """Protonate the protein with pdb2pqr (falling back to the pdb2pqr30
        CLI) and convert the resulting .pqr back to PDB with obabel.

        NOTE(review): file names are hard-coded (3rx4/3RX4) and *sys_name* is
        currently unused — TODO parameterize.
        """
        try:
            command_pdb2pqr = f'pdb2pqr --ff=amber --ffout=amber --chain --with-ph={ph} 3rx4_com.pdb 3RX4_pro_fix.pqr'
            subprocess.run(command_pdb2pqr, text=True, stdout=subprocess.PIPE, check=True, shell=True)
        except Exception as e1:
            print(f'pdb2pq failed. {e1}')
            # Fall back to the pdb2pqr 3.x command-line interface.
            try:
                command_pdb2pqr = 'pdb2pqr30 input.pdb out.pqr --ff AMBER --ffout AMBER --with-ph 7.4 --pdb-output out.pdb'
                subprocess.run(command_pdb2pqr, text=True, stdout=subprocess.PIPE, check=True, shell=True)
            except Exception as e2:
                print(f'pdb2pqr30 failed. {e2}')
        command_obabel = 'obabel -ipqr *_pro_fix.pqr -o pdb -O 3RX4_pro_fix.pdb'
        subprocess.run(command_obabel, text=True, stdout=subprocess.PIPE, check=True, shell=True)

    def __del__(self):
        # Restore the caller's working directory; guard because __del__ may
        # run on a partially-initialised instance.
        orig = getattr(self, 'orgin_path', None)
        if orig:
            os.chdir(orig)
if __name__ == '__main__':
    # CLI entry point: take the system folder name and patch every
    # oniom-tagged .gjf inside it with RESP charges and force-field terms.
    parser = argparse.ArgumentParser()
    parser.add_argument('rootpath', type=str)
    cli_args = parser.parse_args()
    oniom_job = ONIOM(cli_args.rootpath)
    oniom_job.write_parameter_to_gjf()
filetools.py
- import os
class FileWalker:
    """Small helpers for collecting files and creating directory trees."""

    @staticmethod
    def get_files(root_filepath='./', filetype='gjf', file_filter=None):
        """Recursively collect files under *root_filepath* whose name ends
        with ``.<filetype>`` (case-insensitive).

        When *file_filter* is a non-empty string, only paths containing that
        substring are returned. Returns a list of joined paths.
        """
        suffix = '.' + filetype
        matches = []
        for root, _, files in os.walk(root_filepath):
            for name in files:
                if name.lower().endswith(suffix):
                    matches.append(os.path.join(root, name))
        if file_filter:
            return [path for path in matches if file_filter in path]
        return matches

    # level_list is a 2-D list [[...], [...], ...]: row n holds all candidate
    # components for directory level n.
    @staticmethod
    def make_folders(level_list):
        """Create every directory formed by concatenating one component from
        each level (Cartesian product of the rows); return the path list.

        Failures are reported (with the OS error) but do not stop the loop.
        """
        from itertools import product
        path_list = [''.join(parts) for parts in product(*level_list)]
        for path in path_list:
            try:
                os.makedirs(path, exist_ok=True)
            except OSError as exc:  # was a bare except: report why it failed
                print(f'directory {path} make failed: {exc}')
        return path_list