ONIOM自动化脚本

功能

为小分子配体按gjf格式补齐所需的所有参数。

准备工作

当前目录(根目录)下存放该脚本,以及每个体系的相关文件的子文件夹,如4B4M,6FER。
每个子文件夹内需要有:
需要补参数的配体的Gaussian计算log文件
选好高低层原子的gjf文件,命名中需要带“oniom”字样,有时配体需要补电荷的部分GV会自动给一些,需要全部删掉。
非必需:
自己提供resp电荷+原子名,力场信息,分别命名为resp.txt和force_field.txt。如果有这个,就不需要log文件。

使用

  • python oniom.py 4B4M
输入文件夹名即可。自动识别文件夹内所有带oniom字样的gjf文件,进行补参数。
文件夹下需要放入配体的Gaussian计算log文件;若自行提供 resp.txt 和 force_field.txt 两个文件(见"准备工作"),则无需log文件。

报错

报错:
  • The number of atoms in gjf file is……
检查配体原子数是否一致,gjf文件末尾高层原子,确认所有的配体原子都没有标注电荷,确认上方残基没有缺失电荷。

后续计划

自动调用modeller实现同源建模(优化模式+残基坐标不动模式),调用pdb2pqr加氢,调用obabel合并蛋白配体文件。

代码

oniom.py

  • ##
  • import os
  • import subprocess
  • import argparse
  • import re
  • from filetools import FileWalker

  • ##
  • class ONIOM:

  •     def __init__(self, rootpath='./'):

  •         self.orgin_path = os.getcwd()
  •         self.rootpath = rootpath
  •         os.chdir(self.rootpath)

  •     def lig_prep(self,ph=7.4):

  •         pass

  •     def get_lig_information(self):

  •         command_antechamber1 = 'antechamber -i *.log -fi gout -o lig.prep -fo prepi -c resp '
  •         command_antechamber2 = 'awk \'$1==\"ATOM\" {print $3, toupper($10) \"-\" $9}\' ANTECHAMBER_PREP.AC'
  •         command_parmchk1 = 'parmchk  -i lig.prep -f prepi -o forcefiled.frcmod -a y'
  •         command_parmchk2 = 'awk \'/NONBON/, /NR/\' *mod | awk \'{print \"VDW\", $1, $2, $3}\''
  •         command_parmchk3 = 'awk \'/^BOND/, /ANGLE/\' *mod | awk \'{print \"HrmStr1\", $1, $2, $3, $4}\' | tr \'-\' \' \''
  •         command_parmchk4 = "awk \'/^ANGLE/, /DIHE/\' *mod | awk \'{print \"HrmBnd1\", $1, $2, $3, $4, $5}\' | tr \'-\' \' \'"

  •         subprocess.run(command_antechamber1, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         result = subprocess.run(command_antechamber2, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         self.resp = result
  •         with open('resp.txt', 'w') as f:
  •             f.write(result.stdout)

  •         text = ''
  •         subprocess.run(command_parmchk1, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         result = subprocess.run(command_parmchk2, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         text += result.stdout
  •         result = subprocess.run(command_parmchk3, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         text += result.stdout
  •         result = subprocess.run(command_parmchk4, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         text += result.stdout
  •         text = text.split('\n')
  •         text_clean = []
  •         for line in text:
  •             if len(line.split()) > 3:
  •                 text_clean.append(line)
  •         text_clean = '\n'.join(text_clean)
  •         self.force_field = text_clean
  •         with open('force_field.txt', 'w') as f:
  •             f.write(text_clean)
  •             f.write('\nHrmBnd1 hc c3 h1 35.0 109.5\nHrmBnd1 hc ct h1 35.0 109.5')

  •     def _parse_gjf_file(self, filepath):

  •         location = []
  •         location_index = []
  •         with open(filepath, 'r') as file:
  •             file_content = file.readlines()
  •             for index,line in enumerate(file_content):
  •                 if 'PDBName' not in line:
  •                     continue
  •                 split_contents = re.findall('[^(]+', line)
  •                 if len(split_contents) == 2:
  •                     if 0 < len(re.findall('[^-]+', split_contents[0])) < 3:
  •                         location.append('(' + re.findall('[^\n]+', split_contents[1])[0])
  •                         location_index.append(index)

  •         return location, location_index, file_content

  •     def _parse_resp_file(self):

  •         atom_types = []
  •         with open('resp.txt', 'r') as file:
  •             for line in file:
  •                 parts = re.findall('[^ ]+', line)
  •                 atom_type = re.findall('[A-Za-z]+', parts[0])[0] + '-' + parts[1]
  •                 atom_type = re.findall('[^\n]+', atom_type)[0]
  •                 atom_types.append(atom_type)

  •         return atom_types

  •     def _parse_force_field_file(self):

  •         with open('force_field.txt', 'r') as f:
  •             content = f.read()

  •         return content

  •     def write_parameter_to_gjf(self, filepath=None):

  •         #输入为文件夹,识别文件夹内所有带oniom字样带gjf文件,进行补参数。也可以输入单个需要补参数带gjf文件名
  •         if filepath == None:
  •             filepath = self.rootpath
  •         if os.path.isdir(filepath):
  •             gjf_list = FileWalker.get_files(root_filepath=filepath, filetype='gjf', file_filter='oniom')
  •         else:
  •             gjf_list = [filepath]
  •         #若电荷或力场参数文件不存在,根据小分子计算log文件,自动生成
  •         if not (os.path.exists('resp.txt') and os.path.exists('force_field.txt')):
  •             self.get_lig_information()
  •         resp_atom_types = self._parse_resp_file()
  •         force_field_content = self._parse_force_field_file()
  •         print(gjf_list)
  •         for filepath in gjf_list:
  •             location, location_index, file_content = self._parse_gjf_file(filepath)
  •         #需要补充参数的原子数目必须和小分子计算文件的数目一致
  •             if len(location) == len(resp_atom_types):
  •                 for i in range(len(resp_atom_types)):
  •                     output = f' {resp_atom_types[i]}{location[i]}\n'
  •                     file_content[location_index[i]] = output
  •                 with open(filepath, 'w') as f:
  •                     f.write(''.join(file_content))
  •                     f.write('\n')
  •                     f.write(force_field_content)
  •                     f.write('\n\n')
  •                 print(f'{filepath} success')
  •             else:
  •                 print(f'The number of atoms in gjf file is {len(location)} not the same with that \
  • {len(resp_atom_types)} in resp file. Please ensure that all ligand atoms in high layer have NO resp in gjf file \
  • and all atoms in low layer DO have resp. The atoms lack of resp are:\n')
  •                 for item in location:
  •                     print(item)


  •     def generate_com_pdb(self):

  •         command_obabel = 'obabel -ipdb *_pro_fix.pdb  *_lig.pdb -opdb -j -O _com.pdb'

  •     def generate_protein(self, ph=7.0, sys_name=None):

  •         try:
  •             command_pdb2pqr = f'pdb2pqr --ff=amber --ffout=amber --chain --with-ph={ph} 3rx4_com.pdb 3RX4_pro_fix.pqr'
  •             result = subprocess.run(command_pdb2pqr, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •         except Exception as e1:
  •             print(f'pdb2pq failed. {e1}')
  •             try:
  •                 command_pdb2pqr = 'pdb2pqr30 input.pdb out.pqr --ff AMBER --ffout AMBER --with-ph 7.4 --pdb-output out.pdb'
  •                 subprocess.run(command_pdb2pqr, text=True, stdout=subprocess.PIPE, check=True, shell=True)
  •             except Exception as e2:
  •                 print(f'pdb2pqr30 failed. {e2}')

  •         command_obabel = 'obabel -ipqr *_pro_fix.pqr -o pdb -O 3RX4_pro_fix.pdb'
  •         subprocess.run(command_obabel, text=True, stdout=subprocess.PIPE, check=True, shell=True)

  •     def __del__(self):

  •         os.chdir(self.orgin_path)


  • if __name__ == '__main__':
  •     paser = argparse.ArgumentParser()
  •     paser.add_argument('rootpath', type=str)
  •     args = paser.parse_args()
  •     myOniom = ONIOM(args.rootpath)
  •     myOniom.write_parameter_to_gjf()
  •     
filetools.py
  • import  os

  • class FileWalker:

  •     @staticmethod
  •     def get_files(root_filepath='./', filetype='gjf', file_filter=None):

  •         filetype = '.' + filetype
  •         gjf_file_path_list = []
  •         for root, _, files in os.walk(root_filepath):
  •             for file in files:
  •                 if file.lower().endswith(filetype):
  •                     gjf_file_path_list.append(os.path.join(root, file))

  •         if file_filter:
  •             gjf_file_path_list_filter = []
  •             for item in gjf_file_path_list:
  •                 if file_filter in item:
  •                     gjf_file_path_list_filter.append(item)
  •             return gjf_file_path_list_filter
  •         else:
  •             return gjf_file_path_list

  •     # level_list二维列表[[],[],[]],第n行为n级目录,每列为n级目录的所有子目录
  •     @staticmethod
  •     def make_folders(level_list):

  •         from itertools import product
  •         # 使用itertools.product生成所有子列表的笛卡尔积
  •         combinations = list(product(*level_list))
  •         path_list = [''.join(combination) for combination in combinations]
  •         for path in path_list:
  •             try:
  •                 os.makedirs(path, exist_ok=True)
  •             except:
  •                 print(f'directory {path} make failed')
  •         return path_list