5.27 sys pre (g16)
由于 lig_parameter_cal.py 不能自动将任务排队,所以需要我手动分批提交
配体高斯优化
由于脚本里只能识别sdf文件的电荷,原始数据集又没有sdf文件,所以得手动转换
- 75.3 /home/data/zyzhou/project/koff/pdbbind_dataset/mol2_obabel_sdf
- for i in `cat index`;do nohup obabel -imol2 ${i}/${i}.mol2 -osdf -O ${i}/${i}.sdf;done
- # 有111个体系转换有问题
- (md_traj_analysis) dddc@ubuntu:/home/data/zyzhou/project/koff/pdbbind_dataset/mol2_obabel_sdf$ grep Failed nohup.out |wc -l
- 111
- # grep Failed nohup.out|sed 's/ Failed to kekulize aromatic bonds in MOL2 file (title is //g'|sed 's/.mol2)//g'>failed_index
- # 将成功转换的569个体系提取出来
- cat failed_index index failed_index |sort|uniq -u >569_sucess_index
体系 1-100
75.3
- # /home/data/zyzhou/project/koff/pdbbind_dataset/g16_1-10
- cp ../mol2_obabel_sdf/569_sucess_index ./index_1-10
- # vi 里删除11行往后的部分,相当于手动挑选前10个体系
- for i in `cat index_1-10`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- source ~/.g16.sh
- conda activate AmberTools25
- nohup python lig_parameter_cal.py -i /home/data/zyzhou/project/koff/pdbbind_dataset/g16_1-10 -t sdf &
85.24
- # /home/databank/zzy/project/MD/koff/pdbbind_dataset/g16_11-40
- mv g16_1-10 g16_11-40/
- cp ../mol2_obabel_sdf/569_sucess_index ./index_11-40
- # vi 里删除41行往后的部分及1-10行,相当于手动挑选11-40,共30个体系,占120核
- for i in `cat index_11-40`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- nohup python lig_parameter_cal.py -i /home/databank/zzy/project/MD/koff/pdbbind_dataset/g16_11-40 -t sdf &
- # 有一个体系报错,原因是配体有原子键连方式不合理(N上5根键),报错信息已记录
- # 其余29个体系已提交任务
- mv nohup.out test1.out
85.23
- # /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_41-70
- conda activate ambertools
- mv g16_1-10/ g16_41-70/
- cp ../mol2_obabel_sdf/569_sucess_index ./index_41-70
- # vi 里删除71行往后的部分及1-40行,相当于手动挑选41-70,共30个体系,占120核
- for i in `cat index_41-70`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- nohup python lig_parameter_cal.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_41-70 -t sdf &
- # 有一个体系报错,原因是配体有原子键连方式不合理(N上5根键),报错信息已记录
- mv nohup.out test1.out
- # 脚本提交后由于一个体系报错,会退出,而在这之前的体系高斯任务已经在运行。第二次提交时,遗憾地发现脚本并不会识别已提交的文件夹、只提交尚未提交的文件夹,而是全部重新提交,导致部分体系的任务重复。
- # 将错就错,看看这个情况下会发生什么(另一个原因是高斯任务太多了,杀起来麻烦)
- # /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_71-100
- conda activate ambertools
- mkdir g16_71-100
- cd g16_71-100
- cp ../mol2_obabel_sdf/569_sucess_index ./index_71-100
- # vi 里删除101行往后的部分及1-70行,相当于手动挑选71-100,共30个体系,占120核
- for i in `cat index_71-100`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- nohup python lig_parameter_cal.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_71-100 -t sdf &
- # 有4个体系报错,原因是配体有原子键连方式不合理(N上5根键),报错信息已记录
- # 4次测试的报错记录见 test.out
- # 26个体系已成功提交
体系 101-200
75.3
- # /home/data/zyzhou/project/koff/pdbbind_dataset/g16_101-130
- source ~/.g16.sh
- conda activate AmberTools25
- mkdir g16_101-130
- cd g16_101-130
- cp ../mol2_obabel_sdf/569_sucess_index ./index_101-130
- # vi 里删除131行往后的部分及1-100行,相当于手动挑选101-130,共30个体系,占120核
- for i in `cat index_101-130`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- nohup python lig_parameter_cal.py -i /home/data/zyzhou/project/koff/pdbbind_dataset/g16_101-130 -t sdf &
- # 30个体系已成功提交
85.23
- # /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_131-160
- conda activate ambertools
- mkdir g16_131-160
- cd g16_131-160
- cp ../mol2_obabel_sdf/569_sucess_index ./index_131-160
- # vi 里删除161行往后的部分及1-130行,相当于手动挑选131-160,共30个体系,占120核
- for i in `cat index_131-160`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- nohup python lig_parameter_cal.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_131-160 -t sdf &
- # 30个体系已成功提交
- # /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_161-180
- conda activate ambertools
- mkdir g16_161-180
- cd g16_161-180
- cp ../mol2_obabel_sdf/569_sucess_index ./index_161-180
- # vi 里删除181行往后的部分及1-160行,相当于手动挑选161-180,共20个体系,占80核
- for i in `cat index_161-180`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- nohup python lig_parameter_cal.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_161-180 -t sdf &
- # 20个体系已成功提交
- # /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_181-200
- conda activate ambertools
- mkdir g16_181-200
- cd g16_181-200
- cp ../mol2_obabel_sdf/569_sucess_index ./index_181-200
- # vi 里删除201行往后的部分及1-180行,相当于手动挑选181-200,共20个体系,占80核
- for i in `cat index_181-200`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- nohup python lig_parameter_cal.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_181-200 -t sdf &
- # 20个体系已成功提交
体系 201-300
85.24
- # /home/databank/zzy/project/MD/koff/pdbbind_dataset/g16_201-220
- conda activate ambertools
- mkdir g16_201-220
- cd g16_201-220
- cp ../mol2_obabel_sdf/569_sucess_index ./index_201-220
- # vi 里删除221行往后的部分及1-200行,相当于手动挑选201-220,共20个体系,占80核
- for i in `cat index_201-220`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- # 把 lig_parameter_cal.py 高斯任务那部分删去,先检查 antechamber 报错并删除报错体系,再提交任务
- python check.py -i /home/databank/zzy/project/MD/koff/pdbbind_dataset/g16_201-220 -t sdf
- # 每次的报错信息输入 test.out
- cat test.out |grep antechamber
- # 找到所有报错体系,删去
- # 运行
- nohup python lig_parameter_cal.py -i /home/databank/zzy/project/MD/koff/pdbbind_dataset/g16_201-220 -t sdf &
- # 16/20 个体系已成功提交
85.23
- # /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_221-250
- mkdir g16_221-250
- cd g16_221-250
- cp ../mol2_obabel_sdf/569_sucess_index ./index_221-250
- # vi 里删除251行往后的部分及1-220行,相当于手动挑选221-250,共30个体系,占120核
- for i in `cat index_221-250`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- python check.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_221-250 -t sdf
- nohup python lig_parameter_cal.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_221-250 -t sdf &
- # 30/30 个体系已成功提交
- # /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_251-280
- mkdir g16_251-280
- cd g16_251-280
- cp ../g16_221-250/*py ./
- cp ../mol2_obabel_sdf/569_sucess_index ./index_251-280
- # vi 里删除281行往后的部分及1-250行,相当于手动挑选251-280,共30个体系,占120核
- for i in `cat index_251-280`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- python check.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_251-280 -t sdf
- nohup python lig_parameter_cal.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_251-280 -t sdf &
- # 30/30 个体系已成功提交
- # /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_281-300
- mkdir g16_281-300
- cd g16_281-300
- cp ../g16_221-250/*py ./
- cp ../mol2_obabel_sdf/569_sucess_index ./index_281-300
- # vi 里删除301行往后的部分及1-280行,相当于手动挑选281-300,共30个体系,占120核(注:按 281-300 的行号范围应为20个体系、80核,此处数目待核实)
- for i in `cat index_281-300`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- python check.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_281-300 -t sdf
- nohup python lig_parameter_cal.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_281-300 -t sdf &
- # 29/30 个体系已成功提交
- # 4x5y_ligand_native_659 报错,在该工作路径中已删除
体系 301-360
85.24
- # /home/databank/zzy/project/MD/koff/pdbbind_dataset/g16_301-330
- conda activate ambertools
- mkdir g16_301-330
- cd g16_301-330
- scp dddc@172.21.85.23:/home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_281-300/*py ./
- cp ../mol2_obabel_sdf/569_sucess_index ./index_301-330
- # vi 里删除331行往后的部分及1-300行,相当于手动挑选301-330,共30个体系,占120核
- for i in `cat index_301-330`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- python check.py -i /home/databank/zzy/project/MD/koff/pdbbind_dataset/g16_301-330 -t sdf
- # 运行
- nohup python lig_parameter_cal.py -i /home/databank/zzy/project/MD/koff/pdbbind_dataset/g16_301-330 -t sdf &
- # 28/30 个体系已成功提交
- # 5ah2_ligand_native_344 与 5fkj_ligand_native_289 报错,在该工作路径中已删除
85.23
- # /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_331-360
- mkdir g16_331-360
- cd g16_331-360
- cp ../g16_221-250/*py ./
- cp ../mol2_obabel_sdf/569_sucess_index ./index_331-360
- # vi 里删除361行往后的部分及1-330行,相当于手动挑选331-360,共30个体系,占120核
- for i in `cat index_331-360`;do cp -r ../mol2_obabel_sdf/${i} ./;done
- python check.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_331-360 -t sdf
- nohup python lig_parameter_cal.py -i /home/databank_70t/zzy/project/koff/pdbbind_dataset/g16_331-360 -t sdf &
- # 30/30 个体系已成功提交
MD 自动化测试
tleap
因为要测试 pre_equ.py 的批量提交及自动退出,先用 75.3 上高斯优化完成的前10个体系测试了一下 MD-auto,结果在 tleap 这一步大量报错,由此发现了 PDBBind koff dataset 蛋白结构文件的一些问题
虚无的氢原子
例如 1ebw 这个体系 41位 ARG 有一个让人摸不着头脑的HB1,pymol里检查该原子完全不对
与乐云师姐讨论后,师姐建议用 grep -v 把氢原子全部删除,因为这些 pdb 里很多氢原子的命名不符合力场规范
质子化状态
发现原始 pdb 文件里的组氨酸都命名为 HIS(未区分 HID/HIE/HIP),这样它的质子化状态是不确定的,还是得用 pdb2pqr 处理一下。
其他问题
链开头的 ALA 第一个N 原子无法识别?tleap认为该氨基酸错误
蛋白结构清洗
- # 对于能够跑通配体高斯优化的体系,将其重命名为 systemX 的形式
- for i in {1..10};do name=`sed -n "${i}p" index_1-10`;mv ${name} system${i};done
- # 原始 pdb 文件去除氢原子,用 pdb2pqr 在 pH=7 下预测质子化状态
- for i in {1..10};do mv system${i}/*pdb system${i}/with_H.txt;awk '$12!="H"{print}' system${i}/with_H.txt >system${i}/system${i}_raw_noH.pdb;pdb2pqr30 system${i}/system${i}_raw_noH.pdb system${i}/system${i}.pqr --ff AMBER --ffout AMBER --with-ph 7 --pdb-output system${i}/system${i}.pdb;rm system${i}/system${i}_raw_noH.pdb;rm system${i}/system${i}.log;done