command
代码思路
1.先明确想做的事,人该怎么做
2.写伪代码,小规模测试
3.写正常代码(可以尝试GPT)
跨服务器传输
rsync -rzP dddc@172.21.85.23:/home/databank_70t/zzy/project/trip13/vsREMD-500n/ ./
python
argparse
需要有argparse包
- #!/usr/bin/env python3
- import argparse
- if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='test....')
- parser.add_argument('-p', '--pdb', required=True, type=str,
- help='the pdb id of protein')
- parser.add_argument('-c', '--chain', default='A', type=str,
- help='the chain of protein, Default: %(default)s')
- parser.add_argument('--num', required=True, type=int,
- help='the index of atom')
- args = parser.parse_args()
- print(f"pdb id is {args.pdb}")
- print(f"the chain of protein is {args.chain}")
- print(f"the index of atom is {args.num}")
GPU
- ### 查看GPU使用情况
- nvidia-smi
- ### 指定某块GPU(默认0号卡)
- export CUDA_VISIBLE_DEVICES=0
- export CUDA_VISIBLE_DEVICES=1
- export CUDA_VISIBLE_DEVICES=2
shell文本操作
shuf
The shuf command in Linux writes a random permutation of the input lines to standard output.
- shuf file.txt
grep
方括号要加转义符
awk
数据清洗
- # 获取某一列非空的数据
- awk -F ',' '{if($2!="")print $0}' 1.txt
- # 取两位小数(会把字符串转为0,故建议awk处理浮点数列,结合paste整合字符串列)
- # 下面这个例子是把第三列后的全部取两位小数
- awk '{for(i = 3; i <= NF; i++) {printf("%.2f\t", $i)} {printf("\n")}}' analysis.txt
- # OFS 指定输出的分隔符
- sort -t , -r -n -k 2 n2-mf.csv|awk -v OFS="," -F ',' '{print $1,$2}'|head >> n2-count.txt
- # 使用 awk 根据特定列去重 (gpt)
- awk '!seen[$2]++' data.txt
数据统计
- # 获取第3列数据中小于-1的数据行
- awk '$3<-1 {print}' 1.txt
- # 用于MMGBSA 计算后统计结合能小于-1kcal/mol(即贡献强于-1kcal/mol)的残基
- awk -F ' ' '$28<-1 {print}' FINAL_DECOMP_MMPBSA.dat
- for i in {1,2,28};do awk -F ' ' '$28<-1 {print $'${i}'}' 400-500ns_c4_atps-site/FINAL_DECOMP_MMPBSA.dat;done
- # 功能类似于grep,但更灵活
- awk -F ';' '$10=="寒" {print $1,$10}' ~/zzy/origin/230914_tcm_label.csv >>hl.txt
- # 按列做乘法并输出
- awk -F ',' '{print $1*10,$2*10,$3}' ref_pro_mol_rmsd.csv > ref_pro_mol_rmsd_A.csv
- awk '{print $1*0.2}' num|awk -F '.' '{print $1}' >0.2num
- # 输出每行数据的最大值
- awk '{for(i = 2; i <= NF; i++) {if($i < $(i - 1)) {$i = $(i - 1)}} {print $NF}}' test.txt
- # 变量计算
- top1_rate=$(awk 'BEGIN{print "'$top1'"/"'$sum'"}')
- sum_higher_rate=$(awk 'BEGIN{print "'${sum_higher_num}'"/"'${all_num}'"}')
- score_frag_sum=$(awk 'BEGIN{print "'${score_frag_i_1}'"+"'${score_frag_i_2}'"}')
- all_num=`expr ${all_num} + 1`
- sum_higher_num=`expr ${sum_higher_num} + 1`
paste
- paste -d ',' file temp > input_list.csv
sed
- ### Print odd lines (GNU sed: 1~2 means start at line 1 and match every 2nd line)
- sed -n '1~2p'
- ###
- sed -i 's/oo/kk/g' testfile
- sed -i 's/Performance: //g'
- ###
- sed -i "2c $i" $i.mol2
text manipulation
- grep -A 1 min dock-1_c1.sdf |sed -n '/-/p'|sed -n '1~2p'
- ### get lines that exist in a but are missing in b.
- cat a b b | sort | uniq -u
- ### Convert plain text files from DOS or Mac format to Unix
- dos2unix *
- ### practical
- for a in {1..51};do b=`sed -n "${a}p" SW`; sed -i "1i ${a}.sdf" $b;done
- for a in {1..10};do sed -i "{$a/$/$a/}" test.smi;done
- ### vi replace :
- :1,$s/\[/\\[/g
- :12,22s/abc/123/g
- # for 循环中用文本文件,空格会识别为分隔符,可以先在vi里用特殊符号替换空格,再用sed将$i 中的特殊符号替换成空格。由此,可以正确grep
- for i in `cat up1k_target_hl.txt`;do b=`echo $i|sed 's/_/ /g'`;grep -w "$b" /home/yqyang/DDB-AI/script/Index_MPNN_CNN.txt >>id.txt;done