1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 conda create --name metawrap --channel ursky metawrap-mg=1.3.2 cd ~/miniconda3/envs/metawrapln -s lib/perl5/site_perl/5.22.0/ perl5which config-metawrapcp ~/config-metawrap ~/miniconda3/envs/metawrap/bin/config-metawrapconda install tbb=2020.2 conda activate metawrap
Insall blast DB Aspera download link
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 ftp open ftp.ncbi.nlm.nih.gov anonymous cd /blast/db/passive mls nt.*.tar.gz download.list.txt cd /blast/db/v4/mls nt_v4.*.tar.gz downloadV4.list.txt bye nohup cat NCBI.nt.download.list.txt | \ xargs -n 1 -P 1 \ bash -c '~/.aspera/connect/bin/ascp -v -k 1 -T -l ` `1000m -i ~/asperaweb_id_dsa.openssh ` anonftp@ftp.ncbi.nlm.nih.gov:/blast/db/$0 ./' \ >> downloadlog.txt 2>&1 & nohup cat downloadV4.list.txt | \ xargs -n 1 -P 1 \ bash -c '~/.aspera/connect/bin/ascp -v -k 1 -T -l ` `1000m -i ~/asperaweb_id_dsa.openssh ` anonftp@ftp.ncbi.nlm.nih.gov:/blast/db/v4/$0 ./' \ >> downloadlog.txt 2>&1 & for a in nt*.tar.gz; do tar xzf $a ; done vim ~/miniconda3/envs/metawrap/bin/config-metawrap ~/.aspera/connect/bin/ascp -v -k 1 -T -l 1000m \ -i ~/asperaweb_id_dsa.openssh \ anonftp@ftp.ncbi.nlm.nih.gov:/pub/taxonomy/taxdump.tar.gz ./ tar -xvf taxdump.tar.gz
Install checKM DB 1 2 3 4 5 6 7 8 9 mkdir CHECKM_DBcd CHECKM_DBwget \ https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz tar -xvf *.tar.gz rm *.gzcd ..checkm data setRoot CHECKM_DB
Install Kraken DB 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 mkdir /home/xiongyi/database/KRAKEN_DBcd /home/xiongyi/database/KRAKEN_DBkraken2-build --download-library bacteria --db MY_KRAKEN2_DB kraken2-build --download-library archaea --db MY_KRAKEN2_DB kraken2-build --download-library fungi --db MY_KRAKEN2_DB kraken2-build --download-library viral --db MY_KRAKEN2_DB source ~/miniconda3/etc/profile.d/conda.shconda activate metawrap cd /home/xiongyi/database/KRAKEN_DBkraken2-build --build --db MY_KRAKEN2_DB --threads 24 vim kraken2-build.sh qsub kraken2-build.sh
Install salmon megahit 1 2 conda create -c bioconda -n salmon salmon conda create -c bioconda -n soil megahit
Unzip 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 cd cai2mkdir cleanFastaq KRAKENreads \ INITIAL_BINNING BIN_REFINEMENT \ BIN_REASSEMBLY BLOBOLOGY QUANT_BINS \ BIN_CLASSIFICATION FUNCT_ANNOT QUANT_CONTIG cd /home/xiongyi/cai2/cleanFastaqgunzip -c /home/xiongyi/caiyuanfeng/data2/E3_clean_R1.fq.gz > E3_1.fastq gunzip -c /home/xiongyi/caiyuanfeng/data2/E3_clean_R2.fq.gz > E3_2.fastq gunzip -c /home/xiongyi/caiyuanfeng/data2/E10_clean_R1.fq.gz > E10_1.fastq gunzip -c /home/xiongyi/caiyuanfeng/data2/E10_clean_R2.fq.gz > E10_2.fastq gunzip -c /home/xiongyi/caiyuanfeng/data2/E13_clean_R1.fq.gz > E13_1.fastq gunzip -c /home/xiongyi/caiyuanfeng/data2/E13_clean_R2.fq.gz > E13_2.fastq gunzip -c /home/xiongyi/caiyuanfeng/data2/E17_clean_R1.fq.gz > E17_1.fastq gunzip -c /home/xiongyi/caiyuanfeng/data2/E17_clean_R2.fq.gz > E17_2.fastq vim unzip-fastaq.sh qsub unzip-fastaq.sh
convert SRR file from NCBI SRA 1 2 3 4 5 6 #!/bin/bash source ~/miniconda3/etc/profile.d/conda.shconda activate megahit ls SRR*/SRR*.sra | \ xargs -n 1 -P 8 bash -c \ 'fastq-dump --split-3 --gzip -O . $0'
Fastq quality control 1 2 3 4 5 6 7 8 9 10 11 12 #!/bin/bash source ~/miniconda3/etc/profile.d/conda.shconda activate megahit files=(SRR*/SRR*.sra) files=($(echo "${files[@]} " | tr ' ' '\n' | cut -d '/' -f1 | tr '\n' ' ' )) echo ${files[@]} | \ xargs -n 1 -P 2 bash -c \ 'fastp -i $0_1.fastq.gz -I $0_2.fastq.gz -o $0_clean_1.fq.gz -O $0_clean_2.fq.gz --thread 15' wait echo ${files[@]} | \ xargs -n 1 -P 2 bash -c \ 'fastp -i $0.fastq.gz -o $0_clean.fq.gz --thread 15'
Assemble merge fastq firstly 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 #!/bin/sh -login cd /home/xiongyi/cai2/cat cleanFastaq/*_1.fastq > cleanFastaq/All1.fastqcat cleanFastaq/*_2.fastq > cleanFastaq/All2.fastqvim merge.sh source ~/miniconda3/etc/profile.d/conda.shconda activate soil cd /home/xiongyi/cai2/megahit -1 cleanFastaq/All1.fastq \ -2 cleanFastaq/All2.fastq \ -o ASSEMBLY \ --min-count 2 \ -m 123 \ --k-list 27,37,47,57,67,77,87 \ -t 24 cd /home/xiongyi/cai2/vim megahit.sh
ensemble directly 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 #!/bin/bash source ~/miniconda3/etc/profile.d/conda.shconda activate megahit function join_by { local d=${1-} f=${2-} if shift 2; then printf %s "$f " "${@/#/$d} " fi } R1=(*_1.fq.gz) R1=$(join_by , "${R1[@]} " ) R2=(*_2.fq.gz) R2=$(join_by , "${R2[@]} " ) megahit -1 $R1 \ -2 $R2 \ -o ../megahitOut \ --presets meta-large \ -t 44
Quantify contig 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 #!/bin/sh -login source ~/miniconda3/etc/profile.d/conda.shconda activate salmon cd /home/xiongyi/sunhongyang/MbPL202011862/salmon index -t ASSEMBLY/final.contigs.fa \ -i QUANT_CONTIG/contig_index \ --threads 24 files=(cleanFastaq/*_*.fastq) files=($(echo "${files[@]} " | tr ' ' '\n' | cut -d '_' -f1 | tr '\n' ' ' )) files=($(echo "${files[@]} " | tr ' ' '\n' | sort -u | tr '\n' ' ' )) for file in ${files[@]} ; do salmon quant -i QUANT_CONTIG/contig_index \ -l A \ -1 ${file} _1.fastq \ -2 ${file} _2.fastq \ --validateMappings \ -o QUANT_CONTIG/${file#*/} \ --threads 24 done vim QUANT_CONTIG.sh qsub QUANT_CONTIG.sh
Bining no maxbin because maxbin is too slow
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 #!/bin/sh -login source ~/miniconda3/etc/profile.d/conda.shconda activate metawrap cd /home/xiongyi/cai2/metawrap binning -o INITIAL_BINNING -t 24 \ -a ASSEMBLY/final.contigs.fa \ --metabat2 \ --concoct cleanFastaq/*_*.fastq \ --run-checkm vim binning.sh qsub binning.sh
Species annotation of contigs 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 #!/bin/sh -login source ~/miniconda3/etc/profile.d/conda.shconda activate metawrap cd /home/xiongyi/cai2/metawrap kraken2 -o KRAKENreads \ -t 24 \ cleanFastaq/*_*.fastq ASSEMBLY/final.contigs.fa vim kraken-reads-run.sh qsub kraken-reads-run.sh
Refine bin 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 #!/bin/sh -login source ~/miniconda3/etc/profile.d/conda.shconda activate metawrap cd /home/xiongyi/cai2/metawrap bin_refinement -o BIN_REFINEMENT \ -t 24 \ -A INITIAL_BINNING/metabat2_bins \ -B INITIAL_BINNING/concoct_bins \ -c 50 -x 10 vim bin_refinement.sh qsub bin_refinement.sh
Visualize bins 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 #!/bin/sh -login source ~/miniconda3/etc/profile.d/conda.shconda activate metawrap cd /home/xiongyi/cai2/metawrap blobology -a ASSEMBLY/final.contigs.fa \ -t 24 \ -o BLOBOLOGY \ --bins BIN_REFINEMENT/metawrap_50_10_bins \ cleanFastaq/*_*.fastq vim blobology.sh qsub blobology.sh
Species annotation of bins 1 2 3 4 5 6 7 8 9 10 11 12 13 14 #!/bin/sh -login source ~/miniconda3/etc/profile.d/conda.shconda activate metawrap cd /home/xiongyi/cai2/metawrap classify_bins -b BIN_REFINEMENT/metawrap_50_10_bins \ -o BIN_CLASSIFICATION \ -t 24 vim classfication_bins.sh
Species annotation of proteins install eggnog
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 conda create -n egg -c bioconda eggnog-mapper source ~/miniconda3/etc/profile.d/conda.shconda activate egg wget https://software-ab.cs.uni-tuebingen.de/download/megan-annotree/annotree.fasta.gz diamond makedb --in annotree.fasta.gz --db /scratch/yixiong/diamond/annotree diamond blastp --db /scratch/yixiong/diamond/annotree.dmnd \ --query fasta/db.faa \ --tmpdir /dev/shm \ --out diamondAnnoTree \ --outfmt 100 \ --threads 30 ~/megan/tools/daa-meganizer \ -i CO2diamondAnnoTree.daa \ -mdb ../megan-mapping-annotree-June-2021.db ~/megan/tools/daa2info \ -i CO2diamondAnnoTree.daa \ -o CO2MEGAN-taxonomy-annoTree.txt \ -mro true \ --paths true \ -r true \ -r2c GTDB
Function annotation of bins 1 2 3 4 5 6 7 8 9 10 11 12 #!/bin/sh -login source ~/miniconda3/etc/profile.d/conda.shconda activate metawrap cd /home/xiongyi/cai2/metaWRAP annotate_bins -o FUNCT_ANNOT -t 24 -b BIN_REFINEMENT/metawrap_50_10_bins vim annotate_bins.sh
Function annotation of contigs gene prediction by fraggenescan 1 2 3 4 5 6 7 8 9 10 11 12 13 14 conda create -n fraggenescan -c bioconda -c conda-forge fraggenescan=1.31 source ~/miniconda3/etc/profile.d/conda.shconda activate fraggenescan run_FragGeneScan.pl \ -genome=../k87/final.contigs.fa \ -out=frag \ -complete=0 \ -train=complete \ -thread=40 vim frag.sh nohup ./frag.sh > frag.log.txt 2>&1 &
gene prediction by prodigal 1 2 3 4 5 6 7 8 9 conda create -n prodigal -c bioconda -c conda-forge prodigal=2.6.3 source ~/miniconda3/etc/profile.d/conda.shconda activate prodigal prodigal -i ../k87/final.contigs.fa -o prodigal.genes -a prodigal.faa -p meta vim prodigal.sh nohup ./prodigal.sh > prodigal.log.txt 2>&1 &
remove proteins with same seq 1 2 conda activate bio seqkit rmdup frag.faa -s -o fragNodup.fasta -D fragDup.txt
gene function annotation 1 2 3 4 5 6 7 8 source ~/miniconda3/etc/profile.d/conda.shconda activate egg export EGGNOG_DATA_DIR=db/eggemapper.py \ -i db.faa -o anno \ --sensmode more-sensitive \ --tax_scope 2157,2,4751 \ --cpu 0 --dbmem