Sipros tutorial: stable isotope probing (SIP) metaproteomics with 13C-labeled samples

This tutorial shows how to run a stable isotope probing (SIP) proteomics search with Sipros on 13C-labeled E. coli DDA mass spectrometry data. The workflow works on Ubuntu 20.04 under WSL on Windows 11 and on CentOS 7.

Install the conda environments

# Python 2.7 environment (with scikit-learn), activated before the Sipros Python scripts
conda create --name py2 scikit-learn python=2.7
# Mono environment, activated before running Raxport.exe
conda create --name mono --channel conda-forge mono
# R environment with the packages installed for the Sipros R scripts
conda create --name r --channel conda-forge --channel bioconda \
  r-base r-stringr r-tidyr bioconductor-biostrings

Make the folders for the workflow

# Create the working folders used by the rest of the tutorial.
# -p keeps the command safe to re-run when some of the folders already exist.
mkdir -p fasta raw ft regular sip configs bin

Download the raw files

# Download both raw files straight into raw/ (-P sets the target folder),
# so the shell stays in the workflow root for the steps that follow.
# Raw file with 1% 13C
wget -P raw ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2023/04/PXD041414/Pan_062822_X1iso5.raw
# Raw file with 50% 13C
wget -P raw ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2023/04/PXD041414/Pan_052322_X13.raw

Download the Sipros programs

# Download the Sipros release into bin/ and unpack it there without leaving
# the workflow root, so the bin/bin/..., bin/configTemplates/... and
# bin/*Scripts/... paths used by the later steps resolve.
wget -P bin https://github.com/xyz1396/SiprosToolKits/releases/download/4.0/siprosRelease.zip
unzip bin/siprosRelease.zip -d bin
# Make the unpacked programs executable
chmod +x bin/bin/*

The “FASTA_Database = ” entry in the .cfg files may need to be changed to the path of your FASTA file.

Convert the raw files

conda activate mono
# Run Raxport on the raw/ folder and write its output to ft/.
# -j is the maximum number of threads to use.
mono bin/bin/Raxport.exe \
  -i raw \
  -o ft \
  -j 8

Make a FASTA database with reverse decoys

# Download the E. coli protein FASTA sequences into fasta/ and decompress them.
# Everything runs from the workflow root (no cd), because the following steps
# use root-relative paths such as bin/bin/... and ft/...
wget -P fasta https://ftp.uniprot.org/pub/databases/uniprot/knowledgebase/reference_proteomes/Bacteria/UP000000625/UP000000625_83333.fasta.gz
gunzip fasta/UP000000625_83333.fasta.gz

conda activate py2
# Build the database with reverse decoy sequences (written to fasta/Decoy.fasta)
python bin/EnsembleScripts/sipros_prepare_protein_database.py \
  -i fasta/UP000000625_83333.fasta \
  -o fasta/Decoy.fasta \
  -c bin/configTemplates/SiprosEnsembleConfig.cfg

Regular search of the 1% 13C file with SiprosEnsemble

# OMP_NUM_THREADS caps the number of threads the search may use
export OMP_NUM_THREADS=10
# Search the scans of the 1% 13C file against the FASTA database and write
# the results to regular/. This step takes a long time.
bin/bin/SiprosEnsembleOMP \
  -f ft/Pan_062822_X1iso5.FT2 \
  -c bin/configTemplates/SiprosEnsembleConfig.cfg \
  -o regular

conda activate py2
# Config file and script folder shared by the three post-processing steps
ensemble_cfg=bin/configTemplates/SiprosEnsembleConfig.cfg
ensemble_scripts=bin/EnsembleScripts

# Step 1: convert the .Spe2Pep.txt file to a .tab file
python "${ensemble_scripts}/sipros_psm_tabulating.py" \
  -i regular \
  -c "${ensemble_cfg}" \
  -o regular

# Step 2: filter the PSMs; qualified PSMs are written to a .psm.txt file
python "${ensemble_scripts}/sipros_ensemble_filtering.py" \
  -i regular \
  -c "${ensemble_cfg}" \
  -o regular

# Step 3: assemble protein groups from the peptides; proteins are written to .pro.txt
python "${ensemble_scripts}/sipros_peptides_assembling.py" \
  -c "${ensemble_cfg}" \
  -w regular

conda activate r
# Output prefix shared by both R scripts
regular_prefix=regular/coli

# Control the protein FDR; qualified protein groups go to .proRefineFDR.txt
Rscript bin/V4Scripts/refineProteinFDR.R \
  -pro regular/*.pro.txt \
  -psm regular/*.psm.txt \
  -fdr 0.005 \
  -o "${regular_prefix}"

# Count the spectra of each protein group; counts go to .SPcount.txt
Rscript bin/V4Scripts/getSpectraCountInEachFT.R \
  -pro regular/*.proRefineFDR.txt \
  -psm regular/*.psm.txt \
  -o "${regular_prefix}"

SIP search of the 50% 13C file with SiprosV4

# Generate the search configs from the SiprosV4 template into configs/
bin/bin/configGenerator \
  -i bin/configTemplates/SiprosV4Config.cfg \
  -o configs \
  -e C

conda activate r

# Build a database (fasta/db.faa) from the proteins identified by SiprosEnsemble.
# This step can be skipped when the protein database is small;
# the original protein database can be used instead.
Rscript bin/V4Scripts/makeDBforLabelSearch.R \
  -pro regular/Pan_062822_X1iso5.SE.pro.txt \
  -faa fasta/UP000000625_83333.fasta \
  -o fasta/db.faa

# Search the scans of the 50% 13C file against the FASTA database, once per
# generated config file. This step takes a long time.
# OMP_NUM_THREADS caps the threads of each search and xargs -P 8 runs up to
# 8 searches at once, so up to 8 x 10 threads may be busy; lower either value
# on smaller machines.
export OMP_NUM_THREADS=10
configs=(configs/*.cfg)
if [[ ! -e "${configs[0]}" ]]; then
  # An unmatched glob stays as the literal pattern; do not pass it to the search
  echo "no .cfg files found in configs/" >&2
else
  # NUL-delimited hand-off plus a quoted "$1" keeps every config path intact,
  # even if it contains spaces; "_" fills $0 of the inner shell.
  printf '%s\0' "${configs[@]}" | xargs -0 -n 1 -P 8 \
    bash -c 'bin/bin/SiprosV4OMP -f ft/Pan_052322_X13.FT2 -c "$1" -o sip' _
fi

conda activate py2

# Run the three SiprosV4 post-processing scripts in order on the sip/ folder,
# all with the same config file:
#   sipros_peptides_filtering.py   - filter PSMs
#   sipros_peptides_assembling.py  - filter proteins
#   ClusterSip.py                  - cluster SIP abundance of protein
v4_cfg=bin/configTemplates/SiprosV4Config.cfg
for v4_script in \
  sipros_peptides_filtering.py \
  sipros_peptides_assembling.py \
  ClusterSip.py; do
  python "bin/V4Scripts/${v4_script}" \
    -c "${v4_cfg}" \
    -w sip
done

conda activate r

# Output prefix shared by both R scripts
sip_prefix=sip/coli

# Refine the protein FDR; the result is read back below as
# sip/coli.proRefineFDR.txt
Rscript bin/V4Scripts/refineProteinFDR.R \
  -pro sip/*.pro.txt \
  -psm sip/*.psm.txt \
  -fdr 0.01 \
  -o "${sip_prefix}"

# Get the SIP abundance of each protein in each FT2 file
Rscript bin/V4Scripts/getLabelPCTinEachFT.R \
  -pro "${sip_prefix}.proRefineFDR.txt" \
  -psm sip/*.psm.txt \
  -thr 5 \
  -o "${sip_prefix}"

The isotopic abundance of PSMs and peptides is given in the “SearchName” column of *.psm.txt. The value in “SearchName” is the percentage multiplied by 1000 (for example, 50% is written as 50000).

The isotopic abundance of proteins is given in the “AverageEnrichmentLevel” column of *.pro.cluster.txt and of *.LabelPCTcount.txt. The value in “AverageEnrichmentLevel” is the percentage multiplied by 1000 (for example, 50% is written as 50000).