This tutorial shows how to run a stable isotope probing (SIP) proteomics search with Sipros on DDA mass spectrometry data from 13C-labeled E. coli. The workflow works on Ubuntu 20.04 under WSL on Windows 11 and on CentOS 7.
# Fetch the two example raw files into ./raw.
# The downloads run in a subshell so the caller stays in the project root:
# the later steps address raw, ft and bin/bin relative to that root.
mkdir -p raw
(
  cd raw || exit 1
  # Download raw file with 1% 13C
  wget ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2023/04/PXD041414/Pan_062822_X1iso5.raw
  # Download raw file with 50% 13C
  wget ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2023/04/PXD041414/Pan_052322_X13.raw
)
Download Sipros program
1 2 3 4
# Download and unpack the Sipros release into ./bin.
# Everything runs in a subshell so the caller stays in the project root;
# the later steps call the tools as bin/bin/... from that root.
mkdir -p bin
(
  cd bin || exit 1
  wget https://github.com/xyz1396/SiprosToolKits/releases/download/4.0/siprosRelease.zip &&
    unzip siprosRelease.zip &&
    # Relative to ./bin this is bin/bin/* as seen from the project root.
    chmod +x bin/*
)
You may need to change the “FASTA_Database = ” entry in the .cfg file so that it points to the path of your FASTA file.
Convert Raw files
1 2 3
# Convert the raw files in ./raw, writing the output to ./ft.
conda activate mono
# -j limits the number of threads Raxport uses
mono bin/bin/Raxport.exe -i raw -o ft -j 8
Make fasta database with reverse decoy
1 2 3 4 5 6 7 8 9 10 11
# Download the E. coli reference proteome into ./fasta and decompress it.
# The subshell keeps the caller in the project root, which the later
# root-relative paths (fasta/..., bin/bin/...) rely on.
mkdir -p fasta
(
  cd fasta || exit 1
  # download E. coli protein fasta sequence
  wget https://ftp.uniprot.org/pub/databases/uniprot/knowledgebase/reference_proteomes/Bacteria/UP000000625/UP000000625_83333.fasta.gz &&
    gunzip UP000000625_83333.fasta.gz
)
# NOTE(review): no reverse-decoy generation command is visible in this step;
# confirm whether one is needed before running the search.
# Regular (unlabeled) search of the 1% 13C sample with SiprosEnsemble.
# OMP_NUM_THREADS limits the number of threads the search uses
export OMP_NUM_THREADS=10
# search the scans against the fasta database; this command takes a long time
bin/bin/SiprosEnsembleOMP \
  -f ft/Pan_062822_X1iso5.FT2 \
  -c bin/configTemplates/SiprosEnsembleConfig.cfg \
  -o regular
# Post-process the SiprosEnsemble results in ./regular:
# tabulate PSMs, filter them, then assemble protein groups.
conda activate py2

# convert .Spe2Pep.txt file to .tab file
python bin/EnsembleScripts/sipros_psm_tabulating.py \
  -i regular \
  -c bin/configTemplates/SiprosEnsembleConfig.cfg \
  -o regular

# filter PSMs, output qualified PSMs to .psm.txt file
python bin/EnsembleScripts/sipros_ensemble_filtering.py \
  -i regular \
  -c bin/configTemplates/SiprosEnsembleConfig.cfg \
  -o regular

# assemble protein groups from peptides, output proteins to .pro.txt
python bin/EnsembleScripts/sipros_peptides_assembling.py \
  -c bin/configTemplates/SiprosEnsembleConfig.cfg \
  -w regular
# Refine protein-level FDR and count spectra for the regular search.
conda activate r

# control FDR, output qualified protein groups to .proRefineFDR.txt
# (the globs are deliberately unquoted so the shell expands them)
Rscript bin/V4Scripts/refineProteinFDR.R \
  -pro regular/*.pro.txt \
  -psm regular/*.psm.txt \
  -fdr 0.005 \
  -o regular/coli

# get spectra count of each protein group, output spectra count to .SPcount.txt
Rscript bin/V4Scripts/getSpectraCountInEachFT.R \
  -pro regular/*.proRefineFDR.txt \
  -psm regular/*.psm.txt \
  -o regular/coli
# generate configs from the SiprosV4 template into ./configs
bin/bin/configGenerator \
  -i bin/configTemplates/SiprosV4Config.cfg \
  -o configs \
  -e C
conda activate r

# make a database of the proteins identified by SiprosEnsemble
# if the protein database is small, this step can be skipped;
# the original protein database is also OK
Rscript bin/V4Scripts/makeDBforLabelSearch.R \
  -pro regular/Pan_062822_X1iso5.SE.pro.txt \
  -faa fasta/UP000000625_83333.fasta \
  -o fasta/db.faa
# search the scans against the fasta database; this command takes a long time
# OMP_NUM_THREADS limits the number of threads each search uses
export OMP_NUM_THREADS=10

# One SiprosV4OMP run per generated config file.
configs=(configs/*.cfg)

# Pass the config paths NUL-delimited so a path containing whitespace is not
# split by xargs. xargs runs up to 8 searches at once (-P 8), one config per
# invocation (-n 1); "_" fills $0 so the config path arrives as "$1".
printf '%s\0' "${configs[@]}" |
  xargs -0 -n 1 -P 8 \
    bash -c 'bin/bin/SiprosV4OMP -f ft/Pan_052322_X13.FT2 -c "$1" -o sip' _
# get SIP abundance of each protein in each FT2 file
# (the -psm glob is deliberately unquoted so the shell expands it)
Rscript bin/V4Scripts/getLabelPCTinEachFT.R \
  -pro sip/coli.proRefineFDR.txt \
  -psm sip/*.psm.txt \
  -thr 5 \
  -o sip/coli
The isotopic abundance of PSMs and peptides is given in the “SearchName” column of *.psm.txt. The percentage in “SearchName” is multiplied by 1000 (for example, 50% is written as 50000).
The isotopic abundance of proteins is given in the “AverageEnrichmentLevel” column of *.pro.cluster.txt and *.LabelPCTcount.txt. The percentage in “AverageEnrichmentLevel” is multiplied by 1000 (for example, 50% is written as 50000).