From 2c2f50823f08c9c6cf9219e6a50dfdb1449d8769 Mon Sep 17 00:00:00 2001 From: Petar Petrov Date: Fri, 9 Oct 2020 09:40:49 +0100 Subject: academic/vCAPS_coevolution: Added (Coevolution Analysis) Signed-off-by: Dave Woodfall Signed-off-by: Willy Sudiarto Raharjo --- academic/vCAPS_coevolution/README | 29 ++++++ academic/vCAPS_coevolution/References | 14 +++ academic/vCAPS_coevolution/caps_verbose.patch | 114 ++++++++++++++++++++ academic/vCAPS_coevolution/slack-desc | 19 ++++ .../vCAPS_coevolution/vCAPS_coevolution.SlackBuild | 116 +++++++++++++++++++++ academic/vCAPS_coevolution/vCAPS_coevolution.info | 10 ++ 6 files changed, 302 insertions(+) create mode 100644 academic/vCAPS_coevolution/README create mode 100644 academic/vCAPS_coevolution/References create mode 100644 academic/vCAPS_coevolution/caps_verbose.patch create mode 100644 academic/vCAPS_coevolution/slack-desc create mode 100644 academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild create mode 100644 academic/vCAPS_coevolution/vCAPS_coevolution.info diff --git a/academic/vCAPS_coevolution/README b/academic/vCAPS_coevolution/README new file mode 100644 index 0000000000..51350cd21e --- /dev/null +++ b/academic/vCAPS_coevolution/README @@ -0,0 +1,29 @@ +vCAPS: (verbose) Coevolution Analysis using Protein Sequences + +CAPS is aimed at measuring the coevolution between amino acid sites +belonging to the same protein (intra-molecular coevolution) or to two +functionally or physically interacting proteins (inter-molecular +coevolution). The Software implements an improved method to detect +intra-molecular coevolution as published in Genetics (Fares and Travers, +2006) and also inter-protein coevolution analysis. The improved scoring +of amino acid sites is obtained by maximum likelihood ancestral state +reconstruction along with simulations to assess significance. + +In addition, a test which assesses whether two proteins are interacting +is implemented. 
+ +NOTE: +This applies an _unofficial_ patch by Petrov et al 2020, to enable more +verbose output of the program, including CAPS generated phylogenetic +trees and p-values of the pairwise results. The produced executable is +called "vCAPS" and can be installed along "caps" from CAPS_coevolution. +Building CAPS from source requires the Bio++ 1.9 suite. + +CITING: +CAPS: coevolution analysis using protein sequences. Fares MA, McNally D. +Bioinformatics. 2006 Nov 15;22(22):2821-2. PMID: 17005535 + +The mathematical model has been described separately: +A novel method for detecting intramolecular coevolution: adding a +further dimension to selective constraints analyses. Fares MA, Travers +SA. Genetics. 2006 May;173(1):9-23. PMID: 16547113 diff --git a/academic/vCAPS_coevolution/References b/academic/vCAPS_coevolution/References new file mode 100644 index 0000000000..e8236cd282 --- /dev/null +++ b/academic/vCAPS_coevolution/References @@ -0,0 +1,14 @@ +If you use CAPS in your research, please include the following citations: + +CAPS: coevolution analysis using protein sequences. +Fares MA, McNally D. +Bioinformatics. 2006 Nov 15;22(22):2821-2. +PMID: 17005535 +https://www.ncbi.nlm.nih.gov/pubmed/17005535 + +The mathematical model has been described separately: +A novel method for detecting intramolecular coevolution: adding a further dimension to selective constraints analyses. +Fares MA, Travers SA. +Genetics. 2006 May;173(1):9-23. 
+PMID: 16547113 +https://www.ncbi.nlm.nih.gov/pubmed/16547113 diff --git a/academic/vCAPS_coevolution/caps_verbose.patch b/academic/vCAPS_coevolution/caps_verbose.patch new file mode 100644 index 0000000000..7f64d80f34 --- /dev/null +++ b/academic/vCAPS_coevolution/caps_verbose.patch @@ -0,0 +1,114 @@ +diff -pruN orig/caps.cpp new/caps.cpp +--- orig/caps.cpp 2012-12-15 17:13:23.000000000 +0200 ++++ new/caps.cpp 2020-09-09 23:07:46.080566000 +0300 +@@ -14,7 +14,7 @@ + #include + #include + #include +- ++#include <algorithm> + + + +@@ -69,6 +69,8 @@ + const gsl_rng_type * T; + gsl_rng *r; + ++vector<double> totaltempnew; ++double alphathresh = 0; + int main(int argc, char *argv[]){ + + +@@ -543,16 +545,27 @@ int main(int argc, char *argv[]){ + + + print_splash(output); ++ OUTPUT << "\n\nFile1: " << files[i] << endl; + vec1.print_to_fasta(output.c_str()); ++ OUTPUT << "\n\nFile2: " << files[j] << endl; + vec2.print_to_fasta(output.c_str()); + int length1 = vec1.sequences[0].length(); + int length2 = vec2.sequences[0].length(); + ++ OUTPUT << "\n\nLength1: " << length1 << endl; ++ OUTPUT << "Length2: " << length2 << endl; + + + if(tree_in ==0){ + tree1 = create_input_tree(vec1.names, vec1.sequences); + tree2 = create_input_tree(vec2.names, vec2.sequences); ++ ++ // Output the CAPS generated trees to the .out file of each pair ++ string temptre1 = TreeTemplateTools::treeToParenthesis(*tree1, true); ++ string temptre2 = TreeTemplateTools::treeToParenthesis(*tree2, true); ++ OUTPUT << "\n" << endl; ++ OUTPUT << "CAPS generated tree 1: " << temptre1 << endl; ++ OUTPUT << "CAPS generated tree 2: " << temptre2 << endl; + }/*else if(tree_in ==1 && variable==1){ + + std::auto_ptr DS; +@@ -666,6 +679,7 @@ int main(int argc, char *argv[]){ + int value = floor(((totaltemp.size())*(1-(threshval))))+1; + + threshold = totaltemp[value]; ++ totaltempnew = totaltemp; + + + /*=======================================================*/ +@@ -870,6 +884,30 @@ int Chi_squared (int num_pairs, int num_ + + } /*
----- end of function Chi_squared ----- */ + +/* + * === FUNCTION ====================================================================== + * Name: getIndex + * Description: Return the alpha (p-value) of |K| from its std::lower_bound position in v (v must be sorted ascending; the caller passes totaltempnew); the value is also stored in alphathresh. Returns NaN when |K| is greater than every element of v or v is empty + * Help from: https://www.geeksforgeeks.org/how-to-find-index-of-a-given-element-in-a-vector-in-cpp/ + * https://stackoverflow.com/questions/8647635/elegant-way-to-find-closest-value-in-a-vector-from-above + * Author: Petar Petrov, University of Turku (Finland); pebope@utu.fi + * ===================================================================================== + */ +double getIndex(std::vector<double> const& v, double K) +{ + auto const it = std::lower_bound(v.begin(), v.end(), fabs(K)); + //auto it = std::upper_bound(v.begin(), v.end(), fabs(K)); + + if (it != v.end()) { + int index = distance(v.begin(), it); + alphathresh = (((int)1+(double)v.size()-(int)index)/(double)v.size()); + return alphathresh; + } + else { + cerr << "ELEMENT NOT FOUND!"
<< endl; + return NAN; // every path must return a value: flowing off the end of a non-void function is undefined behaviour + } +} + + + +@@ -890,9 +928,9 @@ int print_inter(vector& Correl1, + output << endl << endl; + + output << "Coevolving Pairs of amino acid sites\n"; +- output << "=============================================================================\n"; +- output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\n\n"; +- output << "=============================================================================\n"; ++ output << "================================================================================================================================\n"; ++ output << "Col1(real)\tCol2(real)\tDmean1\t\tDmean2\t\tCorrelation\tBootstrap value\tP-value1\tP-value2\tMean P-value\tCorrelation1\tCorrelation2\n\n"; ++ output << "================================================================================================================================\n"; + + //double mean = average_vec(Correl); + //double SD = SD_vf(Correl, mean); +@@ -951,9 +989,11 @@ int print_inter(vector& Correl1, + + // } + ++ double Alpha1 = getIndex(totaltempnew, Correl1[cor]); ++ double Alpha2 = getIndex(totaltempnew, Correl2[cor]); + //if(bootval>=bootcut && re1<=8 && re2<=8 ){ + if(bootval>=bootcut){ +- output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << endl; ++ output << i+1 << "(" << i-gaps1+1 << ")\t\t" << j+1 << "(" << (j+1)-gaps2 << ")\t\t" << averDi << "\t\t" << averDj << "\t" << (Correl1[cor]+Correl2[cor])/2 << "\t" << bootval << "\t" << Alpha1 << "\t" << Alpha2 << "\t" << (Alpha1+Alpha2)/2 << "\t" << Correl1[cor] << "\t" << Correl2[cor] << endl; + signif.push_back(((Correl1[cor]+Correl2[cor])/2)); + ++pairs; + vector tem; diff --git a/academic/vCAPS_coevolution/slack-desc b/academic/vCAPS_coevolution/slack-desc new file mode 100644 index 0000000000..5cd78f0770 --- /dev/null +++ b/academic/vCAPS_coevolution/slack-desc @@ -0,0
+1,19 @@ +# HOW TO EDIT THIS FILE: +# The "handy ruler" below makes it easier to edit a package description. +# Line up the first '|' above the ':' following the base package name, and +# the '|' on the right side marks the last column you can put a character in. +# You must make exactly 11 lines for the formatting to be correct. It's also +# customary to leave one space after the ':' except on otherwise blank lines. + + |-----handy-ruler------------------------------------------------------| +vCAPS_coevolution: vCAPS_coevolution (Coevolution Analysis using Protein Sequences) +vCAPS_coevolution: +vCAPS_coevolution: CAPS is aimed at measuring the coevolution between amino acid +vCAPS_coevolution: sites belonging to the same protein (intra-molecular coevolution) +vCAPS_coevolution: or to two functionally or physically interacting proteins (inter- +vCAPS_coevolution: molecular coevolution). In addition, a test which assesses +vCAPS_coevolution: whether two proteins are interacting is implemented. +vCAPS_coevolution: +vCAPS_coevolution: Home: http://bioinf.gen.tcd.ie/~faresm/software/software.html +vCAPS_coevolution: +vCAPS_coevolution: diff --git a/academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild b/academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild new file mode 100644 index 0000000000..bdd3b3e794 --- /dev/null +++ b/academic/vCAPS_coevolution/vCAPS_coevolution.SlackBuild @@ -0,0 +1,116 @@ +#!/bin/sh + +# Slackware build script for vCAPS_coevolution + +# Copyright 2020 Petar Petrov slackalaxy@gmail.com +# All rights reserved. +# +# Redistribution and use of this script, with or without modification, is +# permitted provided that the following conditions are met: +# +# 1. Redistributions of this script must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +PRGNAM=vCAPS_coevolution +VERSION=${VERSION:-2.0_1UN} +BUILD=${BUILD:-1} +TAG=${TAG:-_SBo} + +SRCNAM=caps +SRCVER=2.0 +BINNAM=vCAPS + +if [ -z "$ARCH" ]; then + case "$( uname -m )" in + i?86) ARCH=i586 ;; + arm*) ARCH=arm ;; + *) ARCH=$( uname -m ) ;; + esac +fi + +CWD=$(pwd) +TMP=${TMP:-/tmp/SBo} +PKG=$TMP/package-$PRGNAM +OUTPUT=${OUTPUT:-/tmp} + +if [ "$ARCH" = "i586" ]; then + SLKCFLAGS="-O2 -march=i586 -mtune=i686" + LIBDIRSUFFIX="" +elif [ "$ARCH" = "i686" ]; then + SLKCFLAGS="-O2 -march=i686 -mtune=i686" + LIBDIRSUFFIX="" +elif [ "$ARCH" = "x86_64" ]; then + SLKCFLAGS="-O2 -fPIC" + LIBDIRSUFFIX="64" +else + SLKCFLAGS="-O2" + LIBDIRSUFFIX="" +fi + +# This is needed for vCAPS to find its libraries at runtime. Thanks Urachlay! +LIBDIRPATH="-Wl,-rpath,/usr/lib${LIBDIRSUFFIX}/Bpp1.9" + +set -e + +rm -rf $PKG +mkdir -p $TMP $PKG $OUTPUT +cd $TMP +rm -rf ${SRCNAM}${SRCVER}_src +unzip $CWD/${SRCNAM}2_src.zip +cd ${SRCNAM}${SRCVER}_src +chown -R root:root . +find -L .
\ + \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \ + -o -perm 511 \) -exec chmod 755 {} \; -o \ + \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \ + -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \; + +# This is needed for gcc in Slackware 14.2 +sed -i "s:CC=g++ -g:CC=g++ -g -std=c++11:" Makefile + +# Use our CFLAGS and the custom (legacy) lib path +sed -i "s:CFLAGS=:CFLAGS=$SLKCFLAGS $LIBDIRPATH:" Makefile + +# Find the legacy bpp libraries in /usr/lib${LIBDIRSUFFIX}/Bpp1.9 (the suffix already supplies "64" on x86_64; same directory as the rpath above) +sed -i "s:-lbpp-phyl:-L/usr/lib${LIBDIRSUFFIX}/Bpp1.9 -lbpp-phyl:g" Makefile +sed -i "s:-lbpp-numcalc:-L/usr/lib${LIBDIRSUFFIX}/Bpp1.9 -lbpp-numcalc:g" Makefile +sed -i "s:-lbpp-utils:-L/usr/lib${LIBDIRSUFFIX}/Bpp1.9 -lbpp-utils:g" Makefile +sed -i "s:-lbpp-seq:-L/usr/lib${LIBDIRSUFFIX}/Bpp1.9 -lbpp-seq:g" Makefile + +# Rename the produced executable +sed -i "s:-o caps:-o $BINNAM:" Makefile + +# Use our unofficial patch +patch -p1 -i $CWD/caps_verbose.patch + +make all + +# Install the binary produced from our patched source, as "vCAPS" +install -D -m755 $BINNAM $PKG/usr/bin/$BINNAM + +mkdir -p $PKG/usr/share/$PRGNAM +cp -a sample structures trees TLR1.fa.out $PKG/usr/share/$PRGNAM + +mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION +cp -a \ + caps_manual.pdf \ + $PKG/usr/doc/$PRGNAM-$VERSION +cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild +cat $CWD/References > $PKG/usr/doc/$PRGNAM-$VERSION/References + +mkdir -p $PKG/install +cat $CWD/slack-desc > $PKG/install/slack-desc + +cd $PKG +/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz} diff --git a/academic/vCAPS_coevolution/vCAPS_coevolution.info b/academic/vCAPS_coevolution/vCAPS_coevolution.info new file mode 100644 index 0000000000..84ebd91d4c --- /dev/null +++ b/academic/vCAPS_coevolution/vCAPS_coevolution.info @@ -0,0 +1,10 @@ +PRGNAM="vCAPS_coevolution" +VERSION="2.0_1UN" +HOMEPAGE="http://bioinf.gen.tcd.ie/~faresm/software/software.html"
+DOWNLOAD="http://bioinf.gen.tcd.ie/~faresm/software/files/caps2_src.zip" +MD5SUM="0914007c32ed22a9cb8a47b55cd18a39" +DOWNLOAD_x86_64="" +MD5SUM_x86_64="" +REQUIRES="bpp1.9-phyl" +MAINTAINER="Petar Petrov" +EMAIL="slackalaxy@gmail.com" -- cgit v1.2.3