Source code for ConjointTriad

# -*- coding: utf-8 -*-
#  Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
#  All rights reserved.
#  This file is part of the PyBioMed.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the PyBioMed source tree.
"""
###############################################################################
This module is used for calculating the conjoint triad features only from the

protein sequence information. You can get 7*7*7=343 features.You can freely

use and distribute it. If you hava any problem, you could contact with us timely!

Reference:

Juwen Shen, Jian Zhang, Xiaomin Luo, Weiliang Zhu, Kunqian Yu, Kaixian Chen,

Yixue Li, Huanliang Jiang. Predicting proten-protein interactions based only

on sequences inforamtion. PNAS. 2007 (104) 4337-4341.

Authors: Zhijiang Yao and Dongsheng Cao.

Date: 2016.06.04

Email: gadsby@163.com

###############################################################################
"""

# Core Library modules
import string

###############################################################################
AALetter = [
    "A",
    "R",
    "N",
    "D",
    "C",
    "E",
    "Q",
    "G",
    "H",
    "I",
    "L",
    "K",
    "M",
    "F",
    "P",
    "S",
    "T",
    "W",
    "Y",
    "V",
]

# a Dipole scale (Debye): -, Dipole<1.0; +, 1.0<Dipole<2.0; ++, 2.0<Dipole<3.0; +++, Dipole>3.0; +'+'+', Dipole>3.0 with opposite orientation.
# b Volume scale (Å3): -, Volume<50; +, Volume> 50.
# c Cys is separated from class 3 because of its ability to form disulfide bonds.

_repmat = {
    1: ["A", "G", "V"],
    2: ["I", "L", "F", "P"],
    3: ["Y", "M", "T", "S"],
    4: ["H", "N", "Q", "W"],
    5: ["R", "K"],
    6: ["D", "E"],
    7: ["C"],
}


###############################################################################


def _Str2Num(proteinsequence):
    """
    translate the amino acid letter into the corresponding class based on the

    given form.

    """
    repmat = {}
    for i in _repmat:
        for j in _repmat[i]:
            repmat[j] = i

    res = proteinsequence
    for i in repmat:
        res = res.replace(i, str(repmat[i]))
    return res


###############################################################################
[docs]def CalculateConjointTriad(proteinsequence): """ Calculate the conjoint triad features from protein sequence. Useage: res = CalculateConjointTriad(protein) Input: protein is a pure protein sequence. Output is a dict form containing all 343 conjoint triad features. """ res = {} proteinnum = _Str2Num(proteinsequence) for i in range(1, 8): for j in range(1, 8): for k in range(1, 8): temp = str(i) + str(j) + str(k) res[temp] = proteinnum.count(temp) return res
############################################################################### if __name__ == "__main__": protein = "ADGCGVGEGTGQGPMCNCMCMKWVYADEDAADLESDSFADEDASLESDSFPWSNQRVFCSFADEDAS" print(CalculateConjointTriad(protein))