Source code for QuasiSequenceOrder

# -*- coding: utf-8 -*-
#  Copyright (c) 2016-2017, Zhijiang Yao, Jie Dong and Dongsheng Cao
#  All rights reserved.
#  This file is part of the PyBioMed.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the PyBioMed source tree.
"""
##############################################################################################
This module is used for computing the quasi sequence order descriptors based on the

given protein sequence. We can obtain two types of descriptors: Sequence-order-coupling

number and quasi-sequence-order descriptors. Two distance matrixes between 20 amino acids

are employed. You can freely use and distribute it. If you have any problem, please contact

us immediately.

References:

[1]:Kuo-Chen Chou. Prediction of Protein Subcellar Locations by Incorporating

Quasi-Sequence-Order Effect. Biochemical and Biophysical Research Communications

2000, 278, 477-483.

[2]: Kuo-Chen Chou and Yu-Dong Cai. Prediction of Protein sucellular locations by

GO-FunD-PseAA predictor, Biochemical and Biophysical Research Communications,

2004, 320, 1236-1239.

[3]:Gisbert Schneider and Paul wrede. The Rational Design of Amino Acid

Sequences by Artifical Neural Networks and Simulated Molecular Evolution: Do

Novo Design of an Idealized Leader Cleavge Site. Biophys Journal, 1994, 66,

335-344.

Authors: Zhijiang Yao and Dongsheng Cao.

Date: 2016.06.04

Email: gadsby@163.com

##############################################################################################
"""

# Core Library modules
import math
import string

AALetter = [
    "A",
    "R",
    "N",
    "D",
    "C",
    "E",
    "Q",
    "G",
    "H",
    "I",
    "L",
    "K",
    "M",
    "F",
    "P",
    "S",
    "T",
    "W",
    "Y",
    "V",
]
## Distance is the Schneider-Wrede physicochemical distance matrix used by Chou et. al.
_Distance1 = {
    "GW": 0.923,
    "GV": 0.464,
    "GT": 0.272,
    "GS": 0.158,
    "GR": 1.0,
    "GQ": 0.467,
    "GP": 0.323,
    "GY": 0.728,
    "GG": 0.0,
    "GF": 0.727,
    "GE": 0.807,
    "GD": 0.776,
    "GC": 0.312,
    "GA": 0.206,
    "GN": 0.381,
    "GM": 0.557,
    "GL": 0.591,
    "GK": 0.894,
    "GI": 0.592,
    "GH": 0.769,
    "ME": 0.879,
    "MD": 0.932,
    "MG": 0.569,
    "MF": 0.182,
    "MA": 0.383,
    "MC": 0.276,
    "MM": 0.0,
    "ML": 0.062,
    "MN": 0.447,
    "MI": 0.058,
    "MH": 0.648,
    "MK": 0.884,
    "MT": 0.358,
    "MW": 0.391,
    "MV": 0.12,
    "MQ": 0.372,
    "MP": 0.285,
    "MS": 0.417,
    "MR": 1.0,
    "MY": 0.255,
    "FP": 0.42,
    "FQ": 0.459,
    "FR": 1.0,
    "FS": 0.548,
    "FT": 0.499,
    "FV": 0.252,
    "FW": 0.207,
    "FY": 0.179,
    "FA": 0.508,
    "FC": 0.405,
    "FD": 0.977,
    "FE": 0.918,
    "FF": 0.0,
    "FG": 0.69,
    "FH": 0.663,
    "FI": 0.128,
    "FK": 0.903,
    "FL": 0.131,
    "FM": 0.169,
    "FN": 0.541,
    "SY": 0.615,
    "SS": 0.0,
    "SR": 1.0,
    "SQ": 0.358,
    "SP": 0.181,
    "SW": 0.827,
    "SV": 0.342,
    "ST": 0.174,
    "SK": 0.883,
    "SI": 0.478,
    "SH": 0.718,
    "SN": 0.289,
    "SM": 0.44,
    "SL": 0.474,
    "SC": 0.185,
    "SA": 0.1,
    "SG": 0.17,
    "SF": 0.622,
    "SE": 0.812,
    "SD": 0.801,
    "YI": 0.23,
    "YH": 0.678,
    "YK": 0.904,
    "YM": 0.268,
    "YL": 0.219,
    "YN": 0.512,
    "YA": 0.587,
    "YC": 0.478,
    "YE": 0.932,
    "YD": 1.0,
    "YG": 0.782,
    "YF": 0.202,
    "YY": 0.0,
    "YQ": 0.404,
    "YP": 0.444,
    "YS": 0.612,
    "YR": 0.995,
    "YT": 0.557,
    "YW": 0.244,
    "YV": 0.328,
    "LF": 0.139,
    "LG": 0.596,
    "LD": 0.944,
    "LE": 0.892,
    "LC": 0.296,
    "LA": 0.405,
    "LN": 0.452,
    "LL": 0.0,
    "LM": 0.062,
    "LK": 0.893,
    "LH": 0.653,
    "LI": 0.013,
    "LV": 0.133,
    "LW": 0.341,
    "LT": 0.397,
    "LR": 1.0,
    "LS": 0.443,
    "LP": 0.309,
    "LQ": 0.376,
    "LY": 0.205,
    "RT": 0.808,
    "RV": 0.914,
    "RW": 1.0,
    "RP": 0.796,
    "RQ": 0.668,
    "RR": 0.0,
    "RS": 0.86,
    "RY": 0.859,
    "RD": 0.305,
    "RE": 0.225,
    "RF": 0.977,
    "RG": 0.928,
    "RA": 0.919,
    "RC": 0.905,
    "RL": 0.92,
    "RM": 0.908,
    "RN": 0.69,
    "RH": 0.498,
    "RI": 0.929,
    "RK": 0.141,
    "VH": 0.649,
    "VI": 0.135,
    "EM": 0.83,
    "EL": 0.854,
    "EN": 0.599,
    "EI": 0.86,
    "EH": 0.406,
    "EK": 0.143,
    "EE": 0.0,
    "ED": 0.133,
    "EG": 0.779,
    "EF": 0.932,
    "EA": 0.79,
    "EC": 0.788,
    "VM": 0.12,
    "EY": 0.837,
    "VN": 0.38,
    "ET": 0.682,
    "EW": 1.0,
    "EV": 0.824,
    "EQ": 0.598,
    "EP": 0.688,
    "ES": 0.726,
    "ER": 0.234,
    "VP": 0.212,
    "VQ": 0.339,
    "VR": 1.0,
    "VT": 0.305,
    "VW": 0.472,
    "KC": 0.871,
    "KA": 0.889,
    "KG": 0.9,
    "KF": 0.957,
    "KE": 0.149,
    "KD": 0.279,
    "KK": 0.0,
    "KI": 0.899,
    "KH": 0.438,
    "KN": 0.667,
    "KM": 0.871,
    "KL": 0.892,
    "KS": 0.825,
    "KR": 0.154,
    "KQ": 0.639,
    "KP": 0.757,
    "KW": 1.0,
    "KV": 0.882,
    "KT": 0.759,
    "KY": 0.848,
    "DN": 0.56,
    "DL": 0.841,
    "DM": 0.819,
    "DK": 0.249,
    "DH": 0.435,
    "DI": 0.847,
    "DF": 0.924,
    "DG": 0.697,
    "DD": 0.0,
    "DE": 0.124,
    "DC": 0.742,
    "DA": 0.729,
    "DY": 0.836,
    "DV": 0.797,
    "DW": 1.0,
    "DT": 0.649,
    "DR": 0.295,
    "DS": 0.667,
    "DP": 0.657,
    "DQ": 0.584,
    "QQ": 0.0,
    "QP": 0.272,
    "QS": 0.461,
    "QR": 1.0,
    "QT": 0.389,
    "QW": 0.831,
    "QV": 0.464,
    "QY": 0.522,
    "QA": 0.512,
    "QC": 0.462,
    "QE": 0.861,
    "QD": 0.903,
    "QG": 0.648,
    "QF": 0.671,
    "QI": 0.532,
    "QH": 0.765,
    "QK": 0.881,
    "QM": 0.505,
    "QL": 0.518,
    "QN": 0.181,
    "WG": 0.829,
    "WF": 0.196,
    "WE": 0.931,
    "WD": 1.0,
    "WC": 0.56,
    "WA": 0.658,
    "WN": 0.631,
    "WM": 0.344,
    "WL": 0.304,
    "WK": 0.892,
    "WI": 0.305,
    "WH": 0.678,
    "WW": 0.0,
    "WV": 0.418,
    "WT": 0.638,
    "WS": 0.689,
    "WR": 0.968,
    "WQ": 0.538,
    "WP": 0.555,
    "WY": 0.204,
    "PR": 1.0,
    "PS": 0.196,
    "PP": 0.0,
    "PQ": 0.228,
    "PV": 0.244,
    "PW": 0.72,
    "PT": 0.161,
    "PY": 0.481,
    "PC": 0.179,
    "PA": 0.22,
    "PF": 0.515,
    "PG": 0.376,
    "PD": 0.852,
    "PE": 0.831,
    "PK": 0.875,
    "PH": 0.696,
    "PI": 0.363,
    "PN": 0.231,
    "PL": 0.357,
    "PM": 0.326,
    "CK": 0.887,
    "CI": 0.304,
    "CH": 0.66,
    "CN": 0.324,
    "CM": 0.277,
    "CL": 0.301,
    "CC": 0.0,
    "CA": 0.114,
    "CG": 0.32,
    "CF": 0.437,
    "CE": 0.838,
    "CD": 0.847,
    "CY": 0.457,
    "CS": 0.176,
    "CR": 1.0,
    "CQ": 0.341,
    "CP": 0.157,
    "CW": 0.639,
    "CV": 0.167,
    "CT": 0.233,
    "IY": 0.213,
    "VA": 0.275,
    "VC": 0.165,
    "VD": 0.9,
    "VE": 0.867,
    "VF": 0.269,
    "VG": 0.471,
    "IQ": 0.383,
    "IP": 0.311,
    "IS": 0.443,
    "IR": 1.0,
    "VL": 0.134,
    "IT": 0.396,
    "IW": 0.339,
    "IV": 0.133,
    "II": 0.0,
    "IH": 0.652,
    "IK": 0.892,
    "VS": 0.322,
    "IM": 0.057,
    "IL": 0.013,
    "VV": 0.0,
    "IN": 0.457,
    "IA": 0.403,
    "VY": 0.31,
    "IC": 0.296,
    "IE": 0.891,
    "ID": 0.942,
    "IG": 0.592,
    "IF": 0.134,
    "HY": 0.821,
    "HR": 0.697,
    "HS": 0.865,
    "HP": 0.777,
    "HQ": 0.716,
    "HV": 0.831,
    "HW": 0.981,
    "HT": 0.834,
    "HK": 0.566,
    "HH": 0.0,
    "HI": 0.848,
    "HN": 0.754,
    "HL": 0.842,
    "HM": 0.825,
    "HC": 0.836,
    "HA": 0.896,
    "HF": 0.907,
    "HG": 1.0,
    "HD": 0.629,
    "HE": 0.547,
    "NH": 0.78,
    "NI": 0.615,
    "NK": 0.891,
    "NL": 0.603,
    "NM": 0.588,
    "NN": 0.0,
    "NA": 0.424,
    "NC": 0.425,
    "ND": 0.838,
    "NE": 0.835,
    "NF": 0.766,
    "NG": 0.512,
    "NY": 0.641,
    "NP": 0.266,
    "NQ": 0.175,
    "NR": 1.0,
    "NS": 0.361,
    "NT": 0.368,
    "NV": 0.503,
    "NW": 0.945,
    "TY": 0.596,
    "TV": 0.345,
    "TW": 0.816,
    "TT": 0.0,
    "TR": 1.0,
    "TS": 0.185,
    "TP": 0.159,
    "TQ": 0.322,
    "TN": 0.315,
    "TL": 0.453,
    "TM": 0.403,
    "TK": 0.866,
    "TH": 0.737,
    "TI": 0.455,
    "TF": 0.604,
    "TG": 0.312,
    "TD": 0.83,
    "TE": 0.812,
    "TC": 0.261,
    "TA": 0.251,
    "AA": 0.0,
    "AC": 0.112,
    "AE": 0.827,
    "AD": 0.819,
    "AG": 0.208,
    "AF": 0.54,
    "AI": 0.407,
    "AH": 0.696,
    "AK": 0.891,
    "AM": 0.379,
    "AL": 0.406,
    "AN": 0.318,
    "AQ": 0.372,
    "AP": 0.191,
    "AS": 0.094,
    "AR": 1.0,
    "AT": 0.22,
    "AW": 0.739,
    "AV": 0.273,
    "AY": 0.552,
    "VK": 0.889,
}

## Distance is the Grantham chemical distance matrix used by Grantham et. al.
_Distance2 = {
    "GW": 0.923,
    "GV": 0.464,
    "GT": 0.272,
    "GS": 0.158,
    "GR": 1.0,
    "GQ": 0.467,
    "GP": 0.323,
    "GY": 0.728,
    "GG": 0.0,
    "GF": 0.727,
    "GE": 0.807,
    "GD": 0.776,
    "GC": 0.312,
    "GA": 0.206,
    "GN": 0.381,
    "GM": 0.557,
    "GL": 0.591,
    "GK": 0.894,
    "GI": 0.592,
    "GH": 0.769,
    "ME": 0.879,
    "MD": 0.932,
    "MG": 0.569,
    "MF": 0.182,
    "MA": 0.383,
    "MC": 0.276,
    "MM": 0.0,
    "ML": 0.062,
    "MN": 0.447,
    "MI": 0.058,
    "MH": 0.648,
    "MK": 0.884,
    "MT": 0.358,
    "MW": 0.391,
    "MV": 0.12,
    "MQ": 0.372,
    "MP": 0.285,
    "MS": 0.417,
    "MR": 1.0,
    "MY": 0.255,
    "FP": 0.42,
    "FQ": 0.459,
    "FR": 1.0,
    "FS": 0.548,
    "FT": 0.499,
    "FV": 0.252,
    "FW": 0.207,
    "FY": 0.179,
    "FA": 0.508,
    "FC": 0.405,
    "FD": 0.977,
    "FE": 0.918,
    "FF": 0.0,
    "FG": 0.69,
    "FH": 0.663,
    "FI": 0.128,
    "FK": 0.903,
    "FL": 0.131,
    "FM": 0.169,
    "FN": 0.541,
    "SY": 0.615,
    "SS": 0.0,
    "SR": 1.0,
    "SQ": 0.358,
    "SP": 0.181,
    "SW": 0.827,
    "SV": 0.342,
    "ST": 0.174,
    "SK": 0.883,
    "SI": 0.478,
    "SH": 0.718,
    "SN": 0.289,
    "SM": 0.44,
    "SL": 0.474,
    "SC": 0.185,
    "SA": 0.1,
    "SG": 0.17,
    "SF": 0.622,
    "SE": 0.812,
    "SD": 0.801,
    "YI": 0.23,
    "YH": 0.678,
    "YK": 0.904,
    "YM": 0.268,
    "YL": 0.219,
    "YN": 0.512,
    "YA": 0.587,
    "YC": 0.478,
    "YE": 0.932,
    "YD": 1.0,
    "YG": 0.782,
    "YF": 0.202,
    "YY": 0.0,
    "YQ": 0.404,
    "YP": 0.444,
    "YS": 0.612,
    "YR": 0.995,
    "YT": 0.557,
    "YW": 0.244,
    "YV": 0.328,
    "LF": 0.139,
    "LG": 0.596,
    "LD": 0.944,
    "LE": 0.892,
    "LC": 0.296,
    "LA": 0.405,
    "LN": 0.452,
    "LL": 0.0,
    "LM": 0.062,
    "LK": 0.893,
    "LH": 0.653,
    "LI": 0.013,
    "LV": 0.133,
    "LW": 0.341,
    "LT": 0.397,
    "LR": 1.0,
    "LS": 0.443,
    "LP": 0.309,
    "LQ": 0.376,
    "LY": 0.205,
    "RT": 0.808,
    "RV": 0.914,
    "RW": 1.0,
    "RP": 0.796,
    "RQ": 0.668,
    "RR": 0.0,
    "RS": 0.86,
    "RY": 0.859,
    "RD": 0.305,
    "RE": 0.225,
    "RF": 0.977,
    "RG": 0.928,
    "RA": 0.919,
    "RC": 0.905,
    "RL": 0.92,
    "RM": 0.908,
    "RN": 0.69,
    "RH": 0.498,
    "RI": 0.929,
    "RK": 0.141,
    "VH": 0.649,
    "VI": 0.135,
    "EM": 0.83,
    "EL": 0.854,
    "EN": 0.599,
    "EI": 0.86,
    "EH": 0.406,
    "EK": 0.143,
    "EE": 0.0,
    "ED": 0.133,
    "EG": 0.779,
    "EF": 0.932,
    "EA": 0.79,
    "EC": 0.788,
    "VM": 0.12,
    "EY": 0.837,
    "VN": 0.38,
    "ET": 0.682,
    "EW": 1.0,
    "EV": 0.824,
    "EQ": 0.598,
    "EP": 0.688,
    "ES": 0.726,
    "ER": 0.234,
    "VP": 0.212,
    "VQ": 0.339,
    "VR": 1.0,
    "VT": 0.305,
    "VW": 0.472,
    "KC": 0.871,
    "KA": 0.889,
    "KG": 0.9,
    "KF": 0.957,
    "KE": 0.149,
    "KD": 0.279,
    "KK": 0.0,
    "KI": 0.899,
    "KH": 0.438,
    "KN": 0.667,
    "KM": 0.871,
    "KL": 0.892,
    "KS": 0.825,
    "KR": 0.154,
    "KQ": 0.639,
    "KP": 0.757,
    "KW": 1.0,
    "KV": 0.882,
    "KT": 0.759,
    "KY": 0.848,
    "DN": 0.56,
    "DL": 0.841,
    "DM": 0.819,
    "DK": 0.249,
    "DH": 0.435,
    "DI": 0.847,
    "DF": 0.924,
    "DG": 0.697,
    "DD": 0.0,
    "DE": 0.124,
    "DC": 0.742,
    "DA": 0.729,
    "DY": 0.836,
    "DV": 0.797,
    "DW": 1.0,
    "DT": 0.649,
    "DR": 0.295,
    "DS": 0.667,
    "DP": 0.657,
    "DQ": 0.584,
    "QQ": 0.0,
    "QP": 0.272,
    "QS": 0.461,
    "QR": 1.0,
    "QT": 0.389,
    "QW": 0.831,
    "QV": 0.464,
    "QY": 0.522,
    "QA": 0.512,
    "QC": 0.462,
    "QE": 0.861,
    "QD": 0.903,
    "QG": 0.648,
    "QF": 0.671,
    "QI": 0.532,
    "QH": 0.765,
    "QK": 0.881,
    "QM": 0.505,
    "QL": 0.518,
    "QN": 0.181,
    "WG": 0.829,
    "WF": 0.196,
    "WE": 0.931,
    "WD": 1.0,
    "WC": 0.56,
    "WA": 0.658,
    "WN": 0.631,
    "WM": 0.344,
    "WL": 0.304,
    "WK": 0.892,
    "WI": 0.305,
    "WH": 0.678,
    "WW": 0.0,
    "WV": 0.418,
    "WT": 0.638,
    "WS": 0.689,
    "WR": 0.968,
    "WQ": 0.538,
    "WP": 0.555,
    "WY": 0.204,
    "PR": 1.0,
    "PS": 0.196,
    "PP": 0.0,
    "PQ": 0.228,
    "PV": 0.244,
    "PW": 0.72,
    "PT": 0.161,
    "PY": 0.481,
    "PC": 0.179,
    "PA": 0.22,
    "PF": 0.515,
    "PG": 0.376,
    "PD": 0.852,
    "PE": 0.831,
    "PK": 0.875,
    "PH": 0.696,
    "PI": 0.363,
    "PN": 0.231,
    "PL": 0.357,
    "PM": 0.326,
    "CK": 0.887,
    "CI": 0.304,
    "CH": 0.66,
    "CN": 0.324,
    "CM": 0.277,
    "CL": 0.301,
    "CC": 0.0,
    "CA": 0.114,
    "CG": 0.32,
    "CF": 0.437,
    "CE": 0.838,
    "CD": 0.847,
    "CY": 0.457,
    "CS": 0.176,
    "CR": 1.0,
    "CQ": 0.341,
    "CP": 0.157,
    "CW": 0.639,
    "CV": 0.167,
    "CT": 0.233,
    "IY": 0.213,
    "VA": 0.275,
    "VC": 0.165,
    "VD": 0.9,
    "VE": 0.867,
    "VF": 0.269,
    "VG": 0.471,
    "IQ": 0.383,
    "IP": 0.311,
    "IS": 0.443,
    "IR": 1.0,
    "VL": 0.134,
    "IT": 0.396,
    "IW": 0.339,
    "IV": 0.133,
    "II": 0.0,
    "IH": 0.652,
    "IK": 0.892,
    "VS": 0.322,
    "IM": 0.057,
    "IL": 0.013,
    "VV": 0.0,
    "IN": 0.457,
    "IA": 0.403,
    "VY": 0.31,
    "IC": 0.296,
    "IE": 0.891,
    "ID": 0.942,
    "IG": 0.592,
    "IF": 0.134,
    "HY": 0.821,
    "HR": 0.697,
    "HS": 0.865,
    "HP": 0.777,
    "HQ": 0.716,
    "HV": 0.831,
    "HW": 0.981,
    "HT": 0.834,
    "HK": 0.566,
    "HH": 0.0,
    "HI": 0.848,
    "HN": 0.754,
    "HL": 0.842,
    "HM": 0.825,
    "HC": 0.836,
    "HA": 0.896,
    "HF": 0.907,
    "HG": 1.0,
    "HD": 0.629,
    "HE": 0.547,
    "NH": 0.78,
    "NI": 0.615,
    "NK": 0.891,
    "NL": 0.603,
    "NM": 0.588,
    "NN": 0.0,
    "NA": 0.424,
    "NC": 0.425,
    "ND": 0.838,
    "NE": 0.835,
    "NF": 0.766,
    "NG": 0.512,
    "NY": 0.641,
    "NP": 0.266,
    "NQ": 0.175,
    "NR": 1.0,
    "NS": 0.361,
    "NT": 0.368,
    "NV": 0.503,
    "NW": 0.945,
    "TY": 0.596,
    "TV": 0.345,
    "TW": 0.816,
    "TT": 0.0,
    "TR": 1.0,
    "TS": 0.185,
    "TP": 0.159,
    "TQ": 0.322,
    "TN": 0.315,
    "TL": 0.453,
    "TM": 0.403,
    "TK": 0.866,
    "TH": 0.737,
    "TI": 0.455,
    "TF": 0.604,
    "TG": 0.312,
    "TD": 0.83,
    "TE": 0.812,
    "TC": 0.261,
    "TA": 0.251,
    "AA": 0.0,
    "AC": 0.112,
    "AE": 0.827,
    "AD": 0.819,
    "AG": 0.208,
    "AF": 0.54,
    "AI": 0.407,
    "AH": 0.696,
    "AK": 0.891,
    "AM": 0.379,
    "AL": 0.406,
    "AN": 0.318,
    "AQ": 0.372,
    "AP": 0.191,
    "AS": 0.094,
    "AR": 1.0,
    "AT": 0.22,
    "AW": 0.739,
    "AV": 0.273,
    "AY": 0.552,
    "VK": 0.889,
}


#############################################################################################
#############################################################################################
[docs]def GetSequenceOrderCouplingNumber(ProteinSequence, d=1, distancematrix=_Distance1): """ ############################################################################### Computing the dth-rank sequence order coupling number for a protein. Usage: result = GetSequenceOrderCouplingNumber(protein,d) Input: protein is a pure protein sequence. d is the gap between two amino acids. Output: result is numeric value. ############################################################################### """ NumProtein = len(ProteinSequence) tau = 0.0 for i in range(NumProtein - d): temp1 = ProteinSequence[i] temp2 = ProteinSequence[i + d] tau = tau + math.pow(distancematrix[temp1 + temp2], 2) return round(tau, 3)
#############################################################################################
[docs]def GetSequenceOrderCouplingNumberp(ProteinSequence, maxlag=30, distancematrix={}): """ ############################################################################### Computing the sequence order coupling numbers from 1 to maxlag for a given protein sequence based on the user-defined property. Usage: result = GetSequenceOrderCouplingNumberp(protein, maxlag,distancematrix) Input: protein is a pure protein sequence maxlag is the maximum lag and the length of the protein should be larger than maxlag. default is 30. distancematrix is the a dict form containing 400 distance values Output: result is a dict form containing all sequence order coupling numbers based on the given property ############################################################################### """ NumProtein = len(ProteinSequence) Tau = {} for i in range(maxlag): Tau["tau" + str(i + 1)] = GetSequenceOrderCouplingNumber( ProteinSequence, i + 1, distancematrix ) return Tau
#############################################################################################
[docs]def GetSequenceOrderCouplingNumberSW( ProteinSequence, maxlag=30, distancematrix=_Distance1 ): """ ############################################################################### Computing the sequence order coupling numbers from 1 to maxlag for a given protein sequence based on the Schneider-Wrede physicochemical distance matrix Usage: result = GetSequenceOrderCouplingNumberSW(protein, maxlag,distancematrix) Input: protein is a pure protein sequence maxlag is the maximum lag and the length of the protein should be larger than maxlag. default is 30. distancematrix is a dict form containing Schneider-Wrede physicochemical distance matrix. omitted! Output: result is a dict form containing all sequence order coupling numbers based on the Schneider-Wrede physicochemical distance matrix ############################################################################### """ NumProtein = len(ProteinSequence) Tau = {} for i in range(maxlag): Tau["tausw" + str(i + 1)] = GetSequenceOrderCouplingNumber( ProteinSequence, i + 1, distancematrix ) return Tau
#############################################################################################
[docs]def GetSequenceOrderCouplingNumberGrant( ProteinSequence, maxlag=30, distancematrix=_Distance2 ): """ ############################################################################### Computing the sequence order coupling numbers from 1 to maxlag for a given protein sequence based on the Grantham chemical distance matrix. Usage: result = GetSequenceOrderCouplingNumberGrant(protein, maxlag,distancematrix) Input: protein is a pure protein sequence maxlag is the maximum lag and the length of the protein should be larger than maxlag. default is 30. distancematrix is a dict form containing Grantham chemical distance matrix. omitted! Output: result is a dict form containing all sequence order coupling numbers based on the Grantham chemical distance matrix ############################################################################### """ NumProtein = len(ProteinSequence) Tau = {} for i in range(maxlag): Tau["taugrant" + str(i + 1)] = GetSequenceOrderCouplingNumber( ProteinSequence, i + 1, distancematrix ) return Tau
#############################################################################################
[docs]def GetSequenceOrderCouplingNumberTotal(ProteinSequence, maxlag=30): """ ############################################################################### Computing the sequence order coupling numbers from 1 to maxlag for a given protein sequence. Usage: result = GetSequenceOrderCouplingNumberTotal(protein, maxlag) Input: protein is a pure protein sequence maxlag is the maximum lag and the length of the protein should be larger than maxlag. default is 30. Output: result is a dict form containing all sequence order coupling numbers ############################################################################### """ Tau = {} Tau.update(GetSequenceOrderCouplingNumberSW(ProteinSequence, maxlag=maxlag)) Tau.update(GetSequenceOrderCouplingNumberGrant(ProteinSequence, maxlag=maxlag)) return Tau
#############################################################################################
[docs]def GetAAComposition(ProteinSequence): """ ############################################################################### Calculate the composition of Amino acids for a given protein sequence. Usage: result=CalculateAAComposition(protein) Input: protein is a pure protein sequence. Output: result is a dict form containing the composition of 20 amino acids. ############################################################################### """ LengthSequence = len(ProteinSequence) Result = {} for i in AALetter: Result[i] = round(float(ProteinSequence.count(i)) / LengthSequence, 3) return Result
#############################################################################################
[docs]def GetQuasiSequenceOrder1(ProteinSequence, maxlag=30, weight=0.1, distancematrix={}): """ ############################################################################### Computing the first 20 quasi-sequence-order descriptors for a given protein sequence. Usage: result = GetQuasiSequenceOrder1(protein,maxlag,weigt) see method GetQuasiSequenceOrder for the choice of parameters. ############################################################################### """ rightpart = 0.0 for i in range(maxlag): rightpart = rightpart + GetSequenceOrderCouplingNumber( ProteinSequence, i + 1, distancematrix ) AAC = GetAAComposition(ProteinSequence) result = {} temp = 1 + weight * rightpart for index, i in enumerate(AALetter): result["QSO" + str(index + 1)] = round(AAC[i] / temp, 6) return result
#############################################################################################
[docs]def GetQuasiSequenceOrder2(ProteinSequence, maxlag=30, weight=0.1, distancematrix={}): """ ############################################################################### Computing the last maxlag quasi-sequence-order descriptors for a given protein sequence. Usage: result = GetQuasiSequenceOrder2(protein,maxlag,weigt) see method GetQuasiSequenceOrder for the choice of parameters. ############################################################################### """ rightpart = [] for i in range(maxlag): rightpart.append( GetSequenceOrderCouplingNumber(ProteinSequence, i + 1, distancematrix) ) AAC = GetAAComposition(ProteinSequence) result = {} temp = 1 + weight * sum(rightpart) for index in range(20, 20 + maxlag): result["QSO" + str(index + 1)] = round(weight * rightpart[index - 20] / temp, 6) return result
#############################################################################################
[docs]def GetQuasiSequenceOrder1SW( ProteinSequence, maxlag=30, weight=0.1, distancematrix=_Distance1 ): """ ############################################################################### Computing the first 20 quasi-sequence-order descriptors for a given protein sequence. Usage: result = GetQuasiSequenceOrder1SW(protein,maxlag,weigt) see method GetQuasiSequenceOrder for the choice of parameters. ############################################################################### """ rightpart = 0.0 for i in range(maxlag): rightpart = rightpart + GetSequenceOrderCouplingNumber( ProteinSequence, i + 1, distancematrix ) AAC = GetAAComposition(ProteinSequence) result = {} temp = 1 + weight * rightpart for index, i in enumerate(AALetter): result["QSOSW" + str(index + 1)] = round(AAC[i] / temp, 6) return result
#############################################################################################
[docs]def GetQuasiSequenceOrder2SW( ProteinSequence, maxlag=30, weight=0.1, distancematrix=_Distance1 ): """ ############################################################################### Computing the last maxlag quasi-sequence-order descriptors for a given protein sequence. Usage: result = GetQuasiSequenceOrder2SW(protein,maxlag,weigt) see method GetQuasiSequenceOrder for the choice of parameters. ############################################################################### """ rightpart = [] for i in range(maxlag): rightpart.append( GetSequenceOrderCouplingNumber(ProteinSequence, i + 1, distancematrix) ) AAC = GetAAComposition(ProteinSequence) result = {} temp = 1 + weight * sum(rightpart) for index in range(20, 20 + maxlag): result["QSOSW" + str(index + 1)] = round( weight * rightpart[index - 20] / temp, 6 ) return result
#############################################################################################
[docs]def GetQuasiSequenceOrder1Grant( ProteinSequence, maxlag=30, weight=0.1, distancematrix=_Distance2 ): """ ############################################################################### Computing the first 20 quasi-sequence-order descriptors for a given protein sequence. Usage: result = GetQuasiSequenceOrder1Grant(protein,maxlag,weigt) see method GetQuasiSequenceOrder for the choice of parameters. ############################################################################### """ rightpart = 0.0 for i in range(maxlag): rightpart = rightpart + GetSequenceOrderCouplingNumber( ProteinSequence, i + 1, distancematrix ) AAC = GetAAComposition(ProteinSequence) result = {} temp = 1 + weight * rightpart for index, i in enumerate(AALetter): result["QSOgrant" + str(index + 1)] = round(AAC[i] / temp, 6) return result
#############################################################################################
[docs]def GetQuasiSequenceOrder2Grant( ProteinSequence, maxlag=30, weight=0.1, distancematrix=_Distance2 ): """ ############################################################################### Computing the last maxlag quasi-sequence-order descriptors for a given protein sequence. Usage: result = GetQuasiSequenceOrder2Grant(protein,maxlag,weigt) see method GetQuasiSequenceOrder for the choice of parameters. ############################################################################### """ rightpart = [] for i in range(maxlag): rightpart.append( GetSequenceOrderCouplingNumber(ProteinSequence, i + 1, distancematrix) ) AAC = GetAAComposition(ProteinSequence) result = {} temp = 1 + weight * sum(rightpart) for index in range(20, 20 + maxlag): result["QSOgrant" + str(index + 1)] = round( weight * rightpart[index - 20] / temp, 6 ) return result
#############################################################################################
[docs]def GetQuasiSequenceOrder(ProteinSequence, maxlag=30, weight=0.1): """ ############################################################################### Computing quasi-sequence-order descriptors for a given protein. [1]:Kuo-Chen Chou. Prediction of Protein Subcellar Locations by Incorporating Quasi-Sequence-Order Effect. Biochemical and Biophysical Research Communications 2000, 278, 477-483. Usage: result = GetQuasiSequenceOrder(protein,maxlag,weight) Input: protein is a pure protein sequence maxlag is the maximum lag and the length of the protein should be larger than maxlag. default is 30. weight is a weight factor. please see reference 1 for its choice. default is 0.1. Output: result is a dict form containing all quasi-sequence-order descriptors ############################################################################### """ result = dict() result.update(GetQuasiSequenceOrder1SW(ProteinSequence, maxlag, weight, _Distance1)) result.update(GetQuasiSequenceOrder2SW(ProteinSequence, maxlag, weight, _Distance1)) result.update( GetQuasiSequenceOrder1Grant(ProteinSequence, maxlag, weight, _Distance2) ) result.update( GetQuasiSequenceOrder2Grant(ProteinSequence, maxlag, weight, _Distance2) ) return result
#############################################################################################
[docs]def GetQuasiSequenceOrderp(ProteinSequence, maxlag=30, weight=0.1, distancematrix={}): """ ############################################################################### Computing quasi-sequence-order descriptors for a given protein. [1]:Kuo-Chen Chou. Prediction of Protein Subcellar Locations by Incorporating Quasi-Sequence-Order Effect. Biochemical and Biophysical Research Communications 2000, 278, 477-483. Usage: result = GetQuasiSequenceOrderp(protein,maxlag,weight,distancematrix) Input: protein is a pure protein sequence maxlag is the maximum lag and the length of the protein should be larger than maxlag. default is 30. weight is a weight factor. please see reference 1 for its choice. default is 0.1. distancematrix is a dict form containing 400 distance values Output: result is a dict form containing all quasi-sequence-order descriptors ############################################################################### """ result = dict() result.update( GetQuasiSequenceOrder1(ProteinSequence, maxlag, weight, distancematrix) ) result.update( GetQuasiSequenceOrder2(ProteinSequence, maxlag, weight, distancematrix) ) return result
############################################################################################# if __name__ == "__main__": protein = "ELRLRYCAPAGFALLKCNDADYDGFKTNCSNVSVVHCTNLMNTTVTTGLLLNGSYSENRT\ QIWQKHRTSNDSALILLNKHYNLTVTCKRPGNKTVLPVTIMAGLVFHSQKYNLRLRQAWC\ HFPSNWKGAWKEVKEEIVNLPKERYRGTNDPKRIFFQRQWGDPETANLWFNCHGEFFYCK\ MDWFLNYLNNLTVDADHNECKNTSGTKSGNKRAPGPCVQRTYVACHIRSVIIWLETISKK\ TYAPPREGHLECTSTVTGMTVELNYIPKNRTNVTLSPQIESIWAAELDRYKLVEITPIGF\ APTEVRRYTGGHERQKRVPFVVQSQHLLAGILQQQKNLLAAVEAQQQMLKLTIWGVK" print(len(protein)) SCN = GetSequenceOrderCouplingNumberTotal(protein, maxlag=30) print(len(SCN)) for i in SCN: print(i, SCN[i]) QSO1 = GetQuasiSequenceOrder1( protein, maxlag=30, weight=0.1, distancematrix=_Distance1 ) print(QSO1) for i in QSO1: print(i, QSO1[i]) QSO2 = GetQuasiSequenceOrder2( protein, maxlag=30, weight=0.1, distancematrix=_Distance1 ) print(QSO2) for i in QSO2: print(i, QSO2[i]) QSO = GetQuasiSequenceOrder(protein, maxlag=30, weight=0.1) print(len(QSO)) for i in QSO: print(i, QSO[i]) SCN = GetSequenceOrderCouplingNumberp(protein, maxlag=30, distancematrix=_Distance1) print(len(SCN)) for i in SCN: print(i, SCN[i]) QSO = GetQuasiSequenceOrderp(protein, maxlag=30, distancematrix=_Distance1) print(len(QSO)) for i in QSO: print(i, QSO[i])