Visualizing rRNA Structure with RiboVision

1. RiboVision Website

RiboVision 是一个可视化rRNA二级结构的网页工具。

2. icSHAPE data to color data

# Rainbow gradient: export the raw icSHAPE scores as a numeric data column.
def shape2RiboColor_1(inFileName, outFileName, sequence):
    """Write icSHAPE scores as a CSV with a numeric ``DataCol`` column.

    ``loadicSHAPE`` is defined outside this section; it is used here as
    returning a mapping from transcript name to a per-position list of
    scores in which missing values are the string ``"NULL"``.

    Args:
        inFileName: icSHAPE file passed to ``loadicSHAPE``.
        outFileName: Path of the CSV file to create (overwritten).
        sequence: Residue letters, indexed by 0-based position; must be at
            least as long as the score list.

    Only the first transcript is exported. If the file holds more than one,
    an error message is printed and processing continues with the first.
    """
    shape = loadicSHAPE(inFileName)
    if len(shape) > 1:
        print("Error: Multiple Sequence")
    # list(...)[0] works on Python 2 and 3 and still raises IndexError when
    # the mapping is empty, as the original keys()[0] did.
    trans_name = list(shape)[0]
    scores = shape[trans_name]
    # The context manager closes the file even if a row fails to format.
    with open(outFileName, "w") as out:
        out.write("resNum,resName,DataCol,DataDescription\n")
        for idx, score in enumerate(scores):
            # Missing scores are exported as 0.0 so every residue has a value.
            value = 0.000 if score == "NULL" else score
            out.write("%s:%s,%s,%s,\n" % (trans_name, idx + 1, sequence[idx], value))

# Show only four colors: bin the icSHAPE scores into fixed color classes.
def shape2RiboColor_2(inFileName, outFileName, sequence):
    """Write icSHAPE scores as a CSV with a four-color ``ColorCol`` column.

    ``loadicSHAPE`` is defined outside this section; it is used here as
    returning a mapping from transcript name to a per-position list of
    scores in which missing values are the string ``"NULL"``.

    Color classes:
        ``"NULL"``      -> Gray
        score < 0.3     -> Black
        score < 0.7     -> Orange
        otherwise       -> Red

    Args:
        inFileName: icSHAPE file passed to ``loadicSHAPE``.
        outFileName: Path of the CSV file to create (overwritten).
        sequence: Residue letters, indexed by 0-based position; must be at
            least as long as the score list.

    Only the first transcript is exported. If the file holds more than one,
    an error message is printed and processing continues with the first.
    """
    shape = loadicSHAPE(inFileName)
    if len(shape) > 1:
        print("Error: Multiple Sequence")
    # list(...)[0] works on Python 2 and 3 and still raises IndexError when
    # the mapping is empty, as the original keys()[0] did.
    trans_name = list(shape)[0]
    scores = shape[trans_name]
    # The context manager closes the file even if a score fails to parse.
    with open(outFileName, "w") as out:
        out.write("resNum,resName,ColorCol,DataDescription\n")
        for idx, score in enumerate(scores):
            if score == "NULL":
                color = "Gray"
            elif float(score) < 0.3:
                color = "Black"
            elif float(score) < 0.7:
                color = "Orange"
            else:
                color = "Red"
            out.write("%s:%s,%s,%s,\n" % (trans_name, idx + 1, sequence[idx], color))

def white_base(sequence, outFileName):
    """Write a CSV that colors every residue of ``sequence`` white.

    Each row is ``18S:<1-based position>,<residue letter>,White,``.

    Args:
        sequence: Residue letters; one row is written per element.
        outFileName: Path of the CSV file to create (overwritten).
    """
    # The context manager closes the file even if a write fails.
    with open(outFileName, "w") as out:
        # NOTE(review): the second column holds the residue letter, which
        # shape2RiboColor_2 labels "resName"; here it is labelled "DataCol".
        # Left unchanged -- confirm which header is intended.
        out.write("resNum,DataCol,ColorCol,DataDescription\n")
        for position, base in enumerate(sequence, 1):
            out.write("18S:%s,%s,%s,\n" % (position, base, "White"))

调用

# Load the sequence stored under the '18S' key and replace 'T' with 'U'.
human_18S = readSeq("human_18S.fa")['18S'].replace('T', 'U')

# Convert each icSHAPE result file (<sample>.out) into a CSV (<sample>.csv).
# NOTE(review): shape2Ribo_color is not defined in this section -- only
# shape2RiboColor_1 and shape2RiboColor_2 are. Confirm which one is meant.
for sample in ("cy_vivo", "cy_vitro", "wc_vivo", "wc_vitro"):
    shape2Ribo_color(sample + ".out", sample + ".csv", human_18S)

# Also write an all-white base layer for the same sequence.
white_base(human_18S, "White_Base.csv")

3. Correct interaction data

结构相关函数

class Structure:
    """Base pairs of one secondary structure over a fixed-length sequence.

    Positions are 1-based. Pairs are stored as ``(left, right)`` tuples with
    ``left < right``.
    """

    def __init__(self, length):
        """Create an empty structure covering ``length`` positions."""
        # Sequence length; int() lets callers pass a numeric string.
        self.len = int(length)
        # (left, right) tuples in insertion order until finish() sorts them.
        self.bp = []

    def get_bp(self):
        """Return the internal list of ``(left, right)`` pairs (not a copy)."""
        return self.bp

    def show(self):
        """Print every base pair as ``left - right``, one per line."""
        for single_bp in self.bp:
            print("%s - %s" % (single_bp[0], single_bp[1]))

    def add_bp(self, left, right):
        """Record the base pair ``(left, right)``.

        Raises:
            AssertionError: if not ``left < right <= length``.
        """
        # Compare against the stored length (self.len). The original compared
        # against the bound method self.length, which never enforced the
        # upper bound on Python 2 and raises TypeError on Python 3.
        assert left < right <= self.len, \
            "invalid base pair (%s, %s) for length %s" % (left, right, self.len)
        self.bp.append((left, right))

    def finish(self):
        """Sort the stored pairs by their left position."""
        self.bp.sort(key=lambda pair: pair[0])

    def length(self):
        """Return the sequence length given at construction."""
        return self.len

    def get_bp_map(self):
        """Return a dict mapping each paired position to its partner.

        Both directions are present: ``left -> right`` and ``right -> left``.
        """
        bp_map = {}
        for base_pair in self.bp:
            bp_map[base_pair[0]] = base_pair[1]
            bp_map[base_pair[1]] = base_pair[0]
        return bp_map

def read_ct_structure(ct_file, structure_number):
    """Read one structure from a CT file into a ``Structure``.

    Each structure in the file starts with a header line whose first field
    is the number of bases, followed by that many base lines. On a base
    line, field 5 (index 4) is read as the pairing partner (0 = unpaired)
    and field 6 (index 5) as the position of the base itself.

    Args:
        ct_file: Path of the CT file.
        structure_number: 1-based index of the structure to read; earlier
            structures are skipped.

    Returns:
        A ``Structure`` holding every pair once, sorted by left position.

    Raises:
        IndexError: if the file ends before the requested structure, or a
            base line has too few fields.
        ValueError: if a numeric field cannot be parsed.
    """
    # The context manager closes the file even when a line fails to parse.
    with open(ct_file) as ct_in:
        # Skip the structures preceding the requested one.
        for _skipped in range(structure_number - 1):
            skip_num = int(ct_in.readline().split()[0])
            for _line_no in range(skip_num):
                ct_in.readline()
        # Read the requested structure.
        base_count = int(ct_in.readline().split()[0])
        structure = Structure(base_count)
        for _line_no in range(base_count):
            fields = ct_in.readline().split()
            partner = int(fields[4])
            # Add each pair only once, from the line of its right-hand base
            # (the one whose partner has the smaller position).
            if partner != 0 and partner < int(fields[5]):
                structure.add_bp(partner, int(fields[5]))
    structure.finish()
    return structure

文件转换

def RawRiboInter2Tab(inFile, outFile, sequence):
    """Convert a raw interaction list into a three-column tab file.

    Every non-blank input line must have exactly five whitespace-separated
    fields: ``left_id left_base right_id right_base bp_type``. Each id has
    the form ``<name>:<position>``; only the 1-based position is used.

    The output has one line per residue of ``sequence``:
    ``base<TAB>position<TAB>partner``. The partner is written only on the
    line of the lower-numbered residue of a pair; all other lines get 0.

    Args:
        inFile: Path of the raw interaction file.
        outFile: Path of the tab file to create (overwritten).
        sequence: Residue letters; defines the number of output lines.

    Raises:
        ValueError: if a non-blank line does not have exactly five fields,
            or a position is not an integer.
    """
    # Read base pairs; both directions are stored.
    partner_of = {}
    with open(inFile) as raw_in:
        for line in raw_in:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline) instead of
                # failing on the five-field unpack below.
                continue
            left_s, _left_base, right_s, _right_base, _bp_type = fields
            left_pos = int(left_s.split(':')[1])
            right_pos = int(right_s.split(':')[1])
            partner_of[left_pos] = right_pos
            partner_of[right_pos] = left_pos
    # Write the tab file.
    with open(outFile, "w") as tab_out:
        for position, base in enumerate(sequence, 1):
            partner = partner_of.get(position, 0)
            if partner <= position:
                # Unpaired, or the pair belongs to the partner's line.
                partner = 0
            tab_out.write("%s\t%s\t%s\n" % (base, position, partner))

def tab2ct(inFile, outFile):
    """Convert a three-column tab file into a CT file.

    Every non-blank input line must be ``base left right`` (whitespace
    separated). ``left`` is the 1-based position of the base and ``right``
    its pairing partner, or 0 when no pair is declared on that line.

    The output starts with the header ``<TAB><length><TAB>sequence`` and
    then has one line per base with six tab-separated columns: position,
    base, previous position, next position (0 for the last base), partner
    (0 if unpaired) and position again.

    On invalid input an error line is printed, nothing is written and the
    function returns early:
        * ``Error 1``: ``left >= right`` for a declared pair.
        * ``Error 2``: one end of the pair is already paired.

    Args:
        inFile: Path of the tab file.
        outFile: Path of the CT file to create (overwritten).

    Returns:
        None.
    """
    # Read base pairs; both directions are stored.
    ct = {}
    bases = []
    # The context manager closes the input on every path, including the
    # early returns (the original never closed it).
    with open(inFile) as tab_in:
        for line in tab_in:
            fields = line.split()
            if not fields:
                # Tolerate blank lines left behind by manual editing.
                continue
            base, left, right = fields
            bases.append(base)
            left = int(left)
            right = int(right)
            if right != 0:
                if left >= right:
                    print("Error 1 " + line)
                    return
                # Check both ends: a base that is already paired must not
                # silently overwrite its earlier partner.
                if left in ct or right in ct:
                    print("Error 2 " + line)
                    return
                ct[left] = right
                ct[right] = left
    sequence = "".join(bases)
    total = len(sequence)
    # Write the CT file.
    with open(outFile, "w") as ct_out:
        ct_out.write("\t%s\tsequence\n" % (total, ))
        for idx, base in enumerate(sequence):
            position = idx + 1
            # The last base has no successor, so its next-index column is 0.
            # The original compared idx with len(sequence)+1, which never
            # matched.
            next_index = 0 if position == total else position + 1
            ct_out.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (
                position, base, idx, next_index, ct.get(position, 0), position))

def ct2RiboInter(inFile, outFile):
    """Convert the first structure of a CT file into an interaction CSV.

    One row is written for every paired position, in ascending position
    order, so when the pair map holds both directions (as the one built by
    ``Structure.get_bp_map`` does) each pair appears twice: ``i,j`` and
    ``j,i``. Every row uses the fixed values ``18S`` (residue prefix),
    ``HELLO`` (Int_Type), ``Gray`` (ColorCol), ``1.00`` (Opacity) and
    ``1.00`` (LineWidth).

    Args:
        inFile: Path of the CT file; only structure 1 is read.
        outFile: Path of the CSV file to create (overwritten).
    """
    structure = read_ct_structure(inFile, 1)
    bp_map = structure.get_bp_map()
    length = structure.length()
    # The context manager closes the file even if a write fails.
    with open(outFile, "w") as out:
        out.write("Residue_i,Residue_j,Int_Type,ColorCol,Opacity,LineWidth,DataDescription\n")
        for position in range(1, length + 1):
            if position in bp_map:
                out.write("%s:%s,%s:%s,%s,%s,%s,%s,\n" % (
                    "18S", position, "18S", bp_map[position],
                    "HELLO", "Gray", "1.00", "1.00"))

调用

# Load the sequence stored under the '18S' key and replace 'T' with 'U'.
human_18S = readSeq("human_18S.fa")['18S'].replace('T', 'U')
# Step 1: convert the downloaded interaction list into an editable tab file.
RawRiboInter2Tab("human_18S_download.csv", 'human_18S.tab', human_18S)
# Step 2 (manual): edit human_18S.tab by hand before running the next call.
tab2ct("human_18S.tab", "human_18S_2.ct")
# Step 3 (manual): check that the structure in human_18S_2.ct is correct.
ct2RiboInter("human_18S_2.ct", "human_18S_2.csv")

4. Other Functions

def simu_high_shape():
    """Return one simulated high score.

    The value is drawn from a Gaussian with mean 0.9 and standard deviation
    0.1, clamped to the interval [0, 1] and rounded to three decimals.
    """
    import random
    # Clamp from above first, then from below, before rounding.
    return round(max(min(random.gauss(0.9, 0.1), 1), 0), 3)

def simu_low_shape():
    """Return one simulated low score.

    The value is drawn from a Gaussian with mean 0.1 and standard deviation
    0.1, clamped to the interval [0, 1] and rounded to three decimals.
    """
    import random
    # Clamp from above first, then from below, before rounding.
    return round(max(min(random.gauss(0.1, 0.1), 1), 0), 3)

def dot_simulate_shape(dot, outFile):
    """Write simulated scores for a structure string.

    One line ``position<TAB>score`` is written per character of ``dot``
    (positions are 1-based). A ``'.'`` gets a value from
    ``simu_high_shape()``; any other character gets one from
    ``simu_low_shape()``.

    Args:
        dot: Structure string, one character per position.
        outFile: Path of the file to create (overwritten).
    """
    # The context manager closes the file even if a write fails.
    with open(outFile, "w") as out:
        for position, symbol in enumerate(dot, 1):
            score = simu_high_shape() if symbol == '.' else simu_low_shape()
            out.write("%s\t%s\n" % (position, score))


def ct_siomutate_shape(ctFile, outFile):
    """Write simulated scores for the first structure of a CT file.

    One line ``position<TAB>score`` is written per position (1-based).
    Paired positions get a value from ``simu_low_shape()``; unpaired
    positions get one from ``simu_high_shape()``.

    Args:
        ctFile: Path of the CT file; only structure 1 is read.
        outFile: Path of the file to create (overwritten).
    """
    # Read the structure before touching the output, so a bad CT file does
    # not leave behind an empty, truncated output file.
    structure = read_ct_structure(ctFile, 1)
    bp_map = structure.get_bp_map()
    length = structure.length()
    # The context manager closes the file even if a write fails.
    with open(outFile, "w") as out:
        for position in range(1, length + 1):
            score = simu_low_shape() if position in bp_map else simu_high_shape()
            out.write("%s\t%s\n" % (position, score))