|
| 1 | +#!/usr/bin/python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +""" |
| 4 | +Created on Mon Oct 10 10:22:56 2016 |
| 5 | +
|
| 6 | +@author: philipp |
| 7 | +""" |
| 8 | +# Library sanity check |
| 9 | +# ======================================================================= |
| 10 | +# Imports |
| 11 | +import yaml |
| 12 | +import sys |
| 13 | +import os |
| 14 | +import pandas |
| 15 | + |
# Characters replaced by '_' in gene names, sgRNA IDs, sample filenames and
# treatment names (per the original note, they cause problems in PlotCount.py).
_BAD_CHARACTERS = (' ', '>', '<', ';', ':', ',', '|', '/', '\\', '(', ')',
                   '[', ']', '$', '%', '*', '?', '{', '}', '=', '+', '@')


def _replace_bad_characters(text):
    """Return ``text`` with every character of _BAD_CHARACTERS replaced by '_'."""
    for bad_char in _BAD_CHARACTERS:
        text = text.replace(bad_char, '_')
    return text


def RunSanityCheck():
    """Replace special characters in the library file and the sample sheet.

    Reads ``configuration.yaml`` from the current working directory and uses
    its ``LibDir``, ``LibFilename``, ``DataDir`` and ``WorkingDir`` entries.

    Side effects:
        * Changes the process working directory (``os.chdir``) several times.
        * Rewrites the library file in ``LibDir`` when a gene name or sgRNA ID
          contains a special character.
        * Renames files in ``DataDir`` whose name contains a special character.
        * Rewrites ``DataSheet.xlsx`` in ``WorkingDir`` when a filename or a
          treatment entry had to be changed.

    Raises:
        ValueError: if ``LibFilename`` does not end in ``tsv`` or ``csv``.
    """
    # ------------------------------------------------
    # Get parameters
    # ------------------------------------------------
    # safe_load: plain yaml.load() without a Loader is unsafe and is rejected
    # by recent PyYAML releases. The context manager closes the file even if
    # parsing fails.
    with open('configuration.yaml', 'r') as configFile:
        config = yaml.safe_load(configFile)
    LibDir = config['LibDir']
    LibFilename = config['LibFilename']
    DataDir = config['DataDir']
    WorkingDir = config['WorkingDir']
    LibFormat = LibFilename[-3:].lower()
    if LibFormat == 'tsv':
        libsep = '\t'
    elif LibFormat == 'csv':
        libsep = ','
    else:
        # Previously this fell through and failed later with an unbound
        # 'libsep'; fail early with an explicit message instead.
        raise ValueError("Unsupported library file format '{0}': expected a "
                         "tsv or csv file".format(LibFilename))

    # --------------------------------------------------------------------
    # Replace special characters in library (...these cause problems in PlotCount.py)
    # --------------------------------------------------------------------
    os.chdir(LibDir)
    LibCols = ['gene', 'ID', 'seq']
    # dtype=str / keep_default_na=False keep every cell as the text found in
    # the file, so numeric-looking IDs or names such as "NA" remain strings
    # that support .replace().
    LibFile = pandas.read_csv(LibFilename, sep=libsep, skiprows=1,
                              names=LibCols, dtype=str, keep_default_na=False)
    GeneNames = list(LibFile['gene'])
    ID = list(LibFile['ID'])
    GeneNames0 = [_replace_bad_characters(gene) for gene in GeneNames]
    ID0 = [_replace_bad_characters(sgRNA) for sgRNA in ID]
    if GeneNames != GeneNames0 or ID != ID0:
        LibFile0 = pandas.DataFrame(data={'gene': GeneNames0,
                                          'ID': ID0,
                                          'seq': list(LibFile['seq'])},
                                    columns=LibCols)
        LibFile0.to_csv(LibFilename, sep=libsep, index=False)
        print("WARNING: Special characters in library file have been replaced by '_' ")

    # --------------------------------------------------------------------
    # Load Data Sheet
    # --------------------------------------------------------------------
    os.chdir(WorkingDir)
    DataSheet = pandas.read_excel('DataSheet.xlsx')
    Filenames = list(DataSheet['FILENAME'])
    TreatmentList = list(DataSheet['TREATMENT'])
    BadCharFound = False

    # --------------------------------------------------------------------
    # Replace special characters in filenames
    # --------------------------------------------------------------------
    os.chdir(DataDir)
    Filenames0 = []
    for Filename in Filenames:
        Filename0 = _replace_bad_characters(Filename)
        if Filename0 != Filename:
            BadCharFound = True
            # os.rename instead of a shell 'mv' string: no quoting problems
            # with names containing quotes or other shell metacharacters.
            try:
                os.rename(Filename, Filename0)
            except OSError as err:
                # Best effort, as before: report the failure and carry on.
                print("WARNING: Could not rename '{0}' to '{1}': {2}".format(
                    Filename, Filename0, err))
        Filenames0.append(Filename0)
    if Filenames0 != Filenames:
        # Assign the whole column; element-wise chained assignment
        # (DataSheet['FILENAME'][j] = ...) is not guaranteed to write through
        # to the DataFrame.
        DataSheet['FILENAME'] = Filenames0

    # --------------------------------------------------------------------
    # Replace special characters in treatment names
    # --------------------------------------------------------------------
    # Entries are converted with str() first, so a non-string entry (e.g. a
    # number) counts as changed even when it holds no special character.
    TreatmentList0 = [_replace_bad_characters(str(treatment))
                      for treatment in TreatmentList]
    if TreatmentList0 != TreatmentList:
        BadCharFound = True
        DataSheet['TREATMENT'] = TreatmentList0

    # --------------------------------------------------------------------
    # Update Data Sheet
    # --------------------------------------------------------------------
    if BadCharFound:
        os.chdir(WorkingDir)
        # NOTE(review): the default index=True also writes the row index as a
        # leading column; confirm whether downstream readers rely on it.
        DataSheet.to_excel('DataSheet.xlsx', columns=['FILENAME', 'TREATMENT'])
        print("WARNING: Special characters in sample names replaced by '_'")
    else:
        print('No special characters found.')


if __name__ == "__main__":
    RunSanityCheck()
0 commit comments