1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
SNPParser{
var <placement;
var <comboDict;
var <file;
var <fileLength;
var <userID;
var <testSet;
*new{
arg aFile, aTestSet = inf;
^super.new.init(aFile, aTestSet);
}
init{
arg aFile, aTestSet;
this.setFileAndUser(aFile);
testSet = aTestSet;
}
setFileAndUser{//set the file (again) to be parsed
arg aFile;
var allSlash, allDot;
file = aFile;
allSlash = file.findAll("/");
allDot = file.findAll(".");
if(allSlash.notNil,{//if there is no slash in the filename
userID = file[allSlash[allSlash.size-1]+1..allDot[allDot.size-2]-1].asInt;
},{
userID = file[0..allDot[allDot.size-2]-1].asInt;
});
userID.postln;
}
readFile{//parse the file this parser currently is setup with
var found = false, company = "";
"Guessing parser.".postln;
SNPInfo.sCompany.do({
arg cCompany, i;
if(file.contains(cCompany.asString),{
if(SNPInfo.workingSCompany.includes(i),{
("Attempting to parse a "++cCompany++" file now.").postln;
company = cCompany;
found = true;
},{
("Parsing files from "++cCompany++" is not supported (yet). Get me a coffee to make it work!").postln;
});
});
});
if(found.not,{
("This file might not be supported at all: "++file).postln;
},{
switch(company,
"23andme",{^this.parse23andme},
{"No no no no no!".postln}
);
});
}
parse23andme{//parsing a 23andme file: id, chromosome, position, base
var snpFile = File(file, "r"), line = "", counter = 0.0, tmp, snp, newSameCounter = 0.0;
fileLength = (("wc -l "++file.shellQuote).unixCmdGetStdOut).delimit({|ch| ch.isSpace});
("File "++file++" is "++fileLength[0]++" lines long.").postln;
"Parsing might require some minutes! So kick back an get a coffee, while it lasts.".postln;
comboDict = SNPDict.new(fileLength[0], userID);
if(snpFile.isOpen,{
protect{
while{(line = snpFile.getLine).notNil}{//FIXME: Reinsert testset
if(line[0].asString!="#",{//skip commented lines
tmp = line.delimit({|ch| ch.isSpace});//delimit the line by space and/or tab
if(tmp[3].asString!="--" && (SNPInfo.isBasePair(tmp[3]) || SNPInfo.isBase(tmp[3]) && (SNPInfo.chromosomesLength[SNPInfo.convertChromosome(tmp[1])-1]>=tmp[2].asFloat)),{//skip empty SNPs and make sure it's either a single base or a base pair and ignore out-of-range SNPs (yes, science is unclear!)
if(SNPInfo.isBasePair(tmp[3]),{//if it's a base pair, set it up
snp = SNP.new(tmp[1], tmp[2], tmp[0], tmp[3], SNPInfo.createResolverForPair(tmp[3]));
},{
if(SNPInfo.isBase(tmp[3]), {//if it's a single base, set it up
snp = SNP.new(tmp[1], tmp[2], tmp[0], tmp[3], \none);
});
});
newSameCounter = newSameCounter + comboDict.storeSNP(snp, SNPInfo.calcPosition(snp.chromosome, snp.position));
switch(newSameCounter,
1.0,{"Storing SNPs now: \n==========".postln;},
100000.0,{"=".post;},
200000.0,{"=".post;},
300000.0,{"=".post;},
400000.0,{"=".post;},
500000.0,{"=".post;},
600000.0,{"=".post;},
700000.0,{"=".post;},
800000.0,{"=".post;},
900000.0,{"=".post;},
);
});
});
counter = counter + 1;
};
}{
snpFile.close;
};
},{
("Couldn't open file for reading: "++file).warn;
});
"=".postln;
"Sorting lookup tables. This will also take some time!".postln;
comboDict.orderLookup(2);
"Done sorting lookup tables.".postln;
^comboDict;
}
}
|